Example #1
 def _store_all_num_formats(self):
     """Store user defined numerical formats i.e. FORMAT records."""
     # Leaning num_format syndrome
     num_formats = {}
     num_formats_ = []
     index = 164
     # Iterate through the XF objects and write a FORMAT record if it isn't a
     # built-in format type and if the FORMAT string hasn't already been used.
     for format in self._formats:
         num_format = format._num_format
         # Check if num_format is an index to a built-in format.
         # Also check for a string of zeros, which is a valid format string
         # but would evaluate to zero.
         if not re.match(r"^0+\d", num_format):
             if re.match(r"^\d+", num_format):
                 continue
         if num_formats.has_key(num_format):
             # FORMAT has already been used
             format._num_format = num_formats[num_format]
         else:
             # Add a new FORMAT
             num_formats[num_format] = index
             format._num_format = index
             num_formats_.append(num_format)
             index += 1
     # Write the new FORMAT records starting from 0xA4
     index = 164
     for num_format in num_formats_:
         self._store_num_format(num_format, index)
         index += 1
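As a quick aside, a standalone illustration (not part of the original module) of how the two checks above classify a few made-up format strings: an all-digit string is treated as a built-in format index and skipped, while a run of zeros is kept as a real format.
import re

for fmt in ("22", "0000", "#,##0.00"):
    zeros = bool(re.match(r"^0+\d", fmt))                    # "0000" is a real format string
    builtin = (not zeros) and bool(re.match(r"^\d+", fmt))   # "22" looks like a built-in index
    print(fmt, "skipped (built-in)" if builtin else "stored as a FORMAT record")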
Example #2
    def get_tests(self, src):
        ret = {"param_in": [], "return_possibilities": [], "shall_round": [], "set": []}

        for r in src.split("\n"):
            m = re.match("# IN (.*)$", r)
            if m:
                param_in = eval(m.group(1))

                param_in = [
                    p
                    if len(p) < 16 or not re.match("[0-9A-F]+$", p)
                    else self.convert_srs(p, self.stat["config"]["db.srs"])
                    for p in param_in
                ]

                ret["param_in"].append(param_in)
                ret["return_possibilities"].append(set())
                ret["shall_round"].append(False)
                ret["set"].append(False)

            m = re.match("# OUT(_ROUND|_SET)? (.*)$", r)
            if m:
                return_out = eval(m.group(2))

                if len(return_out) > 16 and re.match("[0-9A-F]+$", return_out):
                    return_out = self.convert_srs(return_out, self.stat["config"]["db.srs"])

                if m.group(1) == "_ROUND":
                    ret["shall_round"][-1] = True
                if m.group(1) == "_SET":
                    ret["set"][-1] = True

                ret["return_possibilities"][-1].add(return_out)

        return ret
Example #3
def handle_salary(salary):
    """
    Process the job salary field and normalize it into a standard format.
    :param salary:
    :return:
    """
    if u"面议" in salary or u"不显示职位月薪范围" in salary or u"保密" in salary:
        return u"面议"
    if u"以上" in salary:
        temp = re.match(u"(\d{4,7})(元/月)?以上", salary)
        if temp:
            s = int(temp.group(1))
            s1 = int(s * 1.5)
            return str(s) + "-" + str(s1)
    if u"以下" in salary:
        temp = re.match(u"(\d{4,7})以下", salary)
        if temp:
            s = int(temp.group(1))
            s1 = int(s * 0.8)
            return str(s1) + "-" + str(s)
    if u"到" in salary:
        if re.match(u"\d+到\d+", salary):
            return salary.replace(u"到", "-")
    if re.match(u"\d+-\d+元?/月", salary):
        return salary.strip()[:-3]

    return ""
Example #4
    def spider_opened(self, spider):
        try:
            if hasattr(spider, "proxy"):
                m = re.match(r"^(http://[.0-9]+:[0-9]+)(,(http://[.0-9]+:[0-9]+))*$", spider.proxy)
                if m:
                    proxy = {"list": spider.proxy.split(","), "rate": 1}
                else:
                    proxy = {"file": spider.proxy, "rate": 1}
            else:
                proxy = {"enabled": False}

            self.enabled = proxy.get("enabled", True)
            if not self.enabled:
                return

            self.rate = proxy.get("rate", 10)

            for i in utils.load_keywords(proxy, msg="proxies"):
                m = re.match(r"^(?P<prot>\S+)(\s+|://)(?P<host>\S+)(\s+|:)(?P<port>\S+)$", i)
                if m:
                    self.proxy_list.append(m.groupdict())
                else:
                    log.msg("drop invalid proxy <{}>".format(i), log.WARNING)

        except Exception as ex:
            self.enabled = False
            log.msg("cannot load proxies: {}".format(ex))
Example #5
def list_apt_updates(apt_update=True):
    # See if we have this information cached recently.
    # Keep the information for 8 hours.
    global _apt_updates
    if _apt_updates is not None and _apt_updates[0] > datetime.datetime.now() - datetime.timedelta(hours=8):
        return _apt_updates[1]

    # Run apt-get update to refresh package list. This should be running daily
    # anyway, so on the status checks page don't do this because it is slow.
    if apt_update:
        shell("check_call", ["/usr/bin/apt-get", "-qq", "update"])

    # Run apt-get upgrade in simulate mode to get a list of what
    # it would do.
    simulated_install = shell("check_output", ["/usr/bin/apt-get", "-qq", "-s", "upgrade"])
    pkgs = []
    for line in simulated_install.split("\n"):
        if line.strip() == "":
            continue
        if re.match(r"^Conf .*", line):
            # remove these lines, not informative
            continue
        m = re.match(r"^Inst (.*) \[(.*)\] \((\S*)", line)
        if m:
            pkgs.append({"package": m.group(1), "version": m.group(3), "current_version": m.group(2)})
        else:
            pkgs.append({"package": "[" + line + "]", "version": "", "current_version": ""})

    # Cache for future requests.
    _apt_updates = (datetime.datetime.now(), pkgs)

    return pkgs
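A representative "Inst" line (made up here) and how the pattern above splits it into package, current version, and candidate version:
import re

line = "Inst base-files [10.1ubuntu2] (10.1ubuntu2.1 Ubuntu:18.04/bionic-updates [amd64])"
m = re.match(r"^Inst (.*) \[(.*)\] \((\S*)", line)
print(m.group(1), m.group(2), m.group(3))
# base-files 10.1ubuntu2 10.1ubuntu2.1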
Example #6
    def embedding_init_code(self, pysource):
        if self._embedding:
            raise ValueError("embedding_init_code() can only be called once")
        # fix 'pysource' before it gets dumped into the C file:
        # - remove empty lines at the beginning, so it starts at "line 1"
        # - dedent, if all non-empty lines are indented
        # - check for SyntaxErrors
        import re

        match = re.match(r"\s*\n", pysource)
        if match:
            pysource = pysource[match.end() :]
        lines = pysource.splitlines() or [""]
        prefix = re.match(r"\s*", lines[0]).group()
        for i in range(1, len(lines)):
            line = lines[i]
            if line.rstrip():
                while not line.startswith(prefix):
                    prefix = prefix[:-1]
        i = len(prefix)
        lines = [line[i:] + "\n" for line in lines]
        pysource = "".join(lines)
        #
        compile(pysource, "cffi_init", "exec")
        #
        self._embedding = pysource
Example #7
def getWeather(place):
    match = re.match("([a-zA-Z]+),* ([a-zA-Z]{2})", place)
    if match != None:
        # use the captured groups; .string would return the whole input, not the matched text
        city = match.group(1)
        state = match.group(2)

        print "city, state = " + city + ", " + state
    elif re.match("\d{5}", place) != None:
        zip = place
        print "zip code = ", zip
    else:
        return "Please enter a real location"

    if "city" in locals() and "state" in locals():
        content = urllib.urlopen("http://www.rssweather.com/wx/us/" + state + "/" + city + "/rss.php")
    else:
        content = urllib.urlopen("http://www.rssweather.com/zipcode/" + zip + "/wx.php")

    xmldoc = minidom.parse(content)

    try:
        pubDate = xmldoc.getElementsByTagName("pubDate")[0].firstChild.data
    except:
        return "I can't find the weather for that city."
    pubDate = pubDate.split(" ")[4] + " GMT " + pubDate.split(" ")[5]
    summary = xmldoc.getElementsByTagName("description")[1].firstChild.data
    reply = summary + " (" + pubDate + ")"

    return reply
Example #8
    def test_various_ops(self):
        # This takes about n/3 seconds to run (about n/3 clumps of tasks,
        # times about 1 second per clump).
        NUMTASKS = 10

        # no more than 3 of the 10 can run at once
        sema = threading.BoundedSemaphore(value=3)
        mutex = threading.RLock()
        numrunning = Counter()

        threads = []

        for i in range(NUMTASKS):
            t = TestThread("<thread %d>" % i, self, sema, mutex, numrunning)
            threads.append(t)
            self.assertEqual(t.ident, None)
            self.assertTrue(re.match("<TestThread\(.*, initial\)>", repr(t)))
            t.start()

        if verbose:
            print("waiting for all tasks to complete")
        for t in threads:
            t.join(NUMTASKS)
            self.assertTrue(not t.is_alive())
            self.assertNotEqual(t.ident, 0)
            self.assertFalse(t.ident is None)
            self.assertTrue(re.match("<TestThread\(.*, stopped -?\d+\)>", repr(t)))
        if verbose:
            print("all tasks done")
        self.assertEqual(numrunning.get(), 0)
Example #9
def what_to_say(bot, source, request, private):
    global dogged
    match = re.match("dogbot: give (\w+) dog #?(1|2)", request)
    if match:
        which_dog = int(match.groups(0)[1]) - 1
        dogged[which_dog] = match.groups(0)[0]
        return [
            "{0} has dog #{1} http://crashreports.lal.cisco.com/reports/dog{1}.jpg.".format(
                dogged[which_dog], which_dog + 1
            )
        ]
    elif "who has the dog?" in request:
        return [
            "{1} has dog #{0} http://crashreports.lal.cisco.com/reports/dog{0}.jpg.".format(index + 1, recipient)
            for index, recipient in enumerate(dogged)
        ]
    match = re.match("who has dog #?(1|2)", request)
    if match:
        which_dog = int(match.groups(0)[0]) - 1
        return [
            "{1} has dog #{0} http://crashreports.lal.cisco.com/reports/dog{0}.jpg".format(
                which_dog + 1, dogged[which_dog]
            )
        ]
    return []
Example #10
def start_mailpile(app_args, args):
    os_settings = get_os_settings(args)
    mailpiles = parse_usermap(args, os_settings)
    user_settings = get_user_settings(args, user=args.user, mailpiles=mailpiles)
    assert re.match("^[0-9]+$", user_settings["port"]) is not None
    assert re.match("^[a-z0-9\.]+$", user_settings["host"]) is not None
    if args.user:
        command = '%s "%s" --start --port="%s" --host="%s"' % (
            _escape(os_settings["interpreter"]),
            _escape(os_settings["mailpile-admin"]),
            _escape(user_settings["port"]),
            _escape(user_settings["host"]),
        )
        if args.password:
            print "%s%s" % run_as_user(args.user, args.password, command)
            script = None
        else:
            script = ['sudo -u "%(user)s" -- ' + command]
    else:
        script = MAILPILE_START_SCRIPT

    if script:
        run_script(args, user_settings, script)

    if args.user:
        hostport = "%s:%s" % (user_settings["host"], user_settings["port"])
        mailpiles[hostport] = (user_settings["user"], user_settings["host"], user_settings["port"], False, None, None)
        save_usermap(args, os_settings, mailpiles)
        # FIXME: If/when run_script raises exceptions, this call should
        #        be try/except wrapped to not be considered critical, as
        #        we expect some chmods to fail when not run as root.
        run_script(args, os_settings, FIX_PERMS_SCRIPT)
Example #11
    def sniff(self, filename):
        """
        Determines whether the file is a velveth produced RoadMap::
          142858  21      1
          ROADMAP 1
          ROADMAP 2
          ...
        """

        try:
            fh = open(filename)
            while True:
                line = fh.readline()
                if not line:
                    break  # EOF
                line = line.strip()
                if line:  # first non-empty line
                    if not re.match(r"\d+\t\d+\t\d+$", line):
                        break
                    # The next line.strip() should be 'ROADMAP 1'
                    line = fh.readline().strip()
                    if not re.match(r"ROADMAP \d+$", line):
                        break
                    return True
                else:
                    break  # the first line is blank, so this is not a RoadMap file
            fh.close()
        except:
            pass
        return False
Example #12
    def get_description():
        branch = get_branch_name()
        try:
            description = run_git("describe --abbrev={0} --tags".format(ABBREV))
        except RuntimeError:
            description = run_git("describe --abbrev={0} --always".format(ABBREV))
            regex = r"""^
            (?P<commit>.*?)
            (?P<dirty>(-dirty)?)
            $"""
            m = re.match(regex, description, re.VERBOSE)
            commit, dirty = (m.group(_) for _ in "commit,dirty".split(","))
            return branch, "", INF, commit, dirty

        regex = r"""^
        (?P<tag>.*?)
        (?:-
            (?P<rev>\d+)-g
            (?P<commit>[0-9a-f]{5,40})
        )?
        (?P<dirty>(-dirty)?)
        $"""
        m = re.match(regex, description, re.VERBOSE)
        tag, rev, commit, dirty = (m.group(_) for _ in "tag,rev,commit,dirty".split(","))
        if rev is None:
            rev = 0
            commit = ""
        else:
            rev = int(rev)
        return branch, tag, rev, commit, dirty
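A quick check (not from the project) of how the verbose pattern above splits a typical git describe string; the sample value is made up:
import re

regex = r"""^
(?P<tag>.*?)
(?:-
    (?P<rev>\d+)-g
    (?P<commit>[0-9a-f]{5,40})
)?
(?P<dirty>(-dirty)?)
$"""
m = re.match(regex, "v1.2-14-g1a2b3c4-dirty", re.VERBOSE)
print(m.group("tag", "rev", "commit", "dirty"))
# ('v1.2', '14', '1a2b3c4', '-dirty')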
Example #13
def _sunos_cpudata():
    """
    Return the CPU information for Solaris-like systems
    """
    # Provides:
    #   cpuarch
    #   num_cpus
    #   cpu_model
    #   cpu_flags
    grains = {}
    grains["cpu_flags"] = []

    grains["cpuarch"] = __salt__["cmd.run"]("uname -p")
    psrinfo = "/usr/sbin/psrinfo 2>/dev/null"
    grains["num_cpus"] = len(__salt__["cmd.run"](psrinfo).splitlines())
    kstat_info = "kstat -p cpu_info:0:*:brand"
    for line in __salt__["cmd.run"](kstat_info).splitlines():
        match = re.match(r"(\w+:\d+:\w+\d+:\w+)\s+(.+)", line)
        if match:
            grains["cpu_model"] = match.group(2)
    isainfo = "isainfo -n -v"
    for line in __salt__["cmd.run"](isainfo).splitlines():
        match = re.match(r"^\s+(.+)", line)
        if match:
            cpu_flags = match.group(1).split()
            grains["cpu_flags"].extend(cpu_flags)

    return grains
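kstat -p prints lines of the form module:instance:name:statistic followed by a tab and the value; a made-up sample matched by the pattern above:
import re

line = "cpu_info:0:cpu_info0:brand\tIntel(r) Xeon(r) CPU E5-2680"
m = re.match(r"(\w+:\d+:\w+\d+:\w+)\s+(.+)", line)
print(m.group(2))  # Intel(r) Xeon(r) CPU E5-2680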
Example #14
    def _read_image_diff(self, sp):
        deadline = time.time() + 2.0
        output = None
        output_image = ""

        while True:
            output = sp.read_stdout_line(deadline)
            if sp.timed_out or sp.crashed or not output:
                break

            if output.startswith("diff"):  # This is the last line ImageDiff prints.
                break

            if output.startswith("Content-Length"):
                m = re.match("Content-Length: (\d+)", output)
                content_length = int(m.group(1))
                output_image = sp.read_stdout(deadline, content_length)
                output = sp.read_stdout_line(deadline)
                break

        if sp.timed_out:
            _log.error("ImageDiff timed out")
        if sp.crashed:
            _log.error("ImageDiff crashed")
        # FIXME: There is no need to shut down the ImageDiff server after every diff.
        sp.stop()

        diff_percent = 0
        if output and output.startswith("diff"):
            m = re.match("diff: (.+)% (passed|failed)", output)
            if m.group(2) == "passed":
                return [None, 0]
            diff_percent = float(m.group(1))

        return (output_image, diff_percent)
Example #15
def signup(request):
    if request.method == "GET":
        return render(request, "signup.html")
    else:
        post_data = request.POST

        username = post_data.get("username", "")
        password = post_data.get("password", "")
        email = post_data.get("email", "")

        if username == "":
            return render(request, "signup.html", {"error": "Username must not be empty."})

        if password == "":
            return render(request, "signup.html", {"error": "Password must not be empty."})

        if email == "":
            return render(request, "signup.html", {"error": "E-mail must not be empty."})

        if not re.match(r"^[A-Za-z0-9_]+$", username):
            return render(
                request, "signup.html", {"error": "Username must only contain letters, numbers and underscores."}
            )

        if not re.match(r"^.{7,}$", password):
            return render(request, "signup.html", {"error": "Password must be at least 7 characters long."})

        if not re.match(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", email):
            return render(request, "signup.html", {"error": "E-mail must be valid."})

        user = User.signup(username, password, email)
        if user is None:
            return render(request, "signup.html", {"error": "A user with that already exists."})
        user.save()
        return HttpResponseRedirect(reverse("login"))
Example #16
    def match_tag(tag):
        match = re.match(r"^rel_(\d+)_(\d+)_(\d\d\d\d)_?(\d\d\d\d)$", tag)
        if match:
            v_maj = match.group(1)
            v_min = match.group(2)
            r_maj = match.group(3)
            r_min = match.group(4)
            v_name = "%s.%s-%s.%s" % (v_maj, v_min, r_maj, r_min)
            return (v_maj, v_min, r_maj, r_min, v_name)

        match = re.match(r"^rel_(\d+)_(\d+)$", tag)
        if not match:
            match = re.match(r"^v?(\d+)\.(\d+)$", tag)
        if match:
            v_maj = match.group(1)
            v_min = match.group(2)
            v_name = "%s.%s" % (v_maj, v_min)
            return (v_maj, v_min, "0", "0", v_name)

        match = re.match(r"^v?(\d+)\.(\d+)\-(\d+)\.(\d+)$", tag)
        if match:
            v_maj = match.group(1)
            v_min = match.group(2)
            r_maj = match.group(3)
            r_min = match.group(4)
            v_name = "%s.%s-%s.%s" % (v_maj, v_min, r_maj, r_min)
            return (v_maj, v_min, r_maj, r_min, v_name)

        return None
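Illustrative inputs for the three tag shapes handled above, assuming match_tag is reachable as a plain function:
match_tag("rel_2_1_0703_1128")  # -> ('2', '1', '0703', '1128', '2.1-0703.1128')
match_tag("v3.4")               # -> ('3', '4', '0', '0', '3.4')
match_tag("v3.4-12.1")          # -> ('3', '4', '12', '1', '3.4-12.1')
match_tag("random-tag")         # -> None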
Example #17
    def parse(self):
        # Parse the subject with a regex
        match = re.match(
            r" *CVS Update: +(?P<module>[^ ]+)( *\(branch: (?P<branch>[^\) ]+)\))? *", self.message["subject"]
        )
        if not match:
            return
        module, branch = match.group("module", "branch")
        if module:
            self.addModule(module)
        if branch:
            self.addBranch(branch)

        # Author is the first token of the from address. Most of the from addresses
        # are @XFree86.Org, so strip that out if we have it.
        address = self.message["from"].split(" ")[0]
        try:
            # If the address is in <brackets>, strip them off
            address = address.split("<", 1)[1].split(">", 1)[0]
        except IndexError:
            pass
        address = address.replace("@XFree86.Org", "")
        self.addAuthor(address)

        # Skip lines until we get to a section we can process
        while True:
            line = self.pullLine()
            if not line:
                break

            if line == "Log message:\n":
                self.readLog()
            elif re.match(" +Revision +Changes +Path\n", line):
                self.readFiles()
                break
Example #18
def parse_function_type(data):
    type = data["type"]
    if not re.match("fn\\(", type):
        return None
    pos = 3
    args, retval = ([], None)
    while pos < len(type) and type[pos] != ")":
        colon = type.find(":", pos)
        name = "?"
        if colon != -1:
            name = type[pos:colon]
            if not re.match("[\\w_$]+$", name):
                name = "?"
            else:
                pos = colon + 2
        type_start = pos
        depth = 0
        while pos < len(type):
            ch = type[pos]
            if ch == "(" or ch == "[" or ch == "{":
                depth += 1
            elif ch == ")" or ch == "]" or ch == "}":
                if depth > 0:
                    depth -= 1
                else:
                    break
            elif ch == "," and depth == 0:
                break
            pos += 1
        args.append((name, type[type_start:pos]))
        if type[pos] == ",":
            pos += 2
    if type[pos : pos + 5] == ") -> ":
        retval = type[pos + 5 :]
    return {"name": data.get("exprName", None) or data.get("name", None) or "fn", "args": args, "retval": retval}
Example #19
def area_code_lookup(request, area_id, format):
    from mapit.models import Area, CodeType

    area_code = None
    if re.match("\d\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\d\d\d|[A-Z]|[A-Z]\d\d)$", area_id):
        area_code = CodeType.objects.get(code="ons")
    elif re.match("[EW]0[12]\d{6}$", area_id):  # LSOA/MSOA have ONS code type
        area_code = CodeType.objects.get(code="ons")
    elif re.match("[ENSW]\d{8}$", area_id):
        area_code = CodeType.objects.get(code="gss")
    if not area_code:
        return None

    args = {"format": format, "codes__type": area_code, "codes__code": area_id}
    if re.match("[EW]01", area_id):
        args["type__code"] = "OLF"
    elif re.match("[EW]02", area_id):
        args["type__code"] = "OMF"

    area = get_object_or_404(Area, **args)
    path = "/area/%d%s" % (area.id, ".%s" % format if format else "")
    # If there was a query string, make sure it's passed on in the
    # redirect:
    if request.META["QUERY_STRING"]:
        path += "?" + request.META["QUERY_STRING"]
    return HttpResponseRedirect(path)
Example #20
File: Utils.py Project: jpirko/lnst
def bool_it(val):
    if isinstance(val, str):
        if re.match("^\s*(?i)(true)", val) or re.match("^\s*(?i)(yes)", val):
            return True
        elif re.match("^\s*(?i)(false)", val) or re.match("^\s*(?i)(no)", val):
            return False
    return True if int_it(val) else False
Example #21
def series_metadata(file_name, series_name):
    def block(whole, start, end):
        start_block = whole[whole.index(start) :]
        block = start_block[: start_block.index(end)]

        return block

    content = ""
    with open(file_name) as omexml:
        content = omexml.read()

    # 1. cut the block of text representing the series from the content
    series_block = block(content, "Image:{}".format(series_name), "</Image>")

    # 2. rs, cs, zs, ts, rmop, cmop, zmop
    # stored in the line
    # <Pixels ... PhysicalSizeX="3.829397265625" PhysicalSizeY="3.829397265625" PhysicalSizeZ="1.482" ... SizeC="2" SizeT="1" SizeX="256" SizeY="256" SizeZ="1">
    pixels_line = block(series_block, "<Pixels", ">")
    pixels_line_template = r'^<.+PhysicalSizeX="(?P<cmop>.+)" PhysicalSizeY="(?P<rmop>.+)" PhysicalSizeZ="(?P<zmop>.+)" SignificantBits=".+" SizeC=".+" SizeT="(?P<ts>.+)" SizeX="(?P<cs>.+)" SizeY="(?P<rs>.+)" SizeZ="(?P<zs>.+)" Type=".+"$'

    metadata = re.match(pixels_line_template, pixels_line).groupdict()

    # 3. finally, tpf. A little more complicated. Need to find the line in the planes section where C,T,Z = 0,1,0 -> then take DeltaT
    # <Plane DeltaT="458.7209987640381" PositionX="0.06314316103006" PositionY="0.04187452934148" PositionZ="0.0" TheC="0" TheT="1" TheZ="0"/>

    tpf_in_seconds = 0
    line_template = r'^<Plane DeltaT="(?P<delta_t>.+)" PositionX=".+" PositionY=".+" PositionZ=".+" TheC="(?P<c>.+)" TheT="(?P<t>.+)" TheZ="(?P<z>.+)"/>$'
    lines = [l for l in series_block.split("\n") if "Plane DeltaT" in l]
    for line in lines:
        line_dict = re.match(line_template, line.strip()).groupdict()
        if (line_dict["c"], line_dict["t"], line_dict["z"]) == ("0", "1", "0"):
            tpf_in_seconds = float(line_dict["delta_t"])

    metadata["tpf_in_seconds"] = tpf_in_seconds
    return metadata
Example #22
def getstrlength(var):
    if isstringfunction(var):
        if "result" in var:
            a = var["result"]
        else:
            a = var["name"]
        if a in var["vars"]:
            return getstrlength(var["vars"][a])
        else:
            errmess("getstrlength: function %s has no return value?!\n" % a)
    if not isstring(var):
        errmess("getstrlength: expected a signature of a string but got: %s\n" % (` var `))
    len = "1"
    if "charselector" in var:
        a = var["charselector"]
        if "*" in a:
            len = a["*"]
        elif "len" in a:
            len = a["len"]
    if re.match(r"\(\s*([*]|[:])\s*\)", len) or re.match(r"([*]|[:])", len):
        # if len in ['(*)','*','(:)',':']:
        if isintent_hide(var):
            errmess("getstrlength:intent(hide): expected a string with defined length but got: %s\n" % (` var `))
        len = "-1"
    return len
Example #23
    def process_isolation_file(self, sql_file, output_file):
        """
            Processes the given sql file and writes the output
            to output file
        """
        try:
            command = ""
            for line in sql_file:
                tinctest.logger.info("re.match: %s" % re.match(r"^\d+[q\\<]:$", line))
                print >> output_file, line.strip(),
                (command_part, dummy, comment) = line.partition("--")
                if command_part == "" or command_part == "\n":
                    print >> output_file
                elif command_part.endswith(";\n") or re.match(r"^\d+[q\\<]:$", line):
                    command += command_part
                    tinctest.logger.info("Processing command: %s" % command)
                    self.process_command(command, output_file)
                    command = ""
                else:
                    command += command_part

            for process in self.processes.values():
                process.stop()
        except:
            for process in self.processes.values():
                process.terminate()
            raise
        finally:
            for process in self.processes.values():
                process.terminate()
Example #24
def convert(path):
    "Convert the given file to an NLTK tagged corpus file."

    if not os.path.exists("lxxm-corpus"):
        os.mkdir("lxxm-corpus")
    out_path = "lxxm-corpus/" + path.rsplit("-", 1)[0]
    print "Converting " + out_path
    tokens = []
    f = codecs.open("source/" + path, encoding="utf-8")
    lines = f.readlines()
    f.close()
    for line in lines:
        # Handle verse labels
        if re.match(r"^[A-Z]", line):
            pass
        # Empty lines signify verse breaks, treat as sentence breaks
        elif re.match(r"\n", line):
            tokens.append("\n")
        else:
            fields = line.split()
            pos = fields[1][:3].replace("-", "")
            parse = fields[1][3:].replace("-", "")
            tag = pos
            if len(parse) > 0:
                tag += "-" + parse
            token = fields[0] + "/" + tag
            tokens.append(token)

    text = " ".join(tokens)
    g = open(out_path, "w")
    g.write(text.encode("utf-8"))
    g.close()
Example #25
def yn(prompt, default="y", batch=False):
    # A sanity check against default value
    # If not y/n then y is assumed
    if default not in ["y", "n"]:
        default = "y"

    # Let's build the prompt
    choicebox = "[Y/n]" if default == "y" else "[y/N]"
    prompt = prompt + " " + choicebox + " "

    # If input is not a yes/no variant or empty
    # keep asking
    while True:
        # If batch option is True then auto reply
        # with default input
        if not batch:
            input = raw_input(prompt).strip()
        else:
            print prompt
            input = ""

        # If input is empty default choice is assumed
        # so we return True
        if input == "":
            return True

        # Given 'yes' as input if default choice is y
        # then return True, False otherwise
        if match("y(?:es)?", input, I):
            return True if default == "y" else False

        # Given 'no' as input if default choice is n
        # then return True, False otherwise
        elif match("n(?:o)?", input, I):
            return True if default == "n" else False
Example #26
def load_config(config, configfiles, filter=[]):
    for configfile in configfiles:
        if os.path.exists(configfile):
            output("info", "Loading config file %s." % configfile)
            f = open(configfile)
            lines = f.readlines()
            for line in lines:
                res = re.match(r"\s*([\w\.]+)\s*=\s*(.*)\s*", line)
                if res:
                    key = res.group(1)
                    value = res.group(2)

                    found = len(filter) == 0
                    for fi in filter:
                        res2 = re.match(fi, key)
                        if res2:
                            found = 1
                            break

                    if not found:
                        continue

                    # value = res.group(2).replace("%h", host);
                    key = key.split(".")
                    k = -1
                    itr = config
                    for k in range(0, len(key) - 1):
                        if not key[k] in itr:
                            itr[key[k]] = {}
                        itr = itr[key[k]]
                    itr[key[k + 1]] = value
            f.close()
        else:
            output("error", "Configfile %s does not exist." % configfile)
Example #27
    def add_blog_directory(self, entry):
        # Thumbnail...
        div_entry_image = entry.find("div", {"class": "entry-image"})
        thumbnail = div_entry_image.find("img", {"class": "thumb"})["src"]

        if not re.match("^https?:", thumbnail):
            thumbnail = "%s%s" % (utils.url_root, thumbnail)

        # Title
        div_entry_meta = entry.find("div", {"class": "entry-meta"})
        a_title = div_entry_meta.find("a", {"class": "title"})
        title = a_title.string

        # Show page URL
        show_url = a_title["href"]

        # this is to ignore things that are not linked to channel9
        if re.match("^https?:", show_url):
            return

        div_description = div_entry_meta.find("div", {"class": "description"})
        plot = div_description.string

        # Show page URL
        show_url = a_title["href"]

        # Add to list...
        list_item = control.item(title, iconImage=utils.icon_folder, thumbnailImage=thumbnail)
        list_item.setArt({"thumb": thumbnail, "fanart": thumbnail, "landscape": thumbnail, "poster": thumbnail})
        list_item.setInfo("video", {"plot": plot, "title": title})
        plugin_list_show = "%s?action=list-blog&blog-url=%s" % (sys.argv[0], urllib.quote_plus(show_url))
        control.addItem(handle=int(sys.argv[1]), url=plugin_list_show, listitem=list_item, isFolder=True)
        return
Example #28
    def history(self, page):
        GIT_COMMIT_FIELDS = ["commit", "author", "date", "date_relative", "message"]
        GIT_LOG_FORMAT = "%x1f".join(["%h", "%an", "%ad", "%ar", "%s"]) + "%x1e"
        output = git.log("--format=%s" % GIT_LOG_FORMAT, "--follow", "-z", "--shortstat", page.abspath)
        output = output.split("\n")
        history = []
        for line in output:
            if "\x1f" in line:
                log = line.strip("\x1e\x00").split("\x1f")
                history.append(dict(zip(GIT_COMMIT_FIELDS, log)))
            else:
                insertion = re.match(r".* (\d+) insertion", line)
                deletion = re.match(r".* (\d+) deletion", line)
                history[-1]["insertion"] = int(insertion.group(1)) if insertion else 0
                history[-1]["deletion"] = int(deletion.group(1)) if deletion else 0

        max_changes = float(max([(v["insertion"] + v["deletion"]) for v in history])) or 1.0
        for v in history:
            v.update(
                {
                    "insertion_relative": str((v["insertion"] / max_changes) * 100),
                    "deletion_relative": str((v["deletion"] / max_changes) * 100),
                }
            )
        return history
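The --shortstat lines parsed by the two patterns above look roughly like this (the sample line is illustrative):
import re

line = " 3 files changed, 10 insertions(+), 2 deletions(-)"
insertion = re.match(r".* (\d+) insertion", line)
deletion = re.match(r".* (\d+) deletion", line)
print(insertion.group(1), deletion.group(1))  # 10 2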
Example #29
def parse():
    # print(set([len(l) for l in read()]))
    counter = 0
    linetype = LineTypes.UNKNOWN
    counts = []

    for l in read():
        counter += 1
        columns_count = len(l)
        counts.append(columns_count)

        if columns_count > 1:
            if re.match("\d{1,6}[-\d{1,4}]{0,1}", l[0]):
                # DataRow
                linetype = LineTypes.DATA
                # print('DATA:%d: %s' % (counter, '\t'.join(l).encode('utf8').replace('\t',' | ')))
            elif l[0].startswith(u"\u0634\u0645\u0627\u0631\u0647"):
                # HEADER
                # print(('HEADER:%d: %s' % (counter, l[0].encode('utf8'))))
                linetype = LineTypes.HEADER
            elif re.match("^CD", l[0]):
                linetype = LineTypes.MEDIA_NO
                # print(('MEDIA_NO:%d: %s' % (counter, l[0].encode('utf8'))))
            else:
                linetype = LineTypes.CATEGORY
                # print(('CATEGORY:%d: %s' % (counter, l[0].encode('utf8'))))

        else:
            # print(('UNKNOWN:%d :%s' % (counter, l[0].encode('utf8'))))
            linetype = LineTypes.UNKNOWN
        yield linetype, l
Example #30
def _validate_hostname(data):
    # NOTE: An individual name regex instead of an entire FQDN was used
    # because its easier to make correct. Feel free to replace with a
    # full regex solution. The logic should validate that the hostname
    # matches RFC 1123 (section 2.1) and RFC 952.
    hostname_pattern = "[a-zA-Z0-9-]{1,63}$"
    try:
        # Trailing periods are allowed to indicate that a name is fully
        # qualified per RFC 1034 (page 7).
        trimmed = data if data[-1] != "." else data[:-1]
        if len(trimmed) > 255:
            raise TypeError(_("'%s' exceeds the 255 character hostname limit") % trimmed)
        names = trimmed.split(".")
        for name in names:
            if not name:
                raise TypeError(_("Encountered an empty component."))
            if name[-1] == "-" or name[0] == "-":
                raise TypeError(_("Name '%s' must not start or end with a hyphen.") % name)
            if not re.match(hostname_pattern, name):
                raise TypeError(
                    _("Name '%s' must be 1-63 characters long, each of " "which can only be alphanumeric or a hyphen.")
                    % name
                )
        # RFC 1123 hints that a TLD can't be all numeric. last is a TLD if
        # it's an FQDN.
        if len(names) > 1 and re.match("^[0-9]+$", names[-1]):
            raise TypeError(_("TLD '%s' must not be all numeric") % names[-1])
    except TypeError as e:
        msg = _("'%(data)s' is not a valid hostname. Reason: %(reason)s") % {"data": data, "reason": e.message}
        LOG.debug(msg)
        return msg
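A quick illustration of the per-label pattern and the all-numeric TLD check used above; the sample names are made up:
import re

hostname_pattern = "[a-zA-Z0-9-]{1,63}$"
print(bool(re.match(hostname_pattern, "web-01")))     # True
print(bool(re.match(hostname_pattern, "bad_label")))  # False
print(bool(re.match("^[0-9]+$", "42")))               # True -> rejected as a TLD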