import re


def get_phonenumbers(file_name):
    page_phones = []
    page_phonedesc = ""
    page_phonenumb = ""
    with open(file_name, "r") as f:
        for line in f:
            if re.search("<td>.+</td>", line):
                if not re.search("911", line):
                    # Description: drop the opening <td>, everything after the
                    # closing </td>, and any remaining tags.
                    page_phonedesc = re.sub("<td.*?>", "", line)
                    page_phonedesc = re.sub("</td>.*", "", page_phonedesc)
                    page_phonedesc = re.sub("<.*?>", " ", page_phonedesc)

                    # Number: keep only the contents of the second <td> cell.
                    page_phonenumb = re.sub(".*</td><td.*?>", "", line)
                    page_phonenumb = re.sub("<.*?>", "", page_phonenumb)

                    if re.search("https?:.+", page_phonenumb):
                        page_phones.append(
                            "<a href='" + page_phonenumb.strip() + "'>" + page_phonedesc.strip() + "</a>"
                        )
                    else:
                        page_phones.append(
                            page_phonedesc.strip() + "<br><span class='srchsub'>" + page_phonenumb.strip() + "</span>"
                        )
    return page_phones
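
To see what the two regex chains above actually do, here is a quick trace on a single, made-up table row:

import re

line = "<td>Front Desk</td><td>555-0123</td>"

desc = re.sub("<td.*?>", "", line)
desc = re.sub("</td>.*", "", desc)
desc = re.sub("<.*?>", " ", desc)

numb = re.sub(".*</td><td.*?>", "", line)
numb = re.sub("<.*?>", "", numb)

print(desc.strip() + " | " + numb.strip())  # -> Front Desk | 555-0123
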
    def _calc_rating(self, subsfile, file_original_path):
        file_name = os.path.basename(file_original_path)
        folder_name = os.path.split(os.path.dirname(file_original_path))[-1]

        subsfile = re.sub(r"\W+", ".", subsfile).lower()
        file_name = re.sub(r"\W+", ".", file_name).lower()
        folder_name = re.sub(r"\W+", ".", folder_name).lower()
        log(
            __scriptname__,
            "# Comparing Releases:\n [subtitle-rls] %s \n [filename-rls] %s \n [folder-rls] %s"
            % (subsfile, file_name, folder_name),
        )

        subsfile = subsfile.split(".")
        file_name = file_name.split(".")[:-1]
        folder_name = folder_name.split(".")

        if len(file_name) > len(folder_name):
            diff_file = list(set(file_name) - set(subsfile))
            rating = (1 - (len(diff_file) / float(len(file_name)))) * 5
        else:
            diff_folder = list(set(folder_name) - set(subsfile))
            rating = (1 - (len(diff_folder) / float(len(folder_name)))) * 5

        log(
            __scriptname__,
            "\n rating: %f (by %s)" % (round(rating, 1), "file" if len(file_name) > len(folder_name) else "folder"),
        )

        return round(rating, 1)
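
The rating computed by _calc_rating is simply the share of tokens in the file (or folder) name that also appear in the subtitle release name, scaled to 0-5 and rounded. A standalone sketch of the same arithmetic, with made-up release names:

import re


def release_rating(subs_release, media_release):
    # Tokenize both names the same way _calc_rating does.
    subs_tokens = set(re.sub(r"\W+", ".", subs_release).lower().split("."))
    media_tokens = re.sub(r"\W+", ".", media_release).lower().split(".")
    missing = set(media_tokens) - subs_tokens
    return round((1 - len(missing) / float(len(media_tokens))) * 5, 1)


print(release_rating("Some.Movie.2010.720p.BluRay.x264-GRP",
                     "Some Movie 2010 1080p BluRay x264-OTHER"))  # -> 3.6
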
def doReplacements(script, updateUrl, downloadUrl, pluginName=None):

    script = re.sub("@@INJECTCODE@@", loadCode, script)

    script = script.replace("@@PLUGINSTART@@", pluginWrapperStart)
    script = script.replace("@@PLUGINEND@@", pluginWrapperEnd)

    script = re.sub("@@INCLUDERAW:([0-9a-zA-Z_./-]+)@@", loaderRaw, script)
    script = re.sub("@@INCLUDESTRING:([0-9a-zA-Z_./-]+)@@", loaderString, script)
    script = re.sub("@@INCLUDEMD:([0-9a-zA-Z_./-]+)@@", loaderMD, script)
    script = re.sub("@@INCLUDEIMAGE:([0-9a-zA-Z_./-]+)@@", loaderImage, script)

    script = script.replace("@@BUILDDATE@@", buildDate)
    script = script.replace("@@DATETIMEVERSION@@", dateTimeVersion)

    if resourceUrlBase:
        script = script.replace("@@RESOURCEURLBASE@@", resourceUrlBase)
    else:
        if "@@RESOURCEURLBASE@@" in script:
            raise Exception("Error: '@@RESOURCEURLBASE@@' found in script, but no replacement defined")

    script = script.replace("@@BUILDNAME@@", buildName)

    script = script.replace("@@UPDATEURL@@", updateUrl)
    script = script.replace("@@DOWNLOADURL@@", downloadUrl)

    if pluginName:
        script = script.replace("@@PLUGINNAME@@", pluginName)

    return script
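
The @@INCLUDERAW@@-style markers are expanded by passing a callable (loaderRaw and friends) as the replacement argument to re.sub; the callable receives the match object and returns the text to substitute. A minimal sketch of that pattern, with an illustrative loader and path:

import re


def loader_raw(match):
    # match.group(1) holds the path captured inside the marker.
    return "// contents of %s would be inserted here" % match.group(1)


script = "init();\n@@INCLUDERAW:lib/helpers.js@@\nrun();"
print(re.sub(r"@@INCLUDERAW:([0-9a-zA-Z_./-]+)@@", loader_raw, script))
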
Example #4
def convert_notebook(name):
    # Convert the notebook into restructured text suitable for the documentation.
    subprocess.check_call(
        [
            "ipython",
            "nbconvert",
            "--execute",
            "--to",
            "rst",
            os.path.join(docs_dir, "%s.ipynb" % name),
            "--output",
            os.path.join(docs_dir, name),
        ]
    )

    # Unmangle Sphinx cross-references in the tutorial that get mangled by markdown.
    with open(os.path.join(docs_dir, "%s.rst" % name), "r") as file:
        content = file.read()
        content = re.sub(":([^:]+):``([^`]+)``", ":\\1:`\\2`", content)
        content = re.sub("[.][.].*(_[^:]+):", ".. \\1:", content)

        content = (
            """
  .. image:: ../artwork/toyplot.png
    :width: 200px
    :align: right
  """
            + content
        )

    with open(os.path.join(docs_dir, "%s.rst" % name), "w") as file:
        file.write(content)
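
The first substitution undoes the double backticks that markdown conversion puts around Sphinx cross-references, e.g. (illustrative line):

import re

line = "See :ref:``canvas-layout`` and :func:``toyplot.plot`` for details."
print(re.sub(":([^:]+):``([^`]+)``", ":\\1:`\\2`", line))
# -> See :ref:`canvas-layout` and :func:`toyplot.plot` for details.
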
Example #5
        def make_bibtex_citation(entry, template_citation, bibtexclass):

            # define a function to replace the template entry by its value
            def tpl_replace(objtplname):

                tpl_field = re.sub(u"[\{\}]", u"", objtplname.group())

                if tpl_field in TEMPLATE_ALLOWED_FIELDS:
                    if tpl_field in ["pubdate", "timestamp"]:
                        tpl_field = isoformat(entry[tpl_field]).partition("T")[0]
                    elif tpl_field in ["tags", "authors"]:
                        tpl_field = entry[tpl_field][0]
                    elif tpl_field in ["id", "series_index"]:
                        tpl_field = str(entry[tpl_field])
                    else:
                        tpl_field = entry[tpl_field]
                    return tpl_field
                else:
                    return u""

            if len(template_citation) > 0:
                tpl_citation = bibtexclass.utf8ToBibtex(
                    bibtexclass.ValidateCitationKey(re.sub(u"\{[^{}]*\}", tpl_replace, template_citation))
                )

                if len(tpl_citation) > 0:
                    return tpl_citation

            if len(entry["isbn"]) > 0:
                template_citation = u"%s" % re.sub(u"[\D]", u"", entry["isbn"])

            else:
                template_citation = u"%s" % str(entry["id"])

            return bibtexclass.ValidateCitationKey(template_citation)
Example #6
def clean_title(s):
    # replace smart quote characters
    s = re.sub(ur"[\u2018\u2019]", "'", s)
    s = re.sub(ur"[\u201C\u201D]", '"', s)
    # replace mojibake quotes (UTF-8 curly quotes mis-decoded as cp1252, e.g. "â€™")
    s = re.sub(u"\u00e2\u20ac\u2122", u"'", s)
    s = re.sub(ur"\xe2\u20ac\u02dc", "'", s)
    return s
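
The first two substitutions in Python 3 syntax (the ur"..." literals above exist only in Python 2), checked on a short string:

import re

s = u"\u2018Hello\u2019 \u201Cworld\u201D"
s = re.sub(u"[\u2018\u2019]", "'", s)
s = re.sub(u"[\u201C\u201D]", '"', s)
print(s)  # -> 'Hello' "world"
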
Example #7
    def _make_request(self, command, parameters=None):

        """Command is a string, parameters is a dictionary"""
        if ":" in self.server:
            host, port = self.server.split(":")
            port = int(port)
        else:
            host = self.server
            port = 8096

        url = "http://" + self.server + "/client/api?"

        if not parameters:
            parameters = {}
        if self.apiKey is not None and self.securityKey is not None:
            return self._make_request_with_auth(command, parameters)
        else:
            parameters["command"] = command
            parameters["response"] = self.responseformat
            querystring = urllib.urlencode(parameters)

        url += querystring

        f = urllib2.urlopen(url)
        data = f.read()
        if self.stripxml == "true":
            data = re.sub("<\?.*\?>", "\n", data)
            data = re.sub("</[a-z]*>", "\n", data)
            data = data.replace(">", "=")
            data = data.replace("=<", "\n")
            data = data.replace("\n<", "\n")
            data = re.sub("\n.*cloud-stack-version=.*", "", data)
            data = data.replace("\n\n\n", "\n")

        return data
Example #8
def build_pythonxy_plugin(plugin_dir, plugin_version):
    """Build Python(x,y) plugin -- requires Python(x,y) 2.7+
    For Windows platforms only"""
    nsis_files = [
        osp.join(plugin_dir, fname)
        for fname in os.listdir(plugin_dir)
        if osp.splitext(fname)[1] == ".nsi" and fname.startswith("install")
    ]

    vi_version = re.sub(r"[^0-9\.]*", "", plugin_version)
    while len(vi_version.split(".")) < 4:
        # VI_VERSION must match X.X.X.X
        vi_version += ".0"

    for fname in nsis_files:
        text = re.sub(
            r"!define VERSION \"[0-9\.a-zA-Z\_]*\"", '!define VERSION "%s"' % plugin_version, open(fname, "rb").read()
        )
        text = re.sub(r"!define VI_VERSION \"[\$\{\}0-9\.a-zA-Z\_]*\"", '!define VI_VERSION "%s"' % vi_version, text)
        open(fname, "wb").write(text)

    for nsis_exe in (r"C:\Program Files\NSIS\makensis.exe", r"C:\Program Files (x86)\NSIS\makensis.exe"):
        if osp.isfile(nsis_exe):
            break
    else:
        raise RuntimeError("NSIS is not installed on this computer.")

    for fname in nsis_files:
        os.system('"%s" %s' % (nsis_exe, fname))
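
VI_VERSION has to look like X.X.X.X, so the version string is stripped of anything non-numeric and padded with ".0" components, e.g.:

import re

plugin_version = "1.2.0"
vi_version = re.sub(r"[^0-9\.]*", "", plugin_version)  # keep only digits and dots
while len(vi_version.split(".")) < 4:
    vi_version += ".0"
print(vi_version)  # -> 1.2.0.0
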
Example #9
def formatBugLinks(value):
    def addLink(match):
        linkApp = match.group(1)
        if linkApp is not None:
            linkApp = linkApp.lower()
        linkType = match.group(2).lower()
        linkNum = int(match.group(3))
        if linkType == "topic":
            link = "https://adblockplus.org/forum/viewtopic.php?t=%i" % linkNum
        elif linkApp is None and linkType == "issue":
            link = "https://issues.adblockplus.org/ticket/%i" % linkNum
        elif linkApp == "webkit":
            link = "https://bugs.webkit.org/show_bug.cgi?id=%i" % linkNum
        elif linkApp is not None:
            link = "http://code.google.com/p/chromium/issues/detail?id=%i" % linkNum
        else:
            link = "https://bugzilla.mozilla.org/show_bug.cgi?id=%i" % linkNum
        return '<a href="%s">%s</a>' % (link, match.group(0))

    regexp = re.compile(r'(https?://\S+?)([.,:;!?"\']?(?:\s|$))', re.I | re.U)
    regexp2 = re.compile(r"(?:\b(WebKit|Chrome|Chromium)\s+)?\b(bug|issue|topic)\s+(\d+)", re.I | re.U)
    value = unicode(Markup.escape(value))
    value = re.sub(regexp, r'<a href="\1">\1</a>\2', value)
    value = re.sub(regexp2, addLink, value)
    return Markup(value)
Example #10
File: scripts.py Project: rubik/pyg
def script_args(dist):
    spec = dist.as_req
    header = get_script_header("", sys.executable)
    for group in "console_scripts", "gui_scripts":
        for name, ep in dist.entry_points_map(group).items():
            script_text = SCRIPT_TEXT.format(**locals())
            if sys.platform == "win32":
                # On Windows/wininst, add a .py extension and an .exe launcher
                if group == "gui_scripts":
                    ext, launcher = "-script.pyw", "gui.exe"
                    new_header = re.sub("(?i)python.exe", "pythonw.exe", header)
                else:
                    ext, launcher = "-script.py", "cli.exe"
                    new_header = re.sub("(?i)pythonw.exe", "python.exe", header)

                if os.path.exists(new_header[2:-1]):
                    hdr = new_header
                else:
                    hdr = header
                yield (name + ext, hdr + script_text, "t")
                yield (
                    name + ".exe",
                    pkg_resources.resource_string("setuptools", launcher),
                    "b",  # write in binary mode
                )
            else:
                # On other platforms, we assume the right thing to do is to
                # just write the stub with no extension.
                yield (name, header + script_text, "")
Example #11
def mkLex(keeppos=True, files=allfiles, numbers=False, senseId=False, meld=False, old=False):
    if len(files) == 0:
        return {}
    allentries = []
    lex = {}
    if type(files[0]) == type("fil"):
        print "reading files"
        print "making lexicon"
        for fil in files:
            entries, _ = readIt(fil)
            allentries.append(entries)
    else:
        allentries = files
    for entry in allentries:
        lem = getLem(entry, old)
        lemgram = re.sub("\*|\?", "", lem)
        if not keeppos:
            lem = lem.split(".")[0]
        pos, _ = getTag(entry, old)
        lem1 = lem if not meld else re.sub("\*|\?", "", lem)
        standard = [{"form": lem, "pos": pos, "lemgram": lemgram}]  # "file" : fil,
        attr = 1 if numbers else standard
        insert(lex, lem1, attr)
        if senseId:
            map(lambda (ids, s): insert(lex, re.sub("\*|\?", "", ids), attr), getSenseid(entry))
    print "lexicon complete"
    return lex
Example #12
def quotedata(data):
    """Quote data for email.

    Double leading '.', and change Unix newline '\\n', or Mac '\\r' into
    Internet CRLF end-of-line.
    """
    return re.sub(r"(?m)^\.", "..", re.sub(r"(?:\r\n|\n|\r(?!\n))", CRLF, data))
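
quotedata relies on a module-level CRLF constant ("\r\n" in smtplib); a quick check of the behaviour:

import re

CRLF = "\r\n"  # module-level constant assumed by quotedata


def quotedata(data):
    return re.sub(r"(?m)^\.", "..", re.sub(r"(?:\r\n|\n|\r(?!\n))", CRLF, data))


print(repr(quotedata("line1\nline2\r.leading dot")))
# -> 'line1\r\nline2\r\n..leading dot'
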
Example #13
def generate_html(texto, item):
    f = codecs.open("base.html", "r", "utf-8")
    base_html = f.read()
    f.close()

    f = codecs.open("proyectos_data.json", "r", "utf-8")
    data = json.loads(f.read())
    f.close()

    html = string.replace(base_html, "{% content %}", pre(texto))
    title = re.sub("pdf/", "", item)
    title = title.replace("_", "/")
    title = title.replace(".pdf", "")
    html = string.replace(html, "{% title %}", title)

    for i in data:
        if i["numero_proyecto"] == title:
            titulo = prettify(i)

    html = string.replace(html, "{% titulo %}", titulo)

    html_file = re.sub(".pdf", ".html", item)
    f = codecs.open(html_file, "w", "utf-8")
    f.write(html)
    f.close()
Example #14
    def scrape_event_page(self, session, chamber, url, datetime):
        page = self.lxmlize(url)
        info = page.xpath("//p")
        metainf = {}
        plaintext = ""
        for p in info:
            content = re.sub("\s+", " ", p.text_content())
            plaintext += content + "\n"
            if ":" in content:
                key, val = content.split(":", 1)
                metainf[key.strip()] = val.strip()
        ctty = metainf["COMMITTEE"]
        where = metainf["PLACE"]

        plaintext = re.sub("\s+", " ", plaintext).strip()
        regexp = r"(S|J|H)(B|M|R) (\d+)"
        bills = re.findall(regexp, plaintext)

        event = Event(session, datetime, "committee:meeting", ctty, chamber=chamber, location=where, agenda=plaintext)
        event.add_source(url)
        event.add_participant("host", ctty, chamber=chamber)

        for bill in bills:
            chamber, type, number = bill
            bill_id = "%s%s %s" % (chamber, type, number)
            event.add_related_bill(bill_id, type="consideration", description="Bill up for discussion")

        self.save_event(event)
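
The bill regexp captures chamber, type, and number as separate groups, so re.findall returns tuples:

import re

plaintext = "The committee will take up HB 1234, SB 56 and HR 7."
regexp = r"(S|J|H)(B|M|R) (\d+)"
print(re.findall(regexp, plaintext))
# -> [('H', 'B', '1234'), ('S', 'B', '56'), ('H', 'R', '7')]
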
Example #15
def stats_file(path):
    if fnmatch.fnmatch(path, "*.input"):
        # Escape the dot and anchor at the end so only the ".input" extension is swapped.
        fname1 = re.sub(r"\.input$", ".output", path)
        fname2 = re.sub(r"\.input$", ".output_tmp", path)
        return compare_file(fname1, fname2)
    else:
        return (0, 0)
Example #16
    def process(self):
        """
        Create the necessary input file links and run mothur command
        """

        if type(self.input_fasta) != list:
            self.input_fasta = [self.input_fasta]
        if type(self.input_counts) != list:
            self.input_counts = [self.input_counts]

        for idx, input_fasta in enumerate(self.input_fasta):

            self.mk_links([input_fasta], self.output_dir)
            self.mk_links([self.input_counts[idx]], self.output_dir)

            input_fasta = os.path.join(self.output_dir, os.path.basename(input_fasta))
            input_counts = os.path.join(self.output_dir, os.path.basename(self.input_counts[idx]))

            groups = self.groups.replace("-", "\-")  # need to escape hiphens in groups name param

            extra_params = {"fasta": input_fasta, "groups": groups, "count": input_counts}
            self.run_cmd("get.groups", extra_params)

            self.output_fasta = re.sub(r"\.fasta$", ".pick.fasta", input_fasta)
            self.output_counts = re.sub(r"\.count_table$", ".pick.count_table", input_counts)
Example #17
    def calc(self, irc, msg, args, expr):
        """<expression>

        Uses Google's calculator to calculate the value of <expression>.
        """
        channel = msg.args[0]
        if not ircutils.isChannel(channel):
            channel = None
        url = self._googleUrl(expr, channel)
        h = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36"
        }
        html = utils.web.getUrl(url, headers=h).decode("utf8")
        match = self._calcRe1.search(html)
        if not match:
            match = self._calcRe2.search(html)
            if not match:
                match = self._calcRe3.search(html)
                if not match:
                    irc.reply("I could not find an output from Google Calc for: %s" % expr)
                    return
                else:
                    s = match.group(1)
            else:
                s = match.group(1)
        else:
            s = match.group(1)
        # do some cleanup of text
        s = re.sub(r"<sup>(.*)</sup>&#8260;<sub>(.*)</sub>", r" \1/\2", s)
        s = re.sub(r"<sup>(.*)</sup>", r"^\1", s)
        s = utils.web.htmlToText(s)
        irc.reply("%s = %s" % (expr, s))
Example #18
def obfuscate_email(email, linktext=None, autoescape=None):
    """
    Given a string representing an email address,
    returns a mailto link with rot13 JavaScript obfuscation.

    Accepts an optional argument to use as the link text;
    otherwise uses the email address itself.
    """
    if autoescape:
        esc = conditional_escape
    else:
        esc = lambda x: x

    email = re.sub("@", "\\\\100", re.sub("\.", "\\\\056", esc(email))).encode("rot13")

    if linktext:
        linktext = esc(linktext).encode("rot13")
    else:
        linktext = email

    rotten_link = """<script type="text/javascript">document.write \
        ("<n uers=\\\"znvygb:%s\\\">%s<\\057n>".replace(/[a-zA-Z]/g, \
        function(c){return String.fromCharCode((c<="Z"?90:122)>=\
        (c=c.charCodeAt(0)+13)?c:c-26);}));</script>""" % (
        email,
        linktext,
    )
    return mark_safe(rotten_link)
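
str.encode("rot13") only exists in Python 2; under Python 3 the same transformation goes through the codecs module. A rough sketch of the address mangling, using a made-up address:

import codecs
import re

email = "user@example.com"
mangled = re.sub(r"@", r"\\100", re.sub(r"\.", r"\\056", email))
print(codecs.encode(mangled, "rot_13"))  # -> hfre\100rknzcyr\056pbz
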
Example #19
def vector_to_line_stroke(
    image, vector, layer, color="#000000", width=1, capstyle="butt", joinstyle="miter", miterlimit=10
):
    import re, tempfile

    newelements = {
        "stroke": color,
        "stroke-width": width,
        "stroke-linecap": capstyle,
        "stroke-linejoin": joinstyle,
        "stroke-miterlimit": miterlimit,
    }
    svg = pdb.gimp_vectors_export_to_string(image, vector)
    # fix width and height to be resolution (px/inch)-independent
    svg = re.sub(r'(<svg\s[^>]*\swidth\s*=\s*)\S*"', r'\1"%dpx"' % image.width, svg, flags=re.DOTALL)
    svg = re.sub(r'(<svg\s[^>]*\sheight\s*=\s*)\S*"', r'\1"%dpx"' % image.height, svg, flags=re.DOTALL)
    svg = re.sub(r'(<path\s[^>]*)\sstroke\s*=\s*"black"', r"\1", svg, flags=re.DOTALL)
    svg = re.sub(r'(<path\s[^>]*)\sstroke-width\s*=\s*"1"', r"\1", svg, flags=re.DOTALL)
    svg = re.sub(r"(<path\s)", r"\1" + "".join([r'%s="%s" ' % i for i in newelements.items()]), svg, flags=re.DOTALL)
    tmpfile = tempfile.NamedTemporaryFile(suffix=".svg")
    tmpfile.write(svg)
    tmpfile.flush()
    newlayer = pdb.gimp_file_load_layer(image, tmpfile.name)
    tmpfile.close()
    image.add_layer(newlayer)  # needs to be added to the image to be able to copy from
    copyname = pdb.gimp_edit_named_copy(newlayer, "stroke")
    image.remove_layer(newlayer)
    floating_sel = pdb.gimp_edit_named_paste(layer, copyname, True)
    pdb.gimp_floating_sel_anchor(floating_sel)
Example #20
    def _clean_text(self, text):
        """ Cleans up text before we make it into an HTML tree:
            1. Nukes <![CDATA stuff.
            2. Nukes XML encoding declarations
            3. Replaces </br> with <br/>
            4. Nukes invalid bytes in input
            5. ?
        """
        # Remove <![CDATA because it causes breakage in lxml.
        text = re.sub(r"<!\[CDATA\[", u"", text)
        text = re.sub(r"\]\]>", u"", text)

        # Remove <?xml> declaration in Unicode objects, because it causes an error:
        # "ValueError: Unicode strings with encoding declaration are not supported."
        # Note that the error only occurs if the <?xml> tag has an "encoding"
        # attribute, but we remove it in all cases, as there's no downside to
        # removing it. This moves our encoding detection to chardet, rather than
        # lxml.
        if isinstance(text, unicode):
            text = re.sub(r"^\s*<\?xml\s+.*?\?>", "", text)

        # Fix </br>
        text = re.sub("</br>", "<br/>", text)

        # Fix invalid bytes (http://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python)
        text = re.sub(u"[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\u10000-\u10FFFF]+", "", text)

        return text
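
A small check of the CDATA, XML-declaration and </br> fixes on a made-up input:

import re

text = '<?xml version="1.0" encoding="utf-8"?><p><![CDATA[Hello]]> world</br></p>'
text = re.sub(r"<!\[CDATA\[", "", text)
text = re.sub(r"\]\]>", "", text)
text = re.sub(r"^\s*<\?xml\s+.*?\?>", "", text)
text = re.sub("</br>", "<br/>", text)
print(text)  # -> <p>Hello world<br/></p>
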
Example #21
def tamper(payload, **kwargs):
    """
    Adds multiple spaces around SQL keywords

    Notes:
        * Useful to bypass very weak and bespoke web application firewalls
          that have poorly written, permissive regular expressions

    Reference: https://www.owasp.org/images/7/74/Advanced_SQL_Injection.ppt

    >>> random.seed(0)
    >>> tamper('1 UNION SELECT foobar')
    '1    UNION     SELECT   foobar'
    """

    retVal = payload

    if payload:
        words = set()

        for match in re.finditer(r"[A-Za-z_]+", payload):
            word = match.group()

            if word.upper() in kb.keywords:
                words.add(word)

        for word in words:
            retVal = re.sub(
                "(?<=\W)%s(?=[^A-Za-z_(]|\Z)" % word,
                "%s%s%s" % (" " * random.randrange(1, 4), word, " " * random.randrange(1, 4)),
                retVal,
            )
            retVal = re.sub("(?<=\W)%s(?=[(])" % word, "%s%s" % (" " * random.randrange(1, 4), word), retVal)

    return retVal
Example #22
    def generatePlainText(self):
        resultText = "Join the " + self.city + " Sunday Night Film Club "

        resultText += "(" + self.clubURL + ") this "

        resultText += self.nextSunday.strftime("%A, %b %e") + self.daySuffix
        resultText += " at " + self.showTime + " for " + self.film

        resultText += " at the " + self.location + ". "

        resultText += "Look for " + self.host + " "
        resultText += "wearing "
        resultText += self.wearing
        resultText += " in the theatre lobby about 15 "
        resultText += "minutes before the film. As always, after the film "
        resultText += "we will descend on a local establishment for "
        resultText += "dinner/drinks/discussion.\n\n"

        resultText = textwrap.fill(resultText, 70)
        resultText += "\n\n"

        # hacky attempt at breaking synopsis up into paragraphs...
        r = re.compile("^\r$", re.MULTILINE)
        syn = r.split(self.synopsis)

        for x in syn:
            s = x
            s = re.sub("\r", "", s)
            s = re.sub("\n", "", s)
            s = textwrap.dedent(s).strip()
            resultText += textwrap.fill(s, 70)
            resultText += "\n\n"

        return resultText
Example #23
def html_remove_image_history(doc):
    """
    Remove image history and links to information.
    """
    doc = re.sub(r"<h2>Image history</h2>[\s\S]+?</ul>", r"", doc)
    doc = re.sub(r"<h2>Image links</h2>[\s\S]+?</ul>", r"", doc)
    return doc
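
[\s\S] matches any character, including newlines, so the block is removed without needing re.DOTALL:

import re

doc = "<p>Photo</p>\n<h2>Image history</h2>\n<ul>\n<li>old upload</li>\n</ul>\n<p>end</p>"
print(re.sub(r"<h2>Image history</h2>[\s\S]+?</ul>", r"", doc))
# removes the whole "Image history" block, leaving only the <p>Photo</p> and <p>end</p> paragraphs
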
Example #24
def decide_sentence_period(accum):
    """
    accum is the n-grams sized windows of list of tokens to be analysed.
    N-grams size are given in function `decide`
    
    Checking whether the token preceding the period marked as sentence boundary marker in the 
    first-stage classification is one of rare abbreviation
    or
    The preceding and the following tokens form a collocation which is evidence against a sentence
    boundary marker in the middle.
    """
    import re

    global CONTEXT_SIZE, rare_abbreviations
    center = CONTEXT_SIZE
    token = accum[center]

    if token.endswith("<S>"):
        buf = normalize(token[:-4], False)
        # Reclassify rare abbreviations
        if buf in rare_abbreviations:
            accum[center] = re.sub(r"\<S\>", "<A>", accum[center])
            # Check again whether token is also a sentence boundary marker
            accum = decide_abbreviation(accum)
        # There is collocation between following and preceding tokens
        elif decide_collocational(accum) == True:
            # Following token is not a frequent sentence starter
            if decide_sentence_starter(accum) == False:
                # Reclassify the sentence boundary marker as abbreviation and assume no
                # following sentence boundary marker because of the collocation
                accum[center] = re.sub(r"\<S\>$", "<A>", accum[center])

    return accum
Example #25
def cleanFunctionNodeString(x):
    """
    Makes FunctionNode strings easier to read
    """
    s = re.sub("lambda", u"\u03BB", str(x))  # make lambdas the single char
    s = re.sub("_", "", s)  # remove underscores
    return s
Example #26
def load(sentence, kws, score):
    """
    Fills list of Holder objects for finding patterns
    """
    kws = [kw.strip() for kw in kws]
    kws = fill_blanks(kws, sentence)
    hs = []
    for kw in kws:
        try:
            hs.append(Holder(kw, sentence.index(kw), True))
        except ValueError:
            # print 'INDEX OF',re.sub('[\W_]+', ' ',kw),'IN',re.sub('[\W_]+', ' ', sentence)
            hs.append(Holder(kw, re.sub("[\W_]+", " ", sentence).index(re.sub("[\W_]+", " ", kw)), True))
    hs.append(Holder(score, sentence.index(score), False, True))
    hs.sort(key=lambda x: x.pos)
    pos = 0
    fillers = []

    for h in hs:
        if h.pos > pos:
            t = sentence[pos : h.pos]
            if any([x.isalpha() for x in t]):
                fillers.append(Holder(t, sentence.index(t)))
        pos = h.pos + len(h.text)
    hs += fillers
    hs.sort(key=lambda x: x.pos)
    return hs
Example #27
def procFile(f, prompt):
    global rules, trace
    env = []
    while 1:
        if prompt:
            sys.stdout.write(prompt)
            sys.stdout.flush()
        sent = f.readline()
        if sent == "":
            break
        s = re.sub("#.*", "", sent[:-1])  # clip comments and newline
        s = re.sub(" ", "", s)  # remove spaces
        if s == "":
            continue

        if s[-1] in "?.":
            punc = s[-1]
            s = s[:-1]
        else:
            punc = "."

        if s == "trace=0":
            trace = 0
        elif s == "trace=1":
            trace = 1
        elif s == "quit":
            sys.exit(0)
        elif s == "dump":
            for rule in rules:
                print rule
        elif punc == "?":
            search(Term(s))
        else:
            rules.append(Rule(s))
Example #28
    def __generate_templates(self):
        current_imports = list(sys.modules.keys())
        count_templates = 0
        error_text = ""

        for template in self.python_templates:
            rel_path = os.path.splitext(os.path.relpath(template, self.path))[0]

            # Use str.replace here: on Windows os.path.sep is "\\", which re.sub would reject as a pattern.
            import_path = rel_path.replace(os.path.sep, ".")
            try:
                mod = importlib.import_module(import_path)
                outputfile = re.sub(r"_html\.py$", ".html", template)
                with open(outputfile, "w") as of:
                    for line in mod.result():
                        of.write(str(line))
                        of.write(os.linesep)
            except:
                error_text += traceback.format_exc() + os.linesep
            else:
                count_templates += 1

        for new_import in [x for x in sys.modules.keys() if x not in current_imports]:
            del sys.modules[new_import]

        print("Generated %d templates on %s" % (count_templates, datetime.datetime.now()))
        if error_text:
            print(error_text)
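
A sketch of the path-to-module and output-name conversion, with made-up file names (POSIX separators shown in the comments); str.replace is used for the separator so a Windows backslash is not treated as a regular expression:

import os
import re

template = os.path.join("pages", "index_html.py")
rel_path = os.path.splitext(template)[0]               # pages/index_html
import_path = rel_path.replace(os.path.sep, ".")       # pages.index_html
outputfile = re.sub(r"_html\.py$", ".html", template)  # pages/index.html
print(import_path + " -> " + outputfile)
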
Example #29
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise TypeError: when in_string isn't C{unicode} or C{str}
    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    if not isinstance(in_string, basestring):
        raise TypeError("Argument must be basestring, not %s" % type(in_string))
    try:
        u_in_string = unicode(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError(
            "When in_string is of type str it must be ASCII, "
            "but it is not. Use unicode in this case."
        )
    # convert & to "and"
    u_in_string = re.sub("\&amp\;|\&", " and ", u_in_string)
    # replace spaces by hyphen
    u_in_string = re.sub("[-\s]+", "-", u_in_string)
    # remove symbols that not in alphabet
    u_in_string = u"".join([symb for symb in u_in_string if symb in ALPHABET])
    # translify it
    out_string = translify(u_in_string)
    # remove non-alpha
    return re.sub("[^\w\s-]", "", out_string).strip().lower()
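
The regular-expression part of slugify is easy to check on its own (translify and ALPHABET come from the surrounding library and are skipped here):

import re

s = u"rock & roll  classics!"
s = re.sub(r"&amp;|&", " and ", s)  # convert & (or &amp;) to "and"
s = re.sub(r"[-\s]+", "-", s)       # collapse spaces and hyphens into single hyphens
print(re.sub(r"[^\w\s-]", "", s).strip().lower())  # -> rock-and-roll-classics
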
Example #30
    def test_get_vm_create_spec(self):
        instance_uuid = uuidutils.generate_uuid()
        fake_instance = {"id": 7, "name": "fake!", "uuid": instance_uuid, "vcpus": 2, "memory_mb": 2048}
        result = vm_util.get_vm_create_spec(fake.FakeFactory(), fake_instance, instance_uuid, "fake-datastore", [])
        expected = """{
            'files': {'vmPathName': '[fake-datastore]',
            'obj_name': 'ns0:VirtualMachineFileInfo'},
            'instanceUuid': '%(instance_uuid)s',
            'name': '%(instance_uuid)s', 'deviceChange': [],
            'extraConfig': [{'value': '%(instance_uuid)s',
                             'key': 'nvp.vm-uuid',
                             'obj_name': 'ns0:OptionValue'}],
            'memoryMB': 2048,
            'obj_name': 'ns0:VirtualMachineConfigSpec',
            'guestId': 'otherGuest',
            'tools': {'beforeGuestStandby': True,
                      'beforeGuestReboot': True,
                      'beforeGuestShutdown': True,
                      'afterResume': True,
                      'afterPowerOn': True,
            'obj_name': 'ns0:ToolsConfigInfo'},
            'numCPUs': 2}""" % {
            "instance_uuid": instance_uuid
        }
        expected = re.sub(r"\s+", "", expected)
        result = re.sub(r"\s+", "", repr(result))
        self.assertEqual(expected, result)