Example #1
def perlReToPythonRe(s, allowG=False):
    """Converts a string representation of a Perl regular expression (i.e.,
    m/^foo$/i or /foo|bar/) to a Python regular expression.
    """
    opener = closer = _getSep(s, True)
    if opener in '{[(<':
        closer = _closers[_openers.index(opener)]
    opener = re.escape(opener)
    closer = re.escape(closer)
    matcher = re.compile(r'm?%s((?:\\.|[^\\])*)%s(.*)' % (opener, closer))
    try:
        (regexp, flags) = matcher.match(s).groups()
    except AttributeError: # match() returned None, so .groups() failed.
        raise ValueError('Must be of the form m/.../ or /.../')
    regexp = regexp.replace('\\'+opener, opener)
    if opener != closer:
        regexp = regexp.replace('\\'+closer, closer)
    flag = 0
    g = False
    try:
        for c in flags.upper():
            if c == 'G' and allowG:
                g = True
                continue
            flag |= getattr(re, c)
    except AttributeError:
        raise ValueError('Invalid flag: %s' % c)
    try:
        r = re.compile(regexp, flag)
    except re.error as e:
        raise ValueError(str(e))
    if allowG:
        return (r, g)
    else:
        return r
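A quick usage sketch (hypothetical inputs; assumes the module's private _getSep, _openers, and _closers helpers are in scope):

# perlReToPythonRe('m/^foo$/i')           -> re.compile('^foo$', re.IGNORECASE)
# perlReToPythonRe('/foo|bar/')           -> re.compile('foo|bar')
# perlReToPythonRe('m/x/g', allowG=True)  -> (re.compile('x'), True)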
Example #2
    def name_matches_object(cls, name, task_id=None, exact=True):
        """Determine if a resource name could have been created by this class.

        :param name: The resource name to check against this class's
                     RESOURCE_NAME_FORMAT.
        :param task_id: The task ID that must match the task portion of
                        the random name
        :param exact: If False, then additional information may follow
                      the expected name. (For instance, this is useful
                      when bulk creating instances, since Nova
                      automatically appends a UUID to each instance
                      created thusly.)
        :returns: bool
        """
        match = cls._resource_name_placeholder_re.match(
            cls.RESOURCE_NAME_FORMAT)
        parts = match.groupdict()
        subst = {
            "prefix": re.escape(parts["prefix"]),
            "sep": re.escape(parts["sep"]),
            "suffix": re.escape(parts["suffix"]),
            "chars": re.escape(cls.RESOURCE_NAME_ALLOWED_CHARACTERS),
            "rand_length": len(parts["rand"])}
        if task_id:
            subst["task_id"] = cls._generate_task_id_part(task_id,
                                                          len(parts["task"]))
        else:
            subst["task_id"] = "[%s]{%s}" % (subst["chars"],
                                             len(parts["task"]))
        subst["extra"] = "" if exact else ".*"
        name_re = re.compile(
            "%(prefix)s%(task_id)s%(sep)s"
            "[%(chars)s]{%(rand_length)s}%(suffix)s%(extra)s$" % subst)
        return bool(name_re.match(name))
Example #3
    def fix_truncation(self, view, words):
        fixed_words = []
        start_time = time.time()

        for i, w in enumerate(words):
            #The word is truncated if and only if it cannot be found with a word boundary before and after

            # this fails to match strings with trailing non-alpha chars, like
            # 'foo?' or 'bar!', which are common for instance in Ruby.
            match = view.find(r'\b' + re.escape(w) + r'\b', 0)
            truncated = match.empty()
            if truncated:
                #Truncation is always by a single character, so we extend the word by one word character before a word boundary
                extended_words = []
                view.find_all(r'\b' + re.escape(w) + r'\w\b', 0, "$0", extended_words)
                if len(extended_words) > 0:
                    fixed_words += extended_words
                else:
                    # to compensate for the missing match problem mentioned above, just
                    # use the old word if we didn't find any extended matches
                    fixed_words.append(w)
            else:
                #Pass through non-truncated words
                fixed_words.append(w)

            # if too much time is spent in here, bail out,
            # and don't bother fixing the remaining words
            if time.time() - start_time > MAX_FIX_TIME_SECS_PER_VIEW:
                return fixed_words + words[i+1:]

        return fixed_words
Example #4
def remove_punct(text, marks=None, beginning_marks=None, trailing_marks=None):
    """
    Remove punctuation from ``text`` by replacing all instances of ``marks``,
    ``beginning_marks`` or ``trailing_marks`` with an empty string.
    Args:
        text (str): raw text
        marks (str): If specified, remove only the characters in this string,
            e.g. ``marks=',;:'`` removes commas, semi-colons, and colons.
        beginning_marks (str): If specified, remove only the characters in this
            string from the beginning of the text, e.g. ``marks='^'`` removes
            ``^`` from the beginning of text.
        trailing_marks (str): If specified, remove only the characters in this
            string from the end of the text, e.g. ``marks='%'`` removes ``%``
            from the end of the text. If none of the above is given, all
            punctuation marks are removed.
    Returns:
        str
    """
    # First off, replace dashes with white space: import-export banks --> import export banks
    text = re.sub('-', ' ', text, flags=re.UNICODE)
    text = re.sub('  ', ' ', text, flags=re.UNICODE).strip()
    if beginning_marks:
        text = re.sub('^[{}]+'.format(re.escape(beginning_marks)), '', text, flags=re.UNICODE)
    if trailing_marks:
        text = re.sub('[{}]+$'.format(re.escape(trailing_marks)), '', text, flags=re.UNICODE)
    if marks:
        return re.sub('[{}]+'.format(re.escape(marks)), '', text, flags=re.UNICODE)
    else:
        if isinstance(text, unicode_):
            return text.translate(PUNCT_TRANSLATE_UNICODE)
        else:
            return text.translate(None, PUNCT_TRANSLATE_BYTES)
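A usage sketch (hypothetical inputs; the default branch assumes the module-level unicode_ and PUNCT_TRANSLATE_* globals that the code above relies on):

# remove_punct("import-export, banks!", marks=',!')  -> 'import export banks'
# remove_punct("^hello", beginning_marks='^')        -> 'hello'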
Example #5
    def cleanup(self):
        self.logger.info('Starting to clean up old Nuclide processes/files.')
        # TODO: Remove it after migration is complete.
        # For migration, stop the forever monitor processes of Nuclide server.
        # This does not stop existing Nuclide server processes themselves.
        # It just removes the monitor so that we can kill them on upgrade.
        for proc in ProcessInfo.get_processes(
                getpass.getuser(), '%s.*%s' %
                (re.escape('forever/bin/monitor'), re.escape('nuclide-main.js'))):
            self.logger.info('Stopping forever monitor process: %s' % proc)
            proc.stop()

        # Clean up multiple Nuclide processes on same port.
        # There should be no more than one on a given port.
        # TODO: log the error to analytics db.
        # { port1 => [proc1, proc2, ...], ...}
        server_proc_map = defaultdict(list)
        # Group the processes by port.
        for proc in NuclideServer.get_processes():
            port = int(proc.get_command_param('port'))
            server_proc_map[port].append(proc)
        for port in server_proc_map:
            if len(server_proc_map[port]) > 1:
                self.logger.warning(
                    'Multiple Nuclide processes on port %d. Something wrong. Clean them up...' %
                    port)
                for proc in server_proc_map[port]:
                    proc.stop()

        self.cleanup_certificates(CERTS_EXPIRATION_DAYS)
        self.logger.info('Finished cleaning up old Nuclide processes/files.')
Example #6
def fix_truncation(view, words):
    fixed_words = set()
    words_to_fix = set()
    for w in words:
        if len(w) >= MIN_WORD_SIZE:
            words_to_fix.add(w)
        else:
            fixed_words.add(w)

    for w in words_to_fix:
        #The word is truncated if and only if it cannot be found with a word boundary before and after

        # this fails to match strings with trailing non-alpha chars, like
        # 'foo?' or 'bar!', which are common for instance in Ruby.
        truncated = view.find(r'\b' + re.escape(w) + r'\b', 0) is None
        if truncated:
            #Truncation is always by a single character, so we extend the word by one word character before a word boundary
            extended_words = []
            view.find_all(r'\b' + re.escape(w) + r'\w\b', 0, "$0", extended_words)
            if len(extended_words) > 0:
                fixed_words.update(extended_words)
            else:
                # to compensate for the missing match problem mentioned above, just
                # use the old word if we didn't find any extended matches
                fixed_words.add(w)
        else:
            #Pass through non-truncated words
            fixed_words.add(w)
    return fixed_words
Example #7
def findAll(document, search_text, line_interval=None, case_sensitive=True, word=False):
    """
    Find all occurrences of a given search text (possibly a regexp).
    """
    match_pos = []
    if line_interval is None:
        line_interval = (1, document.numLines()+1)
    else:
        line_interval = (max(1, line_interval[0]), min(document.numLines()+1, line_interval[1]))

    flags = 0
    if not case_sensitive:
        flags = re.IGNORECASE

    if word:
        search_text = r'\b'+re.escape(search_text)+r'\b'
    else:
        search_text = re.escape(search_text)

    allMatches = re.compile(search_text, flags).finditer

    for line_num in range(*line_interval):
        line_text = document.lineText(line_num)
        match_pos.extend( ((line_num, m.start()+1, m.end()+1) for m in allMatches(line_text)) )

    return match_pos
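A usage sketch with a minimal stand-in document object (assumed here to expose 1-based numLines()/lineText(), matching how the function indexes lines):

class _Doc:
    def __init__(self, lines):
        self._lines = lines
    def numLines(self):
        return len(self._lines)
    def lineText(self, n):  # 1-based line numbers
        return self._lines[n - 1]

doc = _Doc(["foo bar", "barfoo foo"])
findAll(doc, "foo", word=True)  # -> [(1, 1, 4), (2, 8, 11)]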
Example #8
def _setoption(arg):
    import re
    parts = arg.split(':')
    if len(parts) > 5:
        raise _OptionError("too many fields (max 5): %s" % `arg`)
    while len(parts) < 5:
        parts.append('')
    action, message, category, module, lineno = [s.strip()
                                                 for s in parts]
    action = _getaction(action)
    message = re.escape(message)
    category = _getcategory(category)
    module = re.escape(module)
    if module:
        module = module + '$'
    if lineno:
        try:
            lineno = int(lineno)
            if lineno < 0:
                raise ValueError
        except (ValueError, OverflowError):
            raise _OptionError("invalid lineno %s" % `lineno`)
    else:
        lineno = 0
    filterwarnings(action, message, category, module, lineno)
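For reference, this parses the same 'action:message:category:module:lineno' option strings as CPython's -W command-line flag, e.g. (hypothetical module name, assuming the module's _getaction/_getcategory helpers behave as in the warnings module):

# _setoption('ignore::DeprecationWarning')  # ignore all DeprecationWarnings
# _setoption('error:::mymodule:')           # turn warnings from mymodule into errors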
Example #9
 def replace_section(self, rst, section_name, replacement, remove_header=False):
     if not len(replacement):
         replacement = u"\n"
     elif replacement[-1] != u"\n":
         replacement = u"%s\n" % replacement
     if remove_header:
         replacement = u"%s\n" % replacement
     else:
         replacement = u"\\1\n%s\n" % replacement
     regex = (r"""(?msx)
     (\n
         %(section_name)s\n
         ([%(header_chars)s])\2[^\n]+\n
     ).*?\n
     (?=(?:
         ^[^\n]+\n
         \2\2\2
       |
         \Z
     ))
     """) % {
         'section_name': re.escape(section_name),
         'header_chars': re.escape('-#=.'),
     }
     return re.sub(regex, replacement, rst)
Example #10
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'fanfiction'})
        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        #remove all the unnecessary bookmark tags
        [s.extract() for s in div('div',{'class':"tiny_box2"})]

        #is there a review link?
        r = div.find('a',href=re.compile(re.escape("./index.php?act=irv")+".*$"))
        if r is not None:
        #remove the review link and its parent div
            r.parent.extract()

        #There might also be a link to the sequel on the last chapter
        #I'm inclined to keep it in, but the URL needs to be changed from relative to absolute
        #Shame there isn't proper series metadata available
        #(I couldn't find it anyway)
        s = div.find('a',href=re.compile(re.escape("./index.php?act=ovr")+".*$"))
        if s is not None:
            s['href'] = 'http://'+self.getSiteDomain()+'/fanfictions'+s['href'][1:]

        return self.utf8FromSoup(url,div)
Example #11
    def ignore(self, path, exact=False):
        """Ignores a path."""
        if exact:
            path = "^{0}$".format(re.escape(path))
        else:
            path = re.escape(path)
        self._ignore.append(re.compile(path))
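A usage sketch (hypothetical paths; assumes an instance whose _ignore list feeds a later match step):

# obj.ignore('build/output.log', exact=True)  # compiles '^build/output\.log$'
# obj.ignore('*.pyc')                         # compiles the escaped literal '\*\.pyc'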
Example #12
    def wait_for_load_finished_url(self, url, *, timeout=None,
                                   load_status='success'):
        """Wait until a URL has finished loading."""
        __tracebackhide__ = (lambda e: e.errisinstance(
            testprocess.WaitForTimeout))

        if timeout is None:
            if 'CI' in os.environ:
                timeout = 15000
            else:
                timeout = 5000

        # We really need the same representation that the webview uses in its
        # __repr__
        qurl = QUrl(url)
        if not qurl.isValid():
            raise ValueError("Invalid URL {}: {}".format(url,
                                                         qurl.errorString()))
        url = utils.elide(qurl.toDisplayString(QUrl.EncodeUnicode), 100)
        assert url

        pattern = re.compile(
            r"(load status for <qutebrowser\.browser\..* "
            r"tab_id=\d+ url='{url}/?'>: LoadStatus\.{load_status}|fetch: "
            r"PyQt5\.QtCore\.QUrl\('{url}'\) -> .*)".format(
                load_status=re.escape(load_status), url=re.escape(url)))

        try:
            self.wait_for(message=pattern, timeout=timeout)
        except testprocess.WaitForTimeout:
            raise testprocess.WaitForTimeout("Timed out while waiting for {} "
                                             "to be loaded".format(url))
Example #13
def isGoodResult(name, show, log=True, season=-1):
    """
    Use an automatically-created regex to make sure the result actually is the show it claims to be
    """

    all_show_names = allPossibleShowNames(show, season=season)
    showNames = map(sanitizeSceneName, all_show_names) + all_show_names
    showNames += map(unidecode, all_show_names)

    for curName in set(showNames):
        if not show.is_anime:
            escaped_name = re.sub('\\\\[\\s.-]', '\W+', re.escape(curName))
            if show.startyear:
                escaped_name += "(?:\W+" + str(show.startyear) + ")?"
            curRegex = '^' + escaped_name + '\W+(?:(?:S\d[\dE._ -])|(?:\d\d?x)|(?:\d{4}\W\d\d\W\d\d)|(?:(?:part|pt)[\._ -]?(\d|[ivx]))|Season\W+\d+\W+|E\d+\W+|(?:\d{1,3}.+\d{1,}[a-zA-Z]{2}\W+[a-zA-Z]{3,}\W+\d{4}.+))'
        else:
            escaped_name = re.sub('\\\\[\\s.-]', '[\W_]+', re.escape(curName))
            # FIXME: find a "automatically-created" regex for anime releases # test at http://regexr.com?2uon3
            curRegex = '^((\[.*?\])|(\d+[\.-]))*[ _\.]*' + escaped_name + '(([ ._-]+\d+)|([ ._-]+s\d{2})).*'

        if log:
            logger.log(u"Checking if show " + name + " matches " + curRegex, logger.DEBUG)

        match = re.search(curRegex, name, re.I)
        if match:
            logger.log(u"Matched " + curRegex + " to " + name, logger.DEBUG)
            return True

    if log:
        logger.log(
            u"Provider gave result " + name + " but that doesn't seem like a valid result for " + show.name + " so I'm ignoring it")
    return False
Example #14
    def run(self):
        """
        Starts the robot's action.
        """
        # regular expression to find the original template.
        # {{vfd}} does the same thing as {{Vfd}}, so both will be found.
        # The old syntax, {{msg:vfd}}, will also be found.
        # The group 'parameters' will either match the parameters, or an
        # empty string if there are none.

        replacements = []
        exceptions = {}

        for old, new in self.templates.iteritems():
            if not pywikibot.getSite().nocapitalize:
                pattern = '[' + re.escape(old[0].upper()) + re.escape(old[0].lower()) + ']' + re.escape(old[1:])
            else:
                pattern = re.escape(old)
            pattern = re.sub(r'_|\\ ', r'[_ ]', pattern)
            templateRegex = re.compile(r'\{\{ *([Tt]emplate:|[mM][sS][gG]:)?' + pattern + r'(?P<parameters>\s*\|.+?|) *}}', re.DOTALL)

            if self.remove:
                replacements.append((templateRegex, ''))
            elif self.subst:
                replacements.append((templateRegex, '{{subst:' + old + '\g<parameters>}}'))
                exceptions['inside-tags']=['ref']
            else:
                replacements.append((templateRegex, '{{' + new + '\g<parameters>}}'))

        replaceBot = replace.ReplaceRobot(self.generator, replacements, exceptions, acceptall = self.acceptAll, addedCat=self.addedCat, editSummary=self.editSummary)
        replaceBot.run()
Example #15
 def Filter(self, text, filter_type):
     '''
     Filters content of browser by various expressions (type of expression
     is defined by filter_type).
     '''
     if text == '':
         self.values = self.allvalues
     else:
         num = None
         if text.isdigit():
             num = int(text)
         if filter_type == 0:
             match = re.compile('.*%s.*' % re.escape(text), re.I)
         elif filter_type == 1:
             try:
                 match = re.compile(text, re.I)
              except re.error:
                 raise FilterException('Failed to compile regexp')
         elif filter_type == 2:
             text = text.replace('*', '__SEARCH_ALL__')
             text = text.replace('?', '__SEARCH_ONE__')
             text = re.escape(text)
             text = text.replace('\\_\\_SEARCH\\_ALL\\_\\_', '.*')
             text = text.replace('\\_\\_SEARCH\\_ONE\\_\\_', '.')
             match = re.compile('.*%s.*' % text, re.I)
         else:
             raise Exception('Unsupported filter type %s!' % filter_type)
         self.values = [
             item for item in self.allvalues
             if Wammu.Utils.MatchesText(item, match, num)
         ]
     self.SetItemCount(len(self.values))
     self.RefreshView()
     self.ShowRow(0)
Example #16
 def commandToChangeHostname(cls, oldHostname, newHostname):
     """Build command to change hostname.
     
     Must be root to succeed.
     
     As implemented works in Enterprise Linux versions 6.x.
     
     Clearly, some machines will have more settings that may or may not need changing too.
     
     Example use:
     
         vm = VMwareMachine("~/vmware/examples/example68/example68.vmx")
         VMwareHypervisor.local.start(vm.vmxFilePath)
         vm.sleepUntilSshIsAvailable(ticker=True)
         vm.sshCommand([ElClone.commandToChangeStaticIPAddress("10.123.45.67", "10.123.45.68")])
         vm.portsFile.changeIPAddress("10.123.45.67", "10.123.45.68")
         vm.sleepUntilSshIsAvailable(ticker=True)
         vm.acceptKnownHostKey()
         vm.sshCommand([ElClone.commandToChangeHostname("example67", "example68")])
     
     Return command to change static hostname."""
     if re.search(r"\s", oldHostname):
         raise Exception("not accepting whitespace in hostname ({0})".format(oldHostname))
     if re.search(r"\s", newHostname):
         raise Exception("not accepting whitespace in hostname ({0})".format(newHostname))
     # quite sensitive to quoting and not quoting
     settingReplacementCommand = r"sed -i -e 's/=\"\?" + re.escape(oldHostname) + r"\"\?/=\"" + re.escape(newHostname) + r"\"/'"
     command = settingReplacementCommand + r" '/etc/sysconfig/network'"
     # quite sensitive to quoting and not quoting
     command += r" ; for f in /etc/sysconfig/network-scripts/ifcfg-* ; do " + settingReplacementCommand + r" $f ; done"
     # immediate effect without restart
     command += r" ; hostname " + re.escape(newHostname)
     command = r"if [ `hostname` = " + re.escape(oldHostname) + r" ] ; then " + command + r" ; fi"
     return command
Example #17
    def test_realm_host_assignation(self):
        """ Test host realm assignation
        Test realms on each host

        :return: None
        """
        with pytest.raises(SystemExit):
            self.setup_with_file('cfg/realms/several_realms.cfg', 'cfg/realms/several_realms.ini')
        self.show_logs()
        assert not self.conf_is_correct

        self.assert_any_cfg_log_match(re.escape(
            "Configuration in hostgroup::in_realm2 is incorrect; "
        ))
        self.assert_any_cfg_log_match(re.escape(
            "host test_host3_hg_realm2 (realm: realm1) is not in the same realm than its hostgroup in_realm2 (realm: realm2)"
        ))

        # self.assert_any_cfg_log_match(re.escape(
        #     "hostgroup in_realm2 got the default realm but it has some hosts that are from different realms"
        # ))

        # Some error messages
        assert len(self.configuration_errors) == 3

        realm1 = self._arbiter.conf.realms.find_by_name('realm1')
        assert realm1 is not None
        realm2 = self._arbiter.conf.realms.find_by_name('realm2')
        assert realm2 is not None

        host = self._arbiter.conf.hosts.find_by_name('test_host_realm1')
        assert realm1.uuid == host.realm

        host = self._arbiter.conf.hosts.find_by_name('test_host_realm2')
        assert realm2.uuid == host.realm
Example #18
    def wait_for_load_finished(self, path, *, port=None, https=False,
                               timeout=None, load_status='success'):
        """Wait until any tab has finished loading."""
        __tracebackhide__ = True

        if timeout is None:
            if 'CI' in os.environ:
                timeout = 15000
            else:
                timeout = 5000

        url = self.path_to_url(path, port=port, https=https)
        # We really need the same representation that the webview uses in its
        # __repr__
        url = utils.elide(QUrl(url).toDisplayString(QUrl.EncodeUnicode), 100)
        pattern = re.compile(
            r"(load status for <qutebrowser\.browser\.webview\.WebView "
            r"tab_id=\d+ url='{url}/?'>: LoadStatus\.{load_status}|fetch: "
            r"PyQt5\.QtCore\.QUrl\('{url}'\) -> .*)".format(
                load_status=re.escape(load_status), url=re.escape(url)))

        try:
            self.wait_for(message=pattern, timeout=timeout)
        except testprocess.WaitForTimeout:
            raise testprocess.WaitForTimeout("Timed out while waiting for {} "
                                             "to be loaded".format(url))
Example #19
    def sanitize(self, text):
        """ Parse Wikitext from MediaWiki into clean XML. """

        # delete header and footer from wikitext
        text = re.sub(re.escape("{{#invoke:Mathe für Nicht-Freaks/Seite|")
                      + "(oben|unten)" + re.escape("}}"), "", text)

        # parse wikitext with Parsoid
        result = self.wiki.parse_text(text)

        # parse xml
        result = etree.fromstring(result)

        # handle templates
        result = self.sanitize_templates(result)

        # handle math tags
        for math_tag in result.findall(".//*[@typeof='mw:Extension/math']"):
            xml_replace(math_tag, etree.Element("math", tex=math_tag.get("alt")))

        # delete all id's
        for tag in result.findall(".//*[@id]"):
            del tag.attrib["id"]
    
        # return string of xml
        return etree.tostring(result, pretty_print=True, encoding=str)
Example #20
 def _adjustParameter(paramString, parameter, newValue):
     retVal = paramString
     match = re.search("%s=(?P<value>[^&]*)" % re.escape(parameter), paramString)
     if match:
         origValue = match.group("value")
         retVal = re.sub("%s=[^&]*" % re.escape(parameter), "%s=%s" % (parameter, newValue), paramString)
     return retVal
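A usage sketch (hypothetical query strings):

# _adjustParameter('id=1&cat=2', 'id', '5')   -> 'id=5&cat=2'
# _adjustParameter('id=1&cat=2', 'uid', '5')  -> 'id=1&cat=2' (parameter absent, unchanged)

Note that, unlike _randomizeParameter in the next example, there is no boundary guard here, so a parameter name that is a suffix of another (e.g. 'id' inside 'uid=1') can match inside the longer name.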
Example #21
 def _randomizeParameter(paramString, randomParameter):
     retVal = paramString
     match = re.search(r"(\A|\b)%s=(?P<value>[^&;]+)" % re.escape(randomParameter), paramString)
     if match:
         origValue = match.group("value")
         retVal = re.sub(r"(\A|\b)%s=[^&;]+" % re.escape(randomParameter), "%s=%s" % (randomParameter, randomizeParameterValue(origValue)), paramString)
     return retVal
Example #22
    def search(self, q, user):
        required_visibility = self.required_visibility(user)
        query_filter = {"path": self.mongo_path_prefix, "effectiveVisibility": {"$in": tuple(required_visibility)}}
        if q.startswith("album:"):
            album = q[len("album:") :]
            query_filter["type"] = "album"
            query_filter = {
                "$and": [
                    query_filter,
                    {
                        "$or": [
                            {"name": {"$regex": re.compile(re.escape(album), re.IGNORECASE)}},
                            {"title": {"$regex": re.compile(re.escape(album), re.IGNORECASE)}},
                        ]
                    },
                ]
            }
        elif q.startswith("tag:"):
            _id = Es.id_by_name(q[len("tag:") :])
            if _id is None:
                return
            query_filter["type"] = {"$ne": "album"}
            query_filter["tags"] = _id
        else:
            # do a full-text search
            for result in db.command(
                "text", "fotos", search=q, filter=query_filter, limit=96  # dividable by 2, 3 and 4
            )["results"]:
                yield entity(result["obj"])
            return

        # search for album or tag
        for o in fcol.find(query_filter).sort("date", -1):
            yield entity(o)
Example #23
    def test_add_FailoverMORoute_smpps(self):
        rorder = '10'
        rtype = 'FailoverMORoute'
        cid1 = 'smppuser1'
        typed_cid1 = 'smpps(%s)' % cid1
        cid2 = 'smppuser2'
        typed_cid2 = 'smpps(%s)' % cid2
        fid = 'f1'
        _str_ = ['%s to 2 connectors:' % rtype, '\t- %s' % re.escape(typed_cid1), '\t- %s' % re.escape(typed_cid2)]

        # Add MORoute
        extraCommands = [{'command': 'order %s' % rorder},
                         {'command': 'type %s' % rtype},
                         {'command': 'connectors %s;%s' % (typed_cid1, typed_cid2)},
                         {'command': 'filters %s' % fid}]
        yield self.add_moroute('jcli : ', extraCommands)

        # Make asserts
        expectedList = _str_
        yield self._test('jcli : ', [{'command': 'morouter -s %s' % rorder, 'expect': expectedList}])
        expectedList = ['#Order Type                    Connector ID\(s\)                                  Filter\(s\)',
                        '#%s %s %s     <T>' % (
                        rorder.ljust(5), rtype.ljust(23), (re.escape(typed_cid1) + ', ' + re.escape(typed_cid2)).ljust(48)),
                        'Total MO Routes: 1']
        commands = [{'command': 'morouter -l', 'expect': expectedList}]
        yield self._test(r'jcli : ', commands)
Example #24
def replaceDeployFile(out_file, template_file, train_file, fix_layers=None):
    f = open(template_file, 'rb')
    text = f.read()[:]
    f.close()

    text = text.replace('$TRAIN_TXT', '"' + train_file + '"')
    if fix_layers is not None:
        start_excludes = []
        for fix_layer_curr in fix_layers:
            starts = [match.start() for match in re.finditer(re.escape('name: "' + fix_layer_curr), text)]
            assert len(starts) == 1
            start_excludes.append(starts[0])
        starts = [match.start() for match in re.finditer(re.escape('name: '), text)]
        starts = [idx for idx in starts if idx not in start_excludes]
        starts.sort()
        starts = starts[::-1]
        for start in starts:
            string_orig = text[start:]
            string_orig = string_orig[:string_orig.index('\n')]
            string_new = string_orig[:string_orig.rindex('"')] + '_fix"'
            text = text.replace(string_orig, string_new)

    f = open(out_file, 'wb')
    f.write(text)
    f.close()
Example #25
def parse_route_template(template):
    rbuilder = ["^"]
    fbuilder = []
    position = 0
    schema = {}

    for match in template_var_re_finditer(template):
        param_name = match.group("name")
        param_type = match.group("type") or "id"
        # TODO: Handle KeyError, maybe we want to use a custom error here.
        param_formatchar, param_re, param_schema = _schema_map[param_type]
        schema[param_name] = param_schema

        rbuilder.append(re.escape(template[position:match.start()]))
        rbuilder.append(param_re.format(param_name))

        fbuilder.append(template[position:match.start()])
        fbuilder.append("{")
        fbuilder.append(param_name)
        fbuilder.append(":")
        fbuilder.append(param_formatchar)
        fbuilder.append("}")

        position = match.end()

    rbuilder.append(re.escape(template[position:]))
    rbuilder.append("$")
    fbuilder.append(template[position:])

    return (valid.Schema(schema),
            re.compile(u"".join(rbuilder)),
            u"".join(fbuilder).format)
Example #26
def patch_up_variable(origdata, data, origtype, var, ret):
    type = origtype
    var = re.sub(r"\s*=\s*[^;,\)]+", "", var)
    curlybracere = re.compile(r"\s*(\S+)\s*({})\s*(\S*)", re.MULTILINE)
    for var in var.split(","):
        var = var.strip()
        pat = r"%s\s*([^;{]*)%s\s*(%s)" % (re.escape(origtype), re.escape(var), _endpattern)
        end = re.search(pat, data)
        if end.group(2) == "[":
            type += re.search(r"([\[\]]+)", data[end.start():]).group(1)
        i = var.find("[]")
        if i != -1:
            type += var[i:]
            var = var[:i]
        if "<" in type and ">" in type:
            s = r"(%s.+%s)(const)?[^{};]*(%s)" % (type[:type.find("<")+1], type[type.find(">"):], var)
            regex = re.compile(s)
            match = None
            for m in regex.finditer(origdata):
                match = m
            type = match.group(1)
        match = curlybracere.search(var)
        if match:
            if match.group(3):
                var = match.group(3)
                type += " %s" % match.group(1)
            else:
                var = match.group(1)
        ret.append((type, var))
Example #27
def find_used_variables_in_text(variant, recipe_text, selectors=False):
    used_variables = set()
    recipe_lines = recipe_text.splitlines()
    for v in variant:
        all_res = []
        compiler_match = re.match(r'(.*?)_compiler(_version)?$', v)
        if compiler_match and not selectors:
            compiler_lang = compiler_match.group(1)
            compiler_regex = (
                r"\{\s*compiler\([\'\"]%s[\"\'][^\{]*?\}" % re.escape(compiler_lang)
            )
            all_res.append(compiler_regex)
            variant_lines = [line for line in recipe_lines if v in line or compiler_lang in line]
        else:
            variant_lines = [line for line in recipe_lines if v in line.replace('-', '_')]
        if not variant_lines:
            continue
        v_regex = re.escape(v)
        v_req_regex = '[-_]'.join(map(re.escape, v.split('_')))
        variant_regex = r"\{\s*(?:pin_[a-z]+\(\s*?['\"])?%s[^'\"]*?\}\}" % v_regex
        selector_regex = r"^[^#\[]*?\#?\s\[[^\]]*?(?<![_\w\d])%s[=\s<>!\]]" % v_regex
        conditional_regex = r"(?:^|[^\{])\{%\s*(?:el)?if\s*" + v_regex + r"\s*(?:[^%]*?)?%\}"
        # plain req name, no version spec.  Look for end of line after name, or comment or selector
        requirement_regex = r"^\s+\-\s+%s\s*(?:\s[\[#]|$)" % v_req_regex
        if selectors:
            all_res.extend([selector_regex])
        else:
            all_res.extend([variant_regex, requirement_regex, conditional_regex])
        # consolidate all re's into one big one for speedup
        all_res = r"|".join(all_res)
        if any(re.search(all_res, line) for line in variant_lines):
            used_variables.add(v)
    return used_variables
Example #28
def generate_manual_patterns_and_replacements():
    manual_replacement_file = open("lib/manual_replacement_library.txt", 'r')
    manual_mapping_0 = []
    manual_mapping_1 = []
    replacement_count=0
    for line in manual_replacement_file:
        #allow # to be a comment
        if(line[0]=='#' or line=='\n'):
            continue
        line_without_newline = remove_eol_pattern.sub('',line)
        line_split = line_without_newline.split("|")
        #An individual re can only hold 100 entities. So split if necessary.
        if(replacement_count<99):
            manual_mapping_0.append((line_split[0],line_split[1].decode('utf-8')))
        else:
            manual_mapping_1.append((line_split[0],line_split[1].decode('utf-8')))
        replacement_count+=1
        
    #multisub, but only done once for speed
    manual_pattern_0 = '|'.join('(%s)' % re.escape(p) for p, s in manual_mapping_0)
    substs_0 = [s for p, s in manual_mapping_0]
    manual_replacements_0 = lambda m: substs_0[m.lastindex - 1]
    manual_pattern_compiled_0 = re.compile(manual_pattern_0, re.UNICODE)
    
    manual_pattern_1 = '|'.join('(%s)' % re.escape(p) for p, s in manual_mapping_1)
    substs_1 = [s for p, s in manual_mapping_1]
    manual_replacement_1 = lambda n: substs_1[n.lastindex - 1]
    manual_pattern_compiled_1 = re.compile(manual_pattern_1, re.UNICODE)
    
    return {'patterns':(manual_pattern_compiled_0, manual_pattern_compiled_1),
            'replacements':(manual_replacements_0, manual_replacement_1)}
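A minimal self-contained sketch of the same alternation/lastindex substitution trick used above, with made-up mappings (no input file needed):

import re

mapping = [("&amp;", "&"), ("&lt;", "<"), ("&gt;", ">")]
pattern = re.compile('|'.join('(%s)' % re.escape(p) for p, s in mapping))
substs = [s for p, s in mapping]
# m.lastindex tells us which alternative matched, so one pass handles all pairs
print(pattern.sub(lambda m: substs[m.lastindex - 1], "1 &lt; 2 &amp;&amp; 3"))
# -> 1 < 2 && 3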
Example #29
    def getValue(self, value, disable_quote):
        """Return the value of this parameter.

        params:
            value: text value
            disable_quote: is quote disabled?
        return value
        """
        # NULL value for any type
        if str(value).upper() == "NULL":
            if self.type.strip().lower() in ("text", "varchar", "character varying"):
                return value + "::text"
            elif self.type.replace(" ", "").lower() in ("text[]", "varchar[]"):
                return value + "::text[]"
            else:
                return value + "::" + self.type.strip()

        if re.search(r"ARRAY\[.*\]", value.upper()):
            value = re.sub("(?i)" + re.escape("NAN"), "'NAN'::double precision", value)
            value = re.sub("(?i)" + re.escape("Infinity"), "'Infinity'::double precision", value)
            if self.type.lower() != "text" and self.type.lower() != "varchar":
                return value + "::" + self.type
        # does the invoke need quote and does the value support quote itself?
        if disable_quote and not self.quote:
            return value
        if self.type.lower() in ("text", "varchar", "character varying") and value.lower() == "empty":
            return "''::text"  # empty string
        else:
            # return "'%s'::%s" % (value, self.type)
            # value = value.replace("ARRAY[","{")
            # value = value.replace("]","}")
            return "$_valString$%s$_valString$::%s" % (value, self.type)
Example #30
  def SetJavaAssertsEnabled(self, enable):
    """Sets or removes the device java assertions property.

    Args:
      enable: If True the property will be set.

    Returns:
      True if the file was modified (reboot is required for it to take effect).
    """
    # First ensure the desired property is persisted.
    temp_props_file = tempfile.NamedTemporaryFile()
    properties = ''
    if self._adb.Pull(LOCAL_PROPERTIES_PATH, temp_props_file.name):
      properties = file(temp_props_file.name).read()
    re_search = re.compile(r'^\s*' + re.escape(JAVA_ASSERT_PROPERTY) +
                           r'\s*=\s*all\s*$', re.MULTILINE)
    if enable != bool(re.search(re_search, properties)):
      re_replace = re.compile(r'^\s*' + re.escape(JAVA_ASSERT_PROPERTY) +
                              r'\s*=\s*\w+\s*$', re.MULTILINE)
      properties = re.sub(re_replace, '', properties)
      if enable:
        properties += '\n%s=all\n' % JAVA_ASSERT_PROPERTY

      file(temp_props_file.name, 'w').write(properties)
      self._adb.Push(temp_props_file.name, LOCAL_PROPERTIES_PATH)

    # Next, check the current runtime value is what we need, and
    # if not, set it and report that a reboot is required.
    was_set = 'all' in self.RunShellCommand('getprop ' + JAVA_ASSERT_PROPERTY)
    if was_set == enable:
      return False

    self.RunShellCommand('setprop %s "%s"' % (JAVA_ASSERT_PROPERTY,
                                              enable and 'all' or ''))
    return True
Example #31
def regex_pattern(regex):
    return ".*" + re.escape(regex) + ".*"
Example #32
def test_register_resolver_error_empty_name(restore_resolvers: Any) -> None:
    with raises(ValueError,
                match=re.escape("cannot use an empty resolver name")):
        OmegaConf.register_new_resolver("", lambda: None)
Example #33
def test_register_resolver_error_non_callable(restore_resolvers: Any) -> None:
    with raises(TypeError, match=re.escape("resolver must be callable")):
        OmegaConf.register_new_resolver("foo", 0)  # type: ignore
Example #34
def ordered_char_list_regex(chars):
    """
    Turn a list of characters into a regex pattern that matches them all in order
    """
    return ".*?".join(re.escape(char) for char in chars)
Example #35
    def pretty_print_path(self, path):
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path
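A usage sketch (hypothetical paths, assuming self._strip_path == '/home/user'):

# obj.pretty_print_path('/home/user/src/app.py')  -> 'src/app.py'
# obj.pretty_print_path('relative/path.py')       -> 'relative/path.py' (unchanged)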
Example #36
def weight(reports):
    
    stop_words = set(stopwords.words('english'))

    chars_to_remove = ['?', '!', '[', ']', '`', '\'\'', '<', '>', '(', ')', ',', ':']
    rx = '[' + re.escape(''.join(chars_to_remove)) + ']'

    words = {}
    i = 1
    for report in reports:
        words[i] = []
        for k1 in report.keys():
            if (k1 != 'title'):
                for k2 in report[k1]['text'].keys():
                    sentence = report[k1]['text'][k2]
                    sentence = re.sub(r'(?<!\d)\.(?!\d)', '', sentence)
                    sentence = re.sub(rx, '', sentence)
                    sentence = sentence.lower()
                    
                    word_tokens = word_tokenize(sentence)
                    for w in word_tokens:
                        if w not in stop_words:
                            words[i].append(w)
        
        # print(words[i])
        words[i] = list(filter(None, words[i]))
        words[i] = list(set(words[i]))
        # print(words[i])
        i += 1
        # break

    s_words = {}
    t_words = {}
    r_words = {}
    i = 1
    for report in reports:
        s_words[i] = {}
        t_words[i] = {}
        r_words[i] = {}
        for k1 in report.keys():
            if (k1 != 'title'):
                user = report[k1]['user']
                t_words[i][k1] = []
                if user not in s_words[i]:
                    s_words[i][user]=[]
        
                for k2, v in report[k1]['text'].items():
                    sentence = v
                    sentence = re.sub(r'(?<!\d)\.(?!\d)', '', sentence)
                    sentence = re.sub(rx, '', sentence)
                    sentence = sentence.lower()

                    word_tokens = word_tokenize(sentence)
                    
                    r_words[i][k2] = []
                    temp = []
                    for w in word_tokens:
                        if w not in stop_words:
                            r_words[i][k2].append(w)
                            temp.append(w)

                    s_words[i][user].extend(temp)
                    t_words[i][k1].extend(temp)
        
        i += 1
        # break

    for k1 in s_words.keys():
        for k2 in s_words[k1].keys():
            s_words[k1][k2] = nltk.FreqDist(s_words[k1][k2])

    for k1 in t_words.keys():
        for k2 in t_words[k1].keys():
            t_words[k1][k2] = nltk.FreqDist(t_words[k1][k2])

    pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(t_words[1])
    # pp.pprint(s_words[1])
    # pp.pprint(r_words[1])

    sprob = {}
    tprob = {}
    for r_key in words.keys():
        sprob[r_key] = {}
        for i in range(len(words[r_key])-1):
            max = 0
            sum = 0
            for u_key in s_words[r_key].keys():
                if words[r_key][i] in s_words[r_key][u_key]:
                    if s_words[r_key][u_key][words[r_key][i]] > max:
                        max = s_words[r_key][u_key][words[r_key][i]]
                    sum += s_words[r_key][u_key][words[r_key][i]]
            if sum != 0:
                sprob[r_key][words[r_key][i]] = max / sum
            else:
                sprob[r_key][words[r_key][i]] = sum

        tprob[r_key] = {}
        for i in range(len(words[r_key])-1):
            max = 0
            sum = 0
            for t_key in t_words[r_key].keys():
                if words[r_key][i] in t_words[r_key][t_key]:
                    if t_words[r_key][t_key][words[r_key][i]] > max:
                        max = t_words[r_key][t_key][words[r_key][i]]
                    sum += t_words[r_key][t_key][words[r_key][i]]
            if sum != 0:
                tprob[r_key][words[r_key][i]] = max / sum
            else:
                tprob[r_key][words[r_key][i]] = sum

    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(tprob[35])
    # pp.pprint(sprob[35])

    return sprob, tprob, r_words
Example #37
def main(*args):
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU)
    sleep = None

    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            commandline_replacements.append(arg)
    pywikibot.Site().login()
    gen = genFactory.getCombinedGenerator()
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error, 'require even number of replacements.'
    elif (len(commandline_replacements) == 2 and fix == None):
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            edit_summary = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing', {
                    'description':
                    ' (-%s +%s)' %
                    (commandline_replacements[0], commandline_replacements[1])
                })
    elif (len(commandline_replacements) > 1):
        if (fix == None):
            for i in xrange(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                edit_summary = i18n.twtranslate(
                    pywikibot.getSite(), 'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix == None:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,' +
                u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikibot.getSite(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikibot.output(u'The summary message will default to: %s' %
                             default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            edit_summary = summary_message

    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                edit_summary = i18n.twtranslate(pywikibot.getSite(),
                                                str(fix['msg']))
            else:
                edit_summary = pywikibot.translate(pywikibot.getSite(),
                                                   fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside'
    ]:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements
        ])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [
            pywikibot.Page(pywikibot.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary)
    bot.run()
Example #38
def _get_process_ids_for_test(context):
    return _get_process_ids_for_processes_matching(
        re.escape("-Dservice.manager.testId=%s" % context.instance_id))
Example #39
import re
from functools import partial

from objutils.utils import createStringBuffer, slicer, PYTHON_VERSION
from objutils import checksums, hexfile  # re, partial and hexfile are used below but were missing from this excerpt
import objutils.utils as utils

DATA_ABS = 1
DATA_INC = 2
DATA_REL = 3
EOF = 4

PREFIX = '$'

MAPPING = dict(enumerate(chr(n) for n in range(37, 123) if not n in (42, )))
REV_MAPPING = {ord(value): key for key, value in MAPPING.items()}
NULLS = re.compile(r'\0*\s*!M\s*(.*)', re.DOTALL | re.M)
VALID_CHARS = re.compile(r"^\{0}[{1}]+$".format(
    PREFIX, re.escape(''.join(MAPPING.values()))))

atoi16 = partial(int, base=16)


class Reader(hexfile.Reader):

    FORMAT_SPEC = ((DATA_ABS, "CCLL0000AAAAAAAADD"), (DATA_INC, "CCLL0001DD"),
                   (DATA_REL, "CCLL0002AAAAAAAADD"), (EOF, "00000000"))

    def decode(self, fp):
        self.lastAddress = 0  # TODO: decode!
        outLines = []
        for line in fp.readlines():
            line = line.strip()
            startSym, line = line[0], line[1:]
Example #40
 def RegexForStringifiedValueMatch(label_name):
   return "(.+,|\\A)%s(,.+|\\Z)" % re.escape(label_name)
Example #41
    def parse_file(self, filename):
        """
        Parse through individual files looking for issue tags, then generate
        formatted issue hash.
        """
        # Identify method entry.
        debug_print(get_current_method_signature())

        relative_path = filename
        absolute_path = os.path.abspath(filename)

        # Error check on input, use input filename to make sure relative path is
        # correct.
        if FS.check_file(relative_path):
            debug_print("Opened %s for parsing." % absolute_path)
            debug_print("Short path: %s" % relative_path)
        else:
            print "Unable to open %s, exiting" % relative_path
            # [review] - do I really want to return None here? what consumes this?
            return None 

        # Get file extension and set corresponding comment type.
        comment_type = self.get_comment_type(relative_path)
        if not comment_type:
            debug_print("Using default (#) comment type.")
            comment_type = '#'

        # Open file and read in entire thing into an array.
        # Use an array so we can look ahead when creating issues later.
        # [review] - Not sure if explicit file close is required here.
        data = []
        file_to_parse = open(absolute_path, 'r')
        for line in file_to_parse:
            data.append(line.strip())

        # Initialize issue list hash
        issue_list = {}
        issue_list['relative_path'] = relative_path
        issue_list['absolute_path'] = absolute_path
        issue_list['has_issues'] = False
        for tag in self.config.tag_list:
            debug_print("Create array named %s" % tag)
            issue_list[tag] = []

        
        # For each comment type allowed for the file type convert it into a
        # string that can be used in a regex.
        comment_type_re_str = '|'.join('(' + re.escape(ext) + ')' for ext in comment_type)
        issue_re = re.compile(r'^(%s)+?\s+\[(?P<tag_name>\w+)\]\s+-\s+(?P<title_text>.+)' % comment_type_re_str)
        # Loop through all array elements (lines in file) and look for issues
        for i, line in enumerate(data):
            # Find any comment line with [tag] - text (any combination of space
            # and # acceptable).
            # Using if match to stay consistent (with config.py); see there for
            # an explanation of why I do this (not a good one per se...)
            mtch = issue_re.match(line.strip())
            if not mtch:
                debug_print("No valid tag found in line, skipping")
                continue

            tag = mtch.group('tag_name')

            if tag not in self.config.tag_list:
                Printer.print_status('!', RED)
                print "Unknown tag [%s] found, ignoring" % tag
                print "      You might want to include it in you RC or with the -t/--tags flag"
                continue

            # Found a valid match (with recognized tag).
            # Set flag for this issue_list (for_file) to indicate that.
            issue_list['has_issues'] = True

            title = mtch.group('title_text')
            debug_print("Issue found")
            debug_print("Tag: %s" % tag)
            debug_print("Issue: %s" % title)

            # Create dict for each issue found.
            issue = {}
            issue['line_number'] = i + 1
            issue['title'] = title

            # Grab context of issue specified by Config param (+1 to include the issue itself).
            issue['context'] = data[i:i + self.config.context_depth + 1]
Example #42
 def __doSearch(self):
     """
     Private slot to handle the find button being pressed.
     """
     if self.__replaceMode and \
        not e5App().getObject("ViewManager").checkAllDirty():
         return
     
     self.__cancelSearch = False
     
     if self.filterCheckBox.isChecked():
         fileFilter = self.filterEdit.text()
         fileFilterList = \
             ["^{0}$".format(filter.replace(".", "\\.").replace("*", ".*"))
              for filter in fileFilter.split(";")]
         filterRe = re.compile("|".join(fileFilterList))
     
     if self.projectButton.isChecked():
         if self.filterCheckBox.isChecked():
             files = [self.project.getRelativePath(file)
                      for file in
                      self.__getFileList(
                          self.project.getProjectPath(), filterRe)]
         else:
             files = []
             if self.sourcesCheckBox.isChecked():
                 files += self.project.pdata["SOURCES"]
             if self.formsCheckBox.isChecked():
                 files += self.project.pdata["FORMS"]
             if self.interfacesCheckBox.isChecked():
                 files += self.project.pdata["INTERFACES"]
             if self.resourcesCheckBox.isChecked():
                 files += self.project.pdata["RESOURCES"]
     elif self.dirButton.isChecked():
         if not self.filterCheckBox.isChecked():
             filters = []
             if self.sourcesCheckBox.isChecked():
                 filters.extend(
                     ["^{0}$".format(
                     assoc.replace(".", "\\.").replace("*", ".*"))
                      for assoc in list(
                          Preferences.getEditorLexerAssocs().keys())
                      if assoc not in self.formsExt + self.interfacesExt])
             if self.formsCheckBox.isChecked():
                 filters.append(self.filterForms)
             if self.interfacesCheckBox.isChecked():
                 filters.append(self.filterInterfaces)
             if self.resourcesCheckBox.isChecked():
                 filters.append(self.filterResources)
             filterString = "|".join(filters)
             filterRe = re.compile(filterString)
         files = self.__getFileList(
             os.path.abspath(self.dirCombo.currentText()),
             filterRe)
     elif self.openFilesButton.isChecked():
         vm = e5App().getObject("ViewManager")
         vm.checkAllDirty()
         files = vm.getOpenFilenames()
     
     self.findList.clear()
     QApplication.processEvents()
     QApplication.processEvents()
     self.findProgress.setMaximum(len(files))
     
     # retrieve the values
     reg = self.regexpCheckBox.isChecked()
     wo = self.wordCheckBox.isChecked()
     cs = self.caseCheckBox.isChecked()
     ct = self.findtextCombo.currentText()
     if reg:
         txt = ct
     else:
         txt = re.escape(ct)
     if wo:
         txt = "\\b{0}\\b".format(txt)
     flags = re.UNICODE  # re.LOCALE cannot be combined with str patterns (ValueError on Python 3.6+)
     if not cs:
         flags |= re.IGNORECASE
     try:
         search = re.compile(txt, flags)
     except re.error as why:
         E5MessageBox.critical(
             self,
             self.tr("Invalid search expression"),
             self.tr("""<p>The search expression is not valid.</p>"""
                     """<p>Error: {0}</p>""").format(str(why)))
         self.stopButton.setEnabled(False)
         self.findButton.setEnabled(True)
         self.findButton.setDefault(True)
         return
     # reset the findtextCombo
     if ct in self.searchHistory:
         self.searchHistory.remove(ct)
     self.searchHistory.insert(0, ct)
     self.findtextCombo.clear()
     self.findtextCombo.addItems(self.searchHistory)
     Preferences.Prefs.settings.setValue(
         "FindFileDialog/SearchHistory",
         self.searchHistory[:30])
     
     if self.__replaceMode:
         replTxt = self.replacetextCombo.currentText()
         if replTxt in self.replaceHistory:
             self.replaceHistory.remove(replTxt)
         self.replaceHistory.insert(0, replTxt)
         self.replacetextCombo.clear()
         self.replacetextCombo.addItems(self.replaceHistory)
         Preferences.Prefs.settings.setValue(
             "FindFileDialog/ReplaceHistory",
             self.replaceHistory[:30])
     
     if self.dirButton.isChecked():
         searchDir = self.dirCombo.currentText()
         if searchDir in self.dirHistory:
             self.dirHistory.remove(searchDir)
         self.dirHistory.insert(0, searchDir)
         self.dirCombo.clear()
         self.dirCombo.addItems(self.dirHistory)
         Preferences.Prefs.settings.setValue(
             "FindFileDialog/DirectoryHistory",
             self.dirHistory[:30])
     
     # set the button states
     self.stopButton.setEnabled(True)
     self.stopButton.setDefault(True)
     self.findButton.setEnabled(False)
     
     # now go through all the files
     self.__populating = True
     self.findList.setUpdatesEnabled(False)
     progress = 0
     breakSearch = False
     occurrences = 0
     fileOccurrences = 0
     for file in files:
         self.__lastFileItem = None
         found = False
         if self.__cancelSearch or breakSearch:
             break
         
         self.findProgressLabel.setPath(file)
         
         if self.projectButton.isChecked():
             fn = os.path.join(self.project.ppath, file)
         else:
             fn = file
         # read the file and split it into textlines
         try:
             text, encoding, hash = Utilities.readEncodedFileWithHash(fn)
             lines = text.splitlines(True)
         except (UnicodeError, IOError):
             progress += 1
             self.findProgress.setValue(progress)
             continue
         
         # now perform the search and display the lines found
         count = 0
         for line in lines:
             if self.__cancelSearch:
                 break
             
             count += 1
             contains = search.search(line)
             if contains:
                 occurrences += 1
                 found = True
                 start = contains.start()
                 end = contains.end()
                 if self.__replaceMode:
                     rline = search.sub(replTxt, line)
                 else:
                     rline = ""
                 line = self.__stripEol(line)
                 if len(line) > 1024:
                     line = "{0} ...".format(line[:1024])
                 if self.__replaceMode:
                     if len(rline) > 1024:
                         rline = "{0} ...".format(rline[:1024])
                     line = "- {0}\n+ {1}".format(
                         line, self.__stripEol(rline))
                 self.__createItem(file, count, line, start, end,
                                   rline, hash)
                 
                 if self.feelLikeCheckBox.isChecked():
                     fn = os.path.join(self.project.ppath, file)
                     self.sourceFile.emit(fn, count, "", start, end)
                     QApplication.processEvents()
                     breakSearch = True
                     break
             
             QApplication.processEvents()
         
         if found:
             fileOccurrences += 1
         progress += 1
         self.findProgress.setValue(progress)
     
     if not files:
         self.findProgress.setMaximum(1)
         self.findProgress.setValue(1)
     
     resultFormat = self.tr("{0} / {1}", "occurrences / files")
     self.findProgressLabel.setPath(resultFormat.format(
         self.tr("%n occurrence(s)", "", occurrences),
         self.tr("%n file(s)", "", fileOccurrences)))
     
     self.findList.setUpdatesEnabled(True)
     self.findList.sortItems(self.findList.sortColumn(),
                             self.findList.header().sortIndicatorOrder())
     self.findList.resizeColumnToContents(1)
     if self.__replaceMode:
         self.findList.header().resizeSection(0, self.__section0Size + 30)
     self.findList.header().setStretchLastSection(True)
     self.__populating = False
     
     self.stopButton.setEnabled(False)
     self.findButton.setEnabled(True)
     self.findButton.setDefault(True)
     
     if breakSearch:
         self.close()
 def doSpecial(self, text, start, end, method):
     pattern = re.compile(
         r'(^|\s|[\[({>|])%s(.*?)%s($|[\])}])?' %
         (re.escape(start), re.escape(end)), re.M | re.S)
     return pattern.sub(method, text)
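
Hedged usage sketch for doSpecial above (the helper is repeated verbatim so the snippet runs standalone): start and end are literal delimiters, so re.escape stops '**' from acting as a pair of quantifiers:

import re

def doSpecial(text, start, end, method):
    pattern = re.compile(
        r'(^|\s|[\[({>|])%s(.*?)%s($|[\])}])?' %
        (re.escape(start), re.escape(end)), re.M | re.S)
    return pattern.sub(method, text)

# Wrap **bold** spans in <b> tags; groups 1 and 3 keep the surrounding context.
print(doSpecial('some **bold** text', '**', '**',
                lambda m: '%s<b>%s</b>%s' % (m.group(1), m.group(2), m.group(3) or '')))
# some <b>bold</b> text
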
        banner, location, source_line = tb_lines
        self.assertTrue(banner.startswith('Traceback'))
        self.assertTrue(location.startswith('  File'))
        self.assertTrue(source_line.startswith('    raise'))


cause_message = (
    "\nThe above exception was the direct cause "
    "of the following exception:\n\n")

context_message = (
    "\nDuring handling of the above exception, "
    "another exception occurred:\n\n")

boundaries = re.compile(
    '(%s|%s)' % (re.escape(cause_message), re.escape(context_message)))


class BaseExceptionReportingTests:

    def get_exception(self, exception_or_callable):
        if isinstance(exception_or_callable, Exception):
            return exception_or_callable
        try:
            exception_or_callable()
        except Exception as e:
            return e

    def zero_div(self):
        1/0 # In zero_div
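
The boundaries pattern above escapes the two banner strings so re.split can cut a chained traceback dump on them literally. A reduced sketch with just the cause banner:

import re

cause_message = (
    "\nThe above exception was the direct cause "
    "of the following exception:\n\n")
boundaries = re.compile('(%s)' % re.escape(cause_message))

dump = ("Traceback A...\nZeroDivisionError" + cause_message
        + "Traceback B...\nValueError")
print(len(boundaries.split(dump)))  # 3: text before, the separator itself, text after
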
Exemple #45
0
def test_schedule_decorators_bad():
    @solid
    def do_nothing(_):
        pass

    @pipeline
    def foo_pipeline():
        do_nothing()

    with pytest.raises(DagsterInvalidDefinitionError):

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=32,
            start_date=datetime(year=2019, month=1, day=1),
        )
        def monthly_foo_schedule_over():
            return {}

    with pytest.warns(
        UserWarning,
        match=re.escape(
            "`start_date` must be at the beginning of the first day of the month for a monthly schedule."
        ),
    ):

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=7,
            start_date=datetime(year=2019, month=1, day=5),
        )
        def monthly_foo_schedule_later_in_month():
            return {}

    with pytest.raises(DagsterInvalidDefinitionError):

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=0,
            start_date=datetime(year=2019, month=1, day=1),
        )
        def monthly_foo_schedule_under():
            return {}

    with pytest.raises(DagsterInvalidDefinitionError):

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=7,
            start_date=datetime(year=2019, month=1, day=1),
        )
        def weekly_foo_schedule_over():
            return {}

    with pytest.warns(
        UserWarning,
        match=re.escape("`start_date` must be at the beginning of a day for a weekly schedule."),
    ):

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=datetime(year=2019, month=1, day=1, hour=2),
        )
        def weekly_foo_schedule_start_later_in_day():
            return {}

    with pytest.warns(
        UserWarning,
        match=re.escape("`start_date` must be at the beginning of a day for a daily schedule."),
    ):

        @daily_schedule(
            pipeline_name="foo_pipeline",
            start_date=datetime(year=2019, month=1, day=1, hour=2),
        )
        def daily_foo_schedule_start_later_in_day():
            return {}

    with pytest.warns(
        UserWarning,
        match=re.escape(
            "`start_date` must be at the beginning of the hour for an hourly schedule."
        ),
    ):

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=datetime(year=2019, month=1, day=1, hour=2, minute=30),
        )
        def hourly_foo_schedule_start_later_in_hour():
            return {}

    with pytest.raises(DagsterInvalidDefinitionError, match="invalid cron schedule"):

        @schedule(cron_schedule="", pipeline_name="foo_pipeline")
        def bad_cron_string(context):
            return {}

    with pytest.raises(DagsterInvalidDefinitionError, match="invalid cron schedule"):

        @schedule(cron_schedule="bad_schedule_two", pipeline_name="foo_pipeline")
        def bad_cron_string_two(context):
            return {}
Exemple #46
0
def test_load_default_location(default_config):
    with pytest.raises(utils.UsageError,
                       match=re.escape(f'Could not find the file {default_config}')):
        Settings.from_yaml_file()
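
One line of context on the idiom above: pytest's match argument is applied with re.search, so literal messages containing '.' or brackets must be escaped. A tiny illustration with a made-up message:

import re

msg = 'Could not find the file /etc/app.yaml'  # hypothetical message
assert re.search(re.escape(msg), 'UsageError: ' + msg)
# Without escaping, '.' is a wildcard, so the raw message also matches this:
assert re.search(msg, 'Could not find the file /etc/appXyaml')
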
Exemple #47
0
 def _add_option_word(self, directive):
     regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
         re.escape(directive),
         self._optional_equals,
     )
     self._pr_options.append(_pr_compile(regex))
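
A sketch of what the compiled option-word pattern looks like, assuming _optional_equals is something like r'(?:\s*=\s*|\s+)' (the real value is defined elsewhere in the dialect):

import re

directive = 'ENGINE'
_optional_equals = r'(?:\s*=\s*|\s+)'  # assumed; defined elsewhere in the real dialect
regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
    re.escape(directive),
    _optional_equals,
)

m = re.search(regex, "ENGINE=InnoDB DEFAULT CHARSET=utf8")
print(m.group('directive'), m.group('val'))  # ENGINE InnoDB
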
Exemple #48
0
def test_schedule_decorators_sanity():
    @solid
    def do_nothing(_):
        pass

    @pipeline
    def foo_pipeline():
        do_nothing()

    @schedule(cron_schedule="* * * * *", pipeline_name="foo_pipeline")
    def foo_schedule(context):
        return {}

    @monthly_schedule(
        pipeline_name="foo_pipeline",
        execution_day_of_month=3,
        start_date=datetime(year=2019, month=1, day=1),
    )
    def monthly_foo_schedule():
        return {}

    @weekly_schedule(
        pipeline_name="foo_pipeline",
        execution_day_of_week=1,
        start_date=datetime(year=2019, month=1, day=1),
    )
    def weekly_foo_schedule():
        return {}

    @daily_schedule(
        pipeline_name="foo_pipeline",
        start_date=datetime(year=2019, month=1, day=1),
    )
    def daily_foo_schedule():
        return {}

    @hourly_schedule(
        pipeline_name="foo_pipeline",
        start_date=datetime(year=2019, month=1, day=1),
    )
    def hourly_foo_schedule():
        return {}

    assert not foo_schedule.execution_timezone
    assert not monthly_foo_schedule.execution_timezone
    assert not weekly_foo_schedule.execution_timezone
    assert not hourly_foo_schedule.execution_timezone
    assert not daily_foo_schedule.execution_timezone

    @schedule(
        cron_schedule="* * * * *",
        pipeline_name="foo_pipeline",
        execution_timezone="US/Central",
    )
    def foo_schedule_timezone(context):
        return {}

    assert foo_schedule_timezone.execution_timezone == "US/Central"

    with pytest.raises(
        DagsterInvalidDefinitionError,
        match=re.escape(
            "Invalid execution timezone MadeUpTimeZone for invalid_timezone_foo_schedule"
        ),
    ):

        @daily_schedule(
            pipeline_name="foo_pipeline",
            start_date=datetime(year=2019, month=1, day=1),
            execution_timezone="MadeUpTimeZone",
        )
        def invalid_timezone_foo_schedule():
            return {}
                        if j == 0:
                            core_entities.append(entity)
            catcallall = " ".join([pgTitle, secTitle, caption, " ".join(headers)])
            remained_core_entities = [z for z in core_entities if z in all_entity_set]
            if len(remained_core_entities) < 5:
                tables_ignored += 1
                continue
            seed = remained_core_entities[:1]

            # pdb.set_trace()
            # A1 = eva.find_core_candidates_cat(seed, k)
            # A1 = set()
            # B = eva.find_core_candidates_c(seed, re.escape(catcallall), k)
            # C = eva.find_core_candidates_e(seed, k)
            # pdb.set_trace()
            pall, pee, pce, ple, cand_e, cand_c = eva.rank_core_candidates(seed, re.escape(caption), [re.escape(headers[0])], num=k)
            target_entities = set(remained_core_entities[1:])
            ranked_entities = [1 if z[0] in target_entities else 0 for z in sorted(pall.items(),key=lambda z:z[1],reverse=True)]
            # dev_result[table_id] = [set(seed), B, C, B|C]
            dev_result[table_id] = [set(seed), target_entities, ranked_entities, pall, pee, pce, ple, cand_e, cand_c]
            # pdb.set_trace()
    # pdb.set_trace()
    # for i in range(3):
        # print(np.mean([len(x[0]&x[i+1])/len(x[0]) for _,x in dev_result.items()]), np.mean([len(x[i+1])for _,x in dev_result.items()]))
    print("tables ignored %d"%tables_ignored)
    pdb.set_trace()
    print("map: %f"%mean_average_precision([z[2] for _,z in dev_result.items()]))
    with open(os.path.join(data_dir, "dev_result.pkl"),"wb") as f:
        pickle.dump(dev_result, f)
    print("finish val")
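
The rank_core_candidates call above passes re.escape(caption) and re.escape(headers[0]) because table captions are free text that is later used inside a regex; escaping turns them into safe literals. A tiny sketch of the hazard:

import re

caption = "GDP (nominal) by country [2019]"
# Raw, this caption would raise re.error or mismatch: '(', ')' and '['
# are regex metacharacters. Escaped, it matches itself literally.
print(bool(re.search(re.escape(caption), "Table: " + caption)))  # True
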
        
Exemple #50
0
    def parseConfigurationFiles(self, prefiles, postfiles, mc = "default"):
        data = bb.data.createCopy(self.basedata)
        data.setVar("BB_CURRENT_MC", mc)

        # Parse files for loading *before* bitbake.conf and any includes
        for f in prefiles:
            data = parse_config_file(f, data)

        layerconf = self._findLayerConf(data)
        if layerconf:
            parselog.debug(2, "Found bblayers.conf (%s)", layerconf)
            # By definition bblayers.conf is in conf/ of TOPDIR.
            # We may have been called with cwd somewhere else so reset TOPDIR
            data.setVar("TOPDIR", os.path.dirname(os.path.dirname(layerconf)))
            data = parse_config_file(layerconf, data)

            layers = (data.getVar('BBLAYERS') or "").split()
            broken_layers = []

            data = bb.data.createCopy(data)
            approved = bb.utils.approved_variables()

            # Check whether present layer directories exist
            for layer in layers:
                if not os.path.isdir(layer):
                    broken_layers.append(layer)

            if broken_layers:
                parselog.critical("The following layer directories do not exist:")
                for layer in broken_layers:
                    parselog.critical("   %s", layer)
                parselog.critical("Please check BBLAYERS in %s" % (layerconf))
                sys.exit(1)

            for layer in layers:
                parselog.debug(2, "Adding layer %s", layer)
                if 'HOME' in approved and '~' in layer:
                    layer = os.path.expanduser(layer)
                if layer.endswith('/'):
                    layer = layer.rstrip('/')
                data.setVar('LAYERDIR', layer)
                data.setVar('LAYERDIR_RE', re.escape(layer))
                data = parse_config_file(os.path.join(layer, "conf", "layer.conf"), data)
                data.expandVarref('LAYERDIR')
                data.expandVarref('LAYERDIR_RE')

            data.delVar('LAYERDIR_RE')
            data.delVar('LAYERDIR')

            bbfiles_dynamic = (data.getVar('BBFILES_DYNAMIC') or "").split()
            collections = (data.getVar('BBFILE_COLLECTIONS') or "").split()
            invalid = []
            for entry in bbfiles_dynamic:
                parts = entry.split(":", 1)
                if len(parts) != 2:
                    invalid.append(entry)
                    continue
                l, f = parts
                invert = l[0] == "!"
                if invert:
                    l = l[1:]
                if (l in collections and not invert) or (l not in collections and invert):
                    data.appendVar("BBFILES", " " + f)
            if invalid:
                bb.fatal("BBFILES_DYNAMIC entries must be of the form {!}<collection name>:<filename pattern>, not:\n    %s" % "\n    ".join(invalid))

            layerseries = set((data.getVar("LAYERSERIES_CORENAMES") or "").split())
            collections_tmp = collections[:]
            for c in collections:
                collections_tmp.remove(c)
                if c in collections_tmp:
                    bb.fatal("Found duplicated BBFILE_COLLECTIONS '%s', check bblayers.conf or layer.conf to fix it." % c)
                compat = set((data.getVar("LAYERSERIES_COMPAT_%s" % c) or "").split())
                if compat and not (compat & layerseries):
                    bb.fatal("Layer %s is not compatible with the core layer which only supports these series: %s (layer is compatible with %s)"
                              % (c, " ".join(layerseries), " ".join(compat)))
                elif not compat and not data.getVar("BB_WORKERCONTEXT"):
                    bb.warn("Layer %s should set LAYERSERIES_COMPAT_%s in its conf/layer.conf file to list the core layer names it is compatible with." % (c, c))

        if not data.getVar("BBPATH"):
            msg = "The BBPATH variable is not set"
            if not layerconf:
                msg += (" and bitbake did not find a conf/bblayers.conf file in"
                        " the expected location.\nMaybe you accidentally"
                        " invoked bitbake from the wrong directory?")
            raise SystemExit(msg)

        data = parse_config_file(os.path.join("conf", "bitbake.conf"), data)

        # Parse files for loading *after* bitbake.conf and any includes
        for p in postfiles:
            data = parse_config_file(p, data)

        # Handle any INHERITs and inherit the base class
        bbclasses  = ["base"] + (data.getVar('INHERIT') or "").split()
        for bbclass in bbclasses:
            data = _inherit(bbclass, data)

        # Normally we only register event handlers at the end of parsing .bb files
        # We register any handlers we've found so far here...
        for var in data.getVar('__BBHANDLERS', False) or []:
            handlerfn = data.getVarFlag(var, "filename", False)
            if not handlerfn:
                parselog.critical("Undefined event handler function '%s'" % var)
                sys.exit(1)
            handlerln = int(data.getVarFlag(var, "lineno", False))
            bb.event.register(var, data.getVar(var, False),  (data.getVarFlag(var, "eventmask") or "").split(), handlerfn, handlerln)

        data.setVar('BBINCLUDED',bb.parse.get_file_depends(data))

        return data
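
LAYERDIR_RE above carries re.escape(layer) so layer configs can splice the path into regex-valued variables such as BBFILE_PATTERN; escaping keeps path characters like '+' from acting as quantifiers. A toy illustration with a made-up path:

import re

layer = '/srv/poky+extras/meta-custom'
pattern_raw = '^' + layer + '/'            # '+' is a quantifier here: broken
pattern_escaped = '^' + re.escape(layer) + '/'

print(bool(re.match(pattern_escaped, layer + '/recipes/foo.bb')))  # True
print(bool(re.match(pattern_raw, layer + '/recipes/foo.bb')))      # False
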
Exemple #51
0
class AFF4LabelsIndex(aff4.AFF4Volume):
  """Index for objects' labels with various querying capabilities."""

  # Separator is a character that's not allowed in label names.
  SEPARATOR = "|"
  ESCAPED_SEPARATOR = re.escape("|")

  def Initialize(self):
    super(AFF4LabelsIndex, self).Initialize()

    self._urns_index = None
    self._used_labels_index = None

  @property
  def urns_index(self):
    if self._urns_index is None:
      self._urns_index = aff4.FACTORY.Create(
          self.urn.Add("urns_index"), "AFF4Index", mode=self.mode,
          token=self.token)

    return self._urns_index

  @property
  def used_labels_index(self):
    if self._used_labels_index is None:
      self._used_labels_index = aff4.FACTORY.Create(
          self.urn.Add("used_labels_index"), "AFF4IndexSet", mode=self.mode,
          token=self.token)

    return self._used_labels_index

  def IndexNameForLabel(self, label_name, label_owner):
    return label_owner + self.SEPARATOR + label_name

  def LabelForIndexName(self, index_name):
    label_owner, label_name = utils.SmartStr(index_name).split(
        self.SEPARATOR, 1)
    return aff4_rdfvalues.AFF4ObjectLabel(name=label_name, owner=label_owner)

  def AddLabel(self, urn, label_name, owner=None):
    if owner is None:
      raise ValueError("owner can't be None")

    index_name = self.IndexNameForLabel(label_name, owner)
    self.urns_index.Add(urn, aff4.AFF4Object.SchemaCls.LABELS, index_name)
    self.used_labels_index.Add(index_name)

  def RemoveLabel(self, urn, label_name, owner=None):
    if owner is None:
      raise ValueError("owner can't be None")

    self.urns_index.DeleteAttributeIndexesForURN(
        aff4.AFF4Object.SchemaCls.LABELS,
        self.IndexNameForLabel(label_name, owner), urn)

  def ListUsedLabels(self, owner=None):
    results = []
    index_results = self.used_labels_index.ListValues()
    for name in index_results:
      label = self.LabelForIndexName(name)
      if label:
        if owner and label.owner != owner:
          continue
        results.append(label)
    return results

  def ListUsedLabelNames(self, owner=None):
    return [x.name for x in self.ListUsedLabels(owner=owner)]

  def FindUrnsByLabel(self, label, owner=None):
    results = self.MultiFindUrnsByLabel([label], owner=owner).values()
    if not results:
      return []
    else:
      return results[0]

  def MultiFindUrnsByLabel(self, labels, owner=None):
    if owner is None:
      owner = ".+"
    else:
      owner = re.escape(owner)

    query_results = self.urns_index.MultiQuery(
        [aff4.AFF4Object.SchemaCls.LABELS],
        [owner + self.ESCAPED_SEPARATOR + re.escape(label) for label in labels])

    results = {}
    for key, value in query_results.iteritems():
      results[self.LabelForIndexName(key)] = value
    return results

  def FindUrnsByLabelNameRegex(self, label_name_regex, owner=None):
    return self.MultiFindUrnsByLabelNameRegex([label_name_regex], owner=owner)

  def MultiFindUrnsByLabelNameRegex(self, label_name_regexes, owner=None):
    if owner is None:
      owner = ".+"
    else:
      owner = re.escape(owner)

    query_results = self.urns_index.MultiQuery(
        [aff4.AFF4Object.SchemaCls.LABELS],
        [owner + self.ESCAPED_SEPARATOR + regex
         for regex in label_name_regexes])

    results = {}
    for key, value in query_results.iteritems():
      results[self.LabelForIndexName(key)] = value
    return results

  def CleanUpUsedLabelsIndex(self):
    raise NotImplementedError()

  def Flush(self, sync=False):
    super(AFF4LabelsIndex, self).Flush(sync=sync)

    self.urns_index.Flush(sync=sync)
    self.used_labels_index.Flush(sync=sync)

  def Close(self, sync=False):
    self.Flush(sync=sync)

    super(AFF4LabelsIndex, self).Close(sync=sync)
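
The queries above build patterns of the form owner + ESCAPED_SEPARATOR + re.escape(label). Since the separator '|' is the regex alternation operator, escaping it is what keeps it literal; a rough sketch:

import re

SEPARATOR = "|"
ESCAPED_SEPARATOR = re.escape("|")

index_name = "alice" + SEPARATOR + "important"
query = "alice" + ESCAPED_SEPARATOR + re.escape("important")
print(bool(re.match(query, index_name)))  # True: '\|' matches the literal separator
# Unescaped, '|' splits the pattern into alternatives, so this also "matches":
print(bool(re.match("alice" + SEPARATOR + "important", "alice")))  # True
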
Exemple #52
0
    def _prep_regexes(self):
        """Pre-compile regular expressions."""

        self._re_columns = []
        self._pr_options = []

        _final = self.preparer.final_quote

        quotes = dict(
            zip(
                ("iq", "fq", "esc_fq"),
                [
                    re.escape(s) for s in (
                        self.preparer.initial_quote,
                        _final,
                        self.preparer._escape_identifier(_final),
                    )
                ],
            ))

        self._pr_name = _pr_compile(
            r"^CREATE (?:\w+ +)?TABLE +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
            self.preparer._unescape_identifier,
        )

        # `col`,`col2`(32),`col3`(15) DESC
        #
        self._re_keyexprs = _re_compile(
            r"(?:"
            r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
            r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes)

        # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
        self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")

        # 123 or 123,456
        self._re_csv_int = _re_compile(r"\d+")

        # `colname` <type> [type opts]
        #  (NOT NULL | NULL)
        #   DEFAULT ('value' | CURRENT_TIMESTAMP...)
        #   COMMENT 'comment'
        #  COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
        #  STORAGE (DISK|MEMORY)
        self._re_column = _re_compile(
            r"  "
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
            r"(?:'(?:''|[^'])*',?)+))\))?"
            r"(?: +(?P<unsigned>UNSIGNED))?"
            r"(?: +(?P<zerofill>ZEROFILL))?"
            r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
            r"(?: +COLLATE +(?P<collate>[\w_]+))?"
            r"(?: +(?P<notnull>(?:NOT )?NULL))?"
            r"(?: +DEFAULT +(?P<default>"
            r"(?:NULL|'(?:''|[^'])*'|[\w\(\)]+"
            r"(?: +ON UPDATE [\w\(\)]+)?)"
            r"))?"
            r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
            r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
            r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
            r"(?: +STORAGE +(?P<storage>\w+))?"
            r"(?: +(?P<extra>.*))?"
            r",?$" % quotes)

        # Fallback, try to parse as little as possible
        self._re_column_loose = _re_compile(
            r"  "
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
            r".*?(?P<notnull>(?:NOT )NULL)?" % quotes)

        # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
        # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
        # KEY_BLOCK_SIZE size | WITH PARSER name  /*!50100 WITH PARSER name */
        self._re_key = _re_compile(
            r"  "
            r"(?:(?P<type>\S+) )?KEY"
            r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
            r"(?: +USING +(?P<using_pre>\S+))?"
            r" +\((?P<columns>.+?)\)"
            r"(?: +USING +(?P<using_post>\S+))?"
            r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
            r"(?: +WITH PARSER +(?P<parser>\S+))?"
            r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
            r"(?: +/\*(?P<version_sql>.+)\*/ +)?"
            r",?$" % quotes)

        # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
        # It means if the MySQL version >= \d+, execute what's in the comment
        self._re_key_version_sql = _re_compile(
            r"\!\d+ "
            r"(?: *WITH PARSER +(?P<parser>\S+) *)?")

        # CONSTRAINT `name` FOREIGN KEY (`local_col`)
        # REFERENCES `remote` (`remote_col`)
        # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
        # ON DELETE CASCADE ON UPDATE RESTRICT
        #
        # unique constraints come back as KEYs
        kw = quotes.copy()
        kw["on"] = "RESTRICT|CASCADE|SET NULL|NOACTION"
        self._re_fk_constraint = _re_compile(
            r"  "
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"FOREIGN KEY +"
            r"\((?P<local>[^\)]+?)\) REFERENCES +"
            r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
            r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
            r"\((?P<foreign>[^\)]+?)\)"
            r"(?: +(?P<match>MATCH \w+))?"
            r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
            r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw)

        # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)'
        # testing on MariaDB 10.2 shows that the CHECK constraint
        # is returned on a line by itself, so to match without worrying
        # about parentheses in the expression we go to the end of the line
        self._re_ck_constraint = _re_compile(
            r"  "
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"CHECK +"
            r"\((?P<sqltext>.+)\),?" % kw)

        # PARTITION
        #
        # punt!
        self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")

        # Table-level options (COLLATE, ENGINE, etc.)
        # Do the string options first, since they have quoted
        # strings we need to get rid of.
        for option in _options_of_type_string:
            self._add_option_string(option)

        for option in (
                "ENGINE",
                "TYPE",
                "AUTO_INCREMENT",
                "AVG_ROW_LENGTH",
                "CHARACTER SET",
                "DEFAULT CHARSET",
                "CHECKSUM",
                "COLLATE",
                "DELAY_KEY_WRITE",
                "INSERT_METHOD",
                "MAX_ROWS",
                "MIN_ROWS",
                "PACK_KEYS",
                "ROW_FORMAT",
                "KEY_BLOCK_SIZE",
        ):
            self._add_option_word(option)

        self._add_option_regex("UNION", r"\([^\)]+\)")
        self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
        self._add_option_regex(
            "RAID_TYPE",
            r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
        )
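
A condensed sketch of the quoting setup at the top of _prep_regexes, assuming MySQL-style backtick identifier quoting (the real values come from self.preparer):

import re

quotes = {
    "iq": re.escape("`"),       # initial quote
    "fq": re.escape("`"),       # final quote
    "esc_fq": re.escape("``"),  # escaped (doubled) final quote
}
_re_name = re.compile(
    r"^CREATE (?:\w+ +)?TABLE +"
    r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes)

m = _re_name.match("CREATE TABLE `my``table` (")
print(m.group("name"))  # my``table
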
def main():
    """
    Import the ngeo apps files
    """

    parser = ArgumentParser(description='import ngeo apps files')

    parser.add_argument('--html', action="store_true", help="Import the html template")
    parser.add_argument('--js', action="store_true", help="Import the javascript controller")
    parser.add_argument('--package', action="store_true", help="Import the package JSON")
    parser.add_argument('interface', metavar='INTERFACE', help="The interface we import")
    parser.add_argument('src', metavar='SRC', help="The ngeo source file")
    parser.add_argument('dst', metavar='DST', help="The destination file")

    args = parser.parse_args()

    with open(args.src) as src:
        data = src.read()

        if args.package:
            ngeo_json_data = loads(data)
            json_data = {}
            json_data["name"] = "{{package}}"
            json_data["version"] = "2.0.0"
            json_data["description"] = "A GeoMapFish project"

            json_data["devDependencies"] = ngeo_json_data["devDependencies"]
            # freeze the ngeo version
            json_data["devDependencies"]["ngeo"] = _get_ngeo_version()
            for package in [
                "angular-jsdoc",
                "angular-mocks",
                "coveralls",
                "gaze",
                "jsdoc",
                "jsdom",
                "karma",
                "karma-coverage",
                "karma-jasmine",
                "karma-phantomjs-launcher",
            ]:
                del json_data["devDependencies"][package]

            data = dumps(json_data, indent=2, sort_keys=True)
            data = _sub(r" +\n", "\n", data)
            data = data + "\n"

        else:
            data = re.sub(r"{{", r"\\{\\{", data)
            data = re.sub(r"}}", r"\\}\\}", data)
            data = re.sub("app", "{{package}}", data)

# temporarily disabled ...
#        if args.js:
            # Full text search
#            data = _sub(r"datasetTitle: 'Internal',", r"datasetTitle: '{{project}}',", data)

        if args.html:
            data = "<%\n" \
                "from json import dumps\n" \
                "from c2cgeoportal.lib.cacheversion import get_cache_version\n" \
                "%>\n" + \
                data
            # back for ng-app
            data = _sub(r"ng-{{package}}", r"ng-app", data)
            # back for gmf-app- css prefix
            data = _sub(r"gmf-{{package}}-", r"gmf-app-", data, required=False)
            if args.interface == "mobile":
                # back for mobile-web-app-capable
                data = _sub(
                    r"mobile-web-{{package}}-capable",
                    r"mobile-web-app-capable", data
                )
            else:
                data = _sub(
                    r'<img src="image/([^"]+)"( alt="")? ?/>',
                    '<img src="${request.static_url(\'{{package}}:static-ngeo/images/\\1\')}" />',
                    data,
                )
            data = _sub(
                r'<link rel="shortcut icon" href="image/favicon.ico"/>',
                '<link rel="shortcut icon" href="${request.static_url(\'{{package}}:static-ngeo/images/favicon.ico\')}"/>',  # noqa
                data,
            )
            # Styles
            data = _sub(
                r'    <link rel="stylesheet.*/build/{}.css">'.format(args.interface),
                r"""    <link rel="stylesheet" href="${{request.static_url('{{{{package}}}}:static-ngeo/build/{interface}.css')}}" type="text/css">""".format(interface=args.interface),  # noqa
                data,
                count=1,
                flags=re.DOTALL
            )
            # Scripts
            data = _sub(
                r'    <script',
                r"""% if debug:
    <script>
        window.CLOSURE_BASE_PATH = '';
        window.CLOSURE_NO_DEPS = true;
    </script>
    <script""",
                data, count=1
            )
            data = _sub(
                re.escape('    <script src="/@?main=') + ".*" + re.escape('watchwatchers.js"></script>'),
                r"""
    <script src="${{request.static_url('%s/closure/goog/base.js' % request.registry.settings['closure_library_path'])}}"></script>
    <script src="${{request.route_url('deps.js')}}"></script>
    <script>
        goog.require('{{{{package}}}}_{interface}');
    </script>
    <script src="${{request.static_url('{{{{package}}}}:static-ngeo/build/templatecache.js')}}"></script>
    <script src="${{request.static_url('%s/ngeo/utils/watchwatchers.js' % request.registry.settings['node_modules_path'])}}"></script>
    <script>
        {{{{package}}}}.componentsBaseTemplateUrl = '${{request.static_url("{{{{package}}}}:static-ngeo/components")}}';
        // {{{{package}}}}.partialsBaseTemplateUrl = '${{request.static_url("{{{{package}}}}:static-ngeo/partials")}}';
        // {{{{package}}}}.baseTemplateUrl = '${{request.static_url("{{{{package}}}}:static-ngeo/js")}}';
    </script>
% else:
    <script src="${{request.static_url('{{{{package}}}}:static-ngeo/build/{interface}.js')}}"></script>
% endif""".format(interface=args.interface),  # noqa
                data,
                count=1,
                flags=re.DOTALL
            )
            data = _sub(
                '{}([^"]+){}(.*){}'.format(
                    re.escape('<script src="../../../../node_modules/'),
                    re.escape('"'),
                    re.escape("></script>"),
                ),
                r"""<script src="${request.static_url('%s/\1' % request.registry.settings['node_modules_path'])}"\2></script>""",  # noqa
                data,
            )
            data = _sub(
                '{}([^"]+){}(.*){}'.format(
                    re.escape('<script src="../../../../'),
                    re.escape('"'),
                    re.escape("></script>"),
                ),
                r"""<script src="${request.static_url('%s/ngeo/\1' % request.registry.settings['node_modules_path'])}"\2></script>""",  # noqa
                data,
            )
            # i18n
            data = _sub(
                "module.constant\('defaultLang', 'en'\);",
                "module.constant('defaultLang', "
                "'${request.registry.settings[\"default_locale_name\"]}');",
                data,
            )
            data = _sub(re.escape(r"""
        var cacheVersion = '0';
"""), "", data)
            data = _sub(
                re.escape(r"""
        var angularLocaleScriptUrlElements = urlElements.slice(0, urlElements.length - 3);
        angularLocaleScriptUrlElements.push('build', 'angular-locale_\{\{locale\}\}.js?cache_version=' + cacheVersion);"""),  # noqa
                "",
                data,
            )
            data = _sub(
                re.escape(
                    "gmfModule.constant('angularLocaleScript', "
                    "angularLocaleScriptUrlElements.join('/'));"
                ),
                "gmfModule.constant('angularLocaleScript', "
                "'${request.static_url('{{package}}:static-ngeo/build/')}"
                "angular-locale_\{\{locale\}\}.js');",
                data,
            )
            data = _sub(
                re.escape("""
        var langUrls = {};
        ['en', 'fr', 'de'].forEach(function(lang) {
            var langUrlElements = urlElements.slice(0, urlElements.length - 3);
            langUrlElements.push('build', 'gmf-' + lang + '.json?cache_version=' + cacheVersion)
            langUrls[lang] = langUrlElements.join('/')
        });"""),
                r"""        var langUrls = {
${ ',\\n'.join([
    "          '{lang}': '{url}'".format(
        lang=lang,
        url=request.static_url('{{package}}:static-ngeo/build/{lang}.json'.format(lang=lang))
    )
    for lang in request.registry.settings["available_locale_names"]
]) | n}
        };""",
                data,
            )
            data = _sub(
                re.escape("module.constant('cacheVersion', cacheVersion);"),
                "module.constant('cacheVersion', '${get_cache_version()}');",
                data,
            )
            data = _subs(
                [(
                    "module.constant\('gmfSearchGroups', \[\]\);",
                    False
                ), (
                    "module.constant\('gmfSearchGroups', \[[^\]]*\]\);",
                    "module.constant('gmfSearchGroups', ${dumps(fulltextsearch_groups) | n});",
                )],
                data,
            )

            # replace routes
            for constant, url_end, route, required in [
                ("authenticationBaseUrl", r"", "base", True),
                ("fulltextsearchUrl", r"/fulltextsearch", "fulltextsearch", True),
                ("gmfRasterUrl", r"/raster", "raster", args.interface != "mobile"),
                ("gmfProfileCsvUrl", r"/profile.csv", "profile.csv", args.interface != "mobile"),
                ("gmfProfileJsonUrl", r"/profile.json", "profile.json", args.interface != "mobile"),
                ("gmfPrintUrl", r"/printproxy", "printproxy", args.interface != "mobile"),
                ("gmfTreeUrl", r"/themes", "themes", True),
                ("gmfShortenerCreateUrl", r"/short/create", "shortener_create", args.interface != "mobile"),
            ]:
                data = _sub(
                    r"module.constant\('%s', "
                    "'https://geomapfish-demo.camptocamp.net/2.[0-9]/wsgi%s\??([^\']*)'\);" % (
                        constant, url_end
                    ),
                    _RouteDest(constant, route),
                    data,
                    required=required,
                )
            data = _sub(
                re.escape("module.constant('gmfContextualdatacontentTemplateUrl', window.location.pathname + 'contextualdata.html');"),  # noqa
                "module.constant('gmfContextualdatacontentTemplateUrl', {{package}}.componentsBaseTemplateUrl + '/contextualdata/contextualdata.html');",  # noqa
                data, required=False
            )
            data = _sub(
                re.escape("module.value('ngeoWfsPermalinkOptions',") + ".*defaultFeatureNS",
                """module.value('ngeoWfsPermalinkOptions', /** @type {ngeox.WfsPermalinkOptions} */ ({
              url: '${request.route_url('mapserverproxy') | n}',
              wfsTypes: ${dumps(wfs_types) | n},
              defaultFeatureNS""",
                data,
                count=1,
                flags=re.DOTALL,
            )
            data = _sub(
                re.escape("module.constant('defaultTheme', 'OSM');"),
                "module.constant('defaultTheme', 'Demo');",
                data,
            )

        with open(args.dst, "wt") as dst:
            dst.write(data)
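
_sub in the script above is a project-local wrapper around re.sub (defined elsewhere); the recurring idiom is re.escape() around literal HTML fragments with bare regex operators such as '.*' spliced in between. The same idiom in plain re:

import re

pattern = (re.escape('<script src="/@?main=') + '.*'
           + re.escape('watchwatchers.js"></script>'))
html = '<script src="/@?main=app.js,watchwatchers.js"></script>'
print(bool(re.search(pattern, html)))  # True
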
Exemple #54
0
 def getSiteURLPattern(self):
     return r"https?://(www.)?" + re.escape(self.getSiteDomain() +
                                            "/viewstory.php?sid=") + r"\d+$"
Exemple #55
0
def get_standardize(input_data):
    lower_case = tf.strings.lower(input_data)
    cleaned_data = tf.strings.regex_replace(lower_case, '<br />', '')
    regex_data = tf.strings.regex_replace(
        cleaned_data, '[%s]' % re.escape(string.punctuation), '')
    return regex_data
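
The '[%s]' % re.escape(string.punctuation) idiom above builds a punctuation-stripping character class; escaping matters because string.punctuation includes ']', '^', '\' and '-', any of which could corrupt the class. The same idiom with plain re:

import re
import string

punct_re = re.compile('[%s]' % re.escape(string.punctuation))
print(punct_re.sub('', "it's a test, isn't it?"))  # its a test isnt it
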
import tempfile

from project.preprocessing.get_input import ProcessInput
import os, subprocess
import re, logging
from pathlib import Path
from langdetect import detect
import enchant, spacy, json
from collections import Counter
from timeit import default_timer as timer
from langdetect import DetectorFactory
DetectorFactory.seed = 0

control_chars = ''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))
control_char_re = re.compile('[%s]' % re.escape(control_chars))


def remove_control_chars(s):
    return control_char_re.sub('', s)
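
Quick sanity check of the helper above:

print(remove_control_chars('hello\x00 world\x07'))  # -> 'hello world'
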


class PublicationPreprocessing:
    def __init__(self, path='data/input/'):
        self.nlp = spacy.load('en', disable=['ner', 'textcat'])
        self.pi = ProcessInput()
        self.pub_df = self.pi.load_publication_input(path=path)

    def write_nounPharses_to_file(self, np_dict, file_name):
        """
        Writes the dict to a file on the disk.
        :param np_dict: a dict containing noun phrases present in the relevant sections of a publication
Exemple #57
0
# Configuration for urlize() function.
TRAILING_PUNCTUATION = ['.', ',', ':', ';']
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('&lt;', '&gt;')]

# List of possible strings used for bullets in bulleted lists.
DOTS = [u'&middot;', u'*', u'\u2022', u'&#149;', u'&bull;', u'&#8226;']

unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
word_split_re = re.compile(r'(\s+)')
simple_url_re = re.compile(r'^https?://\w')
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$')
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
del x # Temporary variable
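
hard_coded_bullets_re joins the escaped DOTS strings into one alternation; re.escape is needed because '*' is among the bullets. A reduced sketch:

import re

DOTS = [u'&middot;', u'*', u'\u2022']
bullets_re = re.compile(
    r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join(re.escape(x) for x in DOTS),
    re.DOTALL)
print(bool(bullets_re.search('<p>* first item</p>')))  # True
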

def escape(html):
    return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;').replace("`", "&#145;")
    
def urlize(text, trim_url_limit = None, nofollow = False, autoescape = False):
    """
    Converts any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
Exemple #58
0
        scheme = data.group('scheme')
        hostname = data.group('hostname')
        return (path, scheme, hostname)
    return None


def dns_dig_records(hostname):

    try:
        _dig = check_output(['dig', '@8.8.8.8', 'ANY', hostname])
    except CalledProcessError, e:
        _dig = e.output

    results = [
        r.groupdict() for r in re.finditer(
            re.escape(hostname) +
            '\.\s+\d+\s+\w+\s+(?P<record_type>\w+)\s+(?P<record>.+)', _dig)
    ]
    records = {}
    for r in results:
        if r['record_type'] in records:
            records[r['record_type']].append(r['record'])
        else:
            records[r['record_type']] = [r['record']]
    return records
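
The finditer call above anchors on the escaped hostname so that dots in the domain match literally. A self-contained version of that extraction, with the dig output line format assumed:

import re

hostname = 'example.com'
_dig = ('example.com.  3600  IN  A  93.184.216.34\n'
        'example.com.  3600  IN  MX  10 mail.example.com.')
results = [r.groupdict() for r in re.finditer(
    re.escape(hostname) + r'\.\s+\d+\s+\w+\s+(?P<record_type>\w+)\s+(?P<record>.+)',
    _dig)]
print(results[0]['record_type'], results[0]['record'])  # A 93.184.216.34
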


def url_get_host(url):
    hostname = split_url(url)[2]
    if hostname == "":
        return None
class Dictionary():
    sentence_punctuation = {'.','?','!','\n'}
    _TYPE_BASIC = 'basic'
    _TYPE_PRE = 'pre'
    _TYPE_POST = 'post'
    
    def __init__(self, filename, use_long_category_names=True, internal_category_list=None):
        """@param internal_category_list: Should be None or '2001' or '2007' """
        self._stems = dict()  # a prefix tree for the stems; the leaves are sets of categories
        self._lookup = defaultdict(dict)  # word->type->????->{categories}
                                          # type can be one of "basic", "pre", "post":
                                          # "basic" leads to a set of categories,
                                          # "pre" and "post" lead to a list of tuples of (conditions, if_true categories, if_false categories)
        self._ignored = set()  # caches words that are searched for but not found; this favors processing over memory
        
        self._setup_category_lookup(internal_category_list, use_long_category_names)
        try:
            self.load_dictionary_file(filename, internal_category_list)
        except:
            sys.stderr.writelines(["Failed to load dictionary file: "+filename+"\n",
                                   "Is the dictionary file correct?\n",
                                   "Does a % precede the category list?\n",
                                   "If there is no category list, did you set internal_category_list='2007' ?\n",
                                   "Hope this helps...\n"])
            raise
    
    
    _dictionary_line_re =  re.compile(r'^(\w+)(\*?)\s*(.*)$')
    _dictionary_line_categories_re = re.compile(r'(\d+|\<(\w+(\s+\w+)*)\>(\d+)(\/(\d+))?|\(\s*(\d+(\s+\d+)*)\s*\)(\d+)(\/(\d+))?)')
    def load_dictionary_file(self, filename, internal_category_list=None):
        category_mode = False
        for line in open(filename):
            line = line.strip()
            
            if line=='' or line.startswith('#'): 
                continue
            if line.startswith('%'):
                category_mode = not category_mode
                continue
            
            if category_mode:
                if internal_category_list == None:
                    number, category_name = line.split()
                    category_name = self._translate_category_name(category_name)
                    self._category_lookup[int(number)]=category_name
                continue
            
            word, is_stem, all_word_categories = Dictionary._dictionary_line_re.match(line).groups()
            for category_group in Dictionary._dictionary_line_categories_re.findall(all_word_categories):
                category = category_group[0]
                if category == '00':
                    continue
                elif category.isdigit():
                    if is_stem=='*':
                        self._add_stemmed(word, self._category_lookup[int(category)])
                    else:
                        if Dictionary._TYPE_BASIC not in self._lookup[word]:
                            self._lookup[word][Dictionary._TYPE_BASIC]=set()
                        self._lookup[word][Dictionary._TYPE_BASIC].add(self._category_lookup[int(category)])
                
                elif '(' in category or '<' in category: #convoluted special cases lead to much of the complexity in this program
                    junk, post, junk, if_post, junk, if_not_post, pre, junk, if_pre, junk, if_not_pre = category_group
                    if pre != '':
                        entry_type = Dictionary._TYPE_PRE
                        conditions = sorted([self._category_lookup[int(number)] for number in pre.split()])
                        if_true = self._category_lookup[int(if_pre)]
                        if if_not_pre != '':
                            if_not_true = self._category_lookup[int(if_not_pre)]
                    elif post != '':
                        entry_type = Dictionary._TYPE_POST
                        conditions = sorted(post.lower().split())
                        if_true = self._category_lookup[int(if_post)]
                        if if_not_post != '':
                            if_not_true = self._category_lookup[int(if_not_post)]
                        
                    if entry_type not in self._lookup[word]:
                        self._lookup[word][entry_type] = list()
                    
                    for other_conditions, other_if_set, other_if_not_set in self._lookup[word][entry_type]:
                        if str(other_conditions)==str(conditions): #a little costly on load means less on use
                            other_if_set.add(if_true)
                            other_if_not_set.add(if_not_true)
                            break
                    else: #for else means the for ended naturally
                        self._lookup[word][entry_type].append( (conditions, {if_true}, {if_not_true}) )
    
    def _translate_category_name(self, category_name):
        if category_name.lower() in self._category_name_lookup:
            return self._category_name_lookup[category_name.lower()]
        return category_name
    
    def _add_stemmed(self, word, category):
        current_node = self._stems
        for char in word[:-1]:
            if char not in current_node:
                current_node[char]=dict()
            current_node = current_node[char]
        if word[-1] not in current_node:
            current_node[word[-1]]=set()
        current_node = current_node[word[-1]]

        current_node.add(category)
    
    _pure_punctuation_re = re.compile('^['+re.escape(string.punctuation)+']+$')
    _punctuation_of_interest = {'?':'Question Marks', '!':'Exclamation Marks', '"':'Quote Marks',
                                ',':'Comma',':':'Colon',';':'Semicolon','-':'Dash','\'':'Apostrophe',
                                '(':'Parenthesis', ')':'Parenthesis', '{':'Parenthesis', '}':'Parenthesis', '[':'Parenthesis', ']':'Parenthesis' }
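    # Note: re.escape(string.punctuation) is what makes the class above safe;
    # string.punctuation contains ']', '^', '\' and '-', each of which would
    # otherwise change the meaning of the character class. For example,
    # _pure_punctuation_re.match('?!...') succeeds, while
    # _pure_punctuation_re.match('e.g') fails because it contains letters.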
    def score_word(self, word, previous_word=None, next_word=None):
        scores = Counter()
        if word is None:
            return scores
        
        if '\n' in word:
            scores['Newlines']+=1
            
        word = word.strip().lower()
        
        if len(word)==0:
            pass
        elif word[0].isdigit():
            scores['Word Count']+=1
            scores['Numerals']+=1
        elif Dictionary._pure_punctuation_re.match(word):
            scores['All Punctuation']+=1
            for char in word:
                if char in Dictionary._punctuation_of_interest:
                    scores[Dictionary._punctuation_of_interest[char]]+=1
                else:
                    scores['Other Punctuation']+=1
        else:
            scores['Word Count']+=1
            if len(word) > 6:
                scores['Six Letter Words'] += 1
            if word not in self._ignored:
                if word in self._lookup:
                    for entry_type in self._lookup[word]:
                        if entry_type==Dictionary._TYPE_BASIC:
                            scores.update(self._lookup[word][entry_type])
                        else:
                            for conditions, if_set, if_not_set in self._lookup[word][entry_type]:
                                if ((entry_type==Dictionary._TYPE_PRE and not set(self.score_word(word=previous_word, next_word=word).keys()).isdisjoint(set(conditions))) or 
                                    (entry_type==Dictionary._TYPE_POST and next_word is not None and next_word.lower() in conditions)):
                                    scores.update(if_set)
                                else:
                                    scores.update(if_not_set)
                else:
                    current_node = self._stems
                    for char in word:
                        if char in current_node:
                            current_node = current_node[char]
                            if isinstance(current_node, set):
                                if Dictionary._TYPE_BASIC not in self._lookup[word]:
                                    self._lookup[word][Dictionary._TYPE_BASIC]=set()
                                self._lookup[word][Dictionary._TYPE_BASIC].update(current_node) #add to main lookup for time efficiency
                                scores.update(self._lookup[word][Dictionary._TYPE_BASIC])
                                break
                        else:
                            self._ignored.add(word) #dead end
                            break
                    else:
                        self._ignored.add(word) #not found but didn't hit a dead end

                if word not in self._ignored: #Note this is "still not in"
                    scores['Dictionary Words']+=1
        return scores
    
    def _setup_category_lookup(self, internal_category_list, use_long_category_names):
        self._category_name_lookup = dict()
        if use_long_category_names:
            for long_name, LIWC2007_number, LIWC2007_short, LIWC2001_number, LIWC2001_short in Dictionary._liwc_categories:
                if LIWC2001_short is not None:
                    self._category_name_lookup[LIWC2001_short]=long_name
                if LIWC2007_short is not None:
                    self._category_name_lookup[LIWC2007_short]=long_name
        
        self._category_lookup = dict()
        if internal_category_list is not None:
            for long_name, LIWC2007_number, LIWC2007_short, LIWC2001_number, LIWC2001_short in Dictionary._liwc_categories:
                if internal_category_list == '2001' and LIWC2001_number is not None:
                    self._category_lookup[LIWC2001_number]=self._translate_category_name(LIWC2001_short)
                if internal_category_list == '2007' and LIWC2007_number is not None:
                    self._category_lookup[LIWC2007_number]=self._translate_category_name(LIWC2007_short)
        
    # Each entry, in case it is needed:
    # (long_name, LIWC2007_number, LIWC2007_short, LIWC2001_number, LIWC2001_short)
    _liwc_categories = [
    ('Total Function Words',1,'funct',None,None), 
    ('Total Pronouns',2,'pronoun',1,'pronoun'), 
    ('Personal Pronouns',3,'ppron',None,None), 
    ('First Person Singular',4,'i',2,'i'), 
    ('First Person Plural',5,'we',3,'we'), 
    ('Second Person',6,'you',5,'you'), 
    ('Third Person Singular',7,'shehe',None,None), 
    ('Third Person Plural',8,'they',None,None), 
    ('Impersonal Pronouns',9,'ipron',None,None), 
    ('Articles',10,'article',9,'article'), 
    ('Common Verbs',11,'verb',None,None), 
    ('Auxiliary Verbs',12,'auxverb',None,None), 
    ('Past Tense',13,'past',38,'past'), 
    ('Present Tense',14,'present',39,'present'), 
    ('Future Tense',15,'future',40,'future'), 
    ('Adverbs',16,'adverb',None,None), 
    ('Prepositions',17,'preps',10,'preps'), 
    ('Conjunctions',18,'conj',None,None), 
    ('Negations',19,'negate',7,'negate'), 
    ('Quantifiers',20,'quant',None,None), 
    ('Number',21,'number',11,'number'), 
    ('Swear Words',22,'swear',66,'swear'), 
    ('Social Processes',121,'social',31,'social'), 
    ('Family',122,'family',35,'family'), 
    ('Friends',123,'friend',34,'friends'), 
    ('Humans',124,'humans',36,'humans'), 
    ('Affective Processes',125,'affect',12,'affect'), 
    ('Positive Emotion',126,'posemo',13,'posemo'), 
    ('Negative Emotion',127,'negemo',16,'negemo'), 
    ('Anxiety',128,'anx',17,'anx'), 
    ('Anger',129,'anger',18,'anger'), 
    ('Sadness',130,'sad',19,'sad'), 
    ('Cognitive Processes',131,'cogmech',20,'cogmech'), 
    ('Insight',132,'insight',22,'insight'), 
    ('Causation',133,'cause',21,'cause'), 
    ('Discrepancy',134,'discrep',23,'discrep'), 
    ('Tentative',135,'tentat',25,'tentat'), 
    ('Certainty',136,'certain',26,'certain'), 
    ('Inhibition',137,'inhib',24,'inhib'), 
    ('Inclusive',138,'incl',44,'incl'), 
    ('Exclusive',139,'excl',45,'excl'), 
    ('Perceptual Processes',140,'percept',27,'senses'), 
    ('See',141,'see',28,'see'), 
    ('Hear',142,'hear',29,'hear'), 
    ('Feel',143,'feel',30,'feel'), 
    ('Biological Processes',146,'bio',None,None), 
    ('Body',147,'body',61,'body'), 
    ('Health',148,'health',None,None), 
    ('Sexual',149,'sexual',62,'sexual'), 
    ('Ingestion',150,'ingest',63,'eating'), 
    ('Relativity',250,'relativ',None,None), 
    ('Motion',251,'motion',46,'motion'), 
    ('Space',252,'space',41,'space'), 
    ('Time',253,'time',37,'time'), 
    ('Work',354,'work',49,'job'), 
    ('Achievement',355,'achieve',50,'achieve'), 
    ('Leisure',356,'leisure',51,'leisure'), 
    ('Home',357,'home',52,'home'), 
    ('Money',358,'money',56,'money'), 
    ('Religion',359,'relig',58,'relig'), 
    ('Death',360,'death',59,'death'), 
    ('Assent',462,'assent',8,'assent'), 
    ('Nonfluencies',463,'nonfl',67,'nonfl'), 
    ('Fillers',464,'filler',68,'fillers'), 
    ('Total first person',None,None,4,'self'), 
    ('Total third person',None,None,6,'other'), 
    ('Positive feelings',None,None,14,'posfeel'), 
    ('Optimism and energy',None,None,15,'optim'), 
    ('Communication',None,None,32,'comm'), 
    ('Other references to people',None,None,33,'othref'), 
    ('Up',None,None,42,'up'), 
    ('Down',None,None,43,'down'), 
    ('Occupation',None,None,47,'occup'), 
    ('School',None,None,48,'school'), 
    ('Sports',None,None,53,'sports'), 
    ('TV',None,None,54,'tv'), 
    ('Music',None,None,55,'music'), 
    ('Metaphysical issues',None,None,57,'metaph'), 
    ('Physical states and functions',None,None,60,'physcal'), 
    ('Sleeping',None,None,64,'sleep'), 
    ('Grooming',None,None,65,'groom')]
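
The stem branch in score_word above walks a character trie whose leaves are
category sets. A minimal, self-contained sketch of that idea, with
hypothetical helper names (build_stem_trie, match_stem) standing in for the
Dictionary internals:

def build_stem_trie(stem_entries):
    """stem_entries maps a stem such as 'happi' to a set of categories."""
    trie = {}
    for stem, categories in stem_entries.items():
        node = trie
        for char in stem[:-1]:
            node = node.setdefault(char, {})
        node[stem[-1]] = set(categories)  # leaf payload, as in score_word
    return trie

def match_stem(trie, word):
    """Return the category set of the first stem prefixing word, else None."""
    node = trie
    for char in word:
        if char not in node:
            return None  # dead end: no stem starts this way
        node = node[char]
        if isinstance(node, set):
            return node  # hit a leaf: stem matched
    return None  # ran out of characters before reaching a leaf

trie = build_stem_trie({'happi': {'Positive Emotion'}})
print(match_stem(trie, 'happiness'))  # {'Positive Emotion'}
print(match_stem(trie, 'sad'))        # None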
    def delimitedTextData(self, testname, filename, requests, verbose,
                          **params):
        """Retrieve the data for a delimited text URL.

        Create a layer for the specified file and query parameters
        and return the data for the layer (fields, data).
        """
        filepath = os.path.join(unitTestDataPath("delimitedtext"), filename)
        url = MyUrl.fromLocalFile(filepath)
        if not requests:
            requests = [{}]
        for k, v in params.items():
            url.addQueryItem(k, v)
        urlstr = url.toString()
        log = []
        with MessageLogger('DelimitedText') as logger:
            if verbose:
                print(testname)
            layer = QgsVectorLayer(urlstr, 'test', 'delimitedtext')
            uri = layer.dataProvider().dataSourceUri()
            if verbose:
                print(uri)
            basename = os.path.basename(filepath)
            if not basename.startswith('test'):
                basename = 'file'
            uri = re.sub(r'^file://[^?]*', 'file://' + basename, uri)
            fields = []
            fieldTypes = []
            data = {}
            if layer.isValid():
                for nr, r in enumerate(requests):
                    if verbose:
                        print("Processing request", nr + 1, repr(r))
                    # collections.Callable was removed in Python 3.10;
                    # the built-in callable() is the equivalent check here.
                    if callable(r):
                        r(layer)
                        if verbose:
                            print("Request function executed")
                        continue
                    rfields, rtypes, rdata = self.layerData(
                        layer, r, nr * 1000)
                    if len(rfields) > len(fields):
                        fields = rfields
                        fieldTypes = rtypes
                    data.update(rdata)
                    if not rdata:
                        log.append("Request " + str(nr) +
                                   " did not return any data")
                    if verbose:
                        print("Request returned", len(rdata), "features")
            for msg in logger.messages():
                filelogname = 'temp_file' if 'tmp' in filename.lower() else filename
                msg = re.sub(r'file\s+.*' + re.escape(filename),
                             'file ' + filelogname, msg)
                msg = msg.replace(filepath, filelogname)
                log.append(msg)
            return dict(fields=fields,
                        fieldTypes=fieldTypes,
                        data=data,
                        log=log,
                        uri=uri,
                        geometryType=layer.geometryType())
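
The message-scrubbing loop at the end of delimitedTextData embeds a literal
filename in a regular expression, which is only safe because re.escape
neutralizes any metacharacters in it. A standalone sketch of that
normalization (the sample message format is an assumption for illustration):

import os
import re

def normalize_log(msg, filepath, filelogname):
    """Replace machine-specific file references with a stable placeholder."""
    filename = os.path.basename(filepath)
    # re.escape matters here: a name like 'data(1).csv' would otherwise
    # change the meaning of the pattern.
    msg = re.sub(r'file\s+.*' + re.escape(filename), 'file ' + filelogname, msg)
    return msg.replace(filepath, filelogname)

print(normalize_log('Errors in file /tmp/x9q/test.csv',
                    '/tmp/x9q/test.csv', 'temp_file'))
# -> 'Errors in file temp_file'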