Python match.start Exemples, re.match.start Python Exemples

Exemple #1

0

Afficher le fichier

 def get_context(self, document: str, match_found: re.Match,
                 context_config: dict):
     """
     Build the context window surrounding a regex match in ``document``.

     The text before and after the match is split into units (words or
     lines, depending on ``context_config['type']``), passed through
     ``self.trim_boundaries`` and re-joined with single spaces around the
     literal placeholder ``TARGETWORD``, which stands in for the match.

     :param document: The document that was scanned
     :param match_found: The match used as the center of the context window
     :param context_config: Mapping with a ``'type'`` entry (compared against
         ``ContextType.WORD`` and ``ContextType.PARAGRAPH``) and an integer
         ``'size'`` entry giving the window size on each side of the match
     :return: For ``ContextType.WORD`` a tuple ``(match_str, context)``; for
         ``ContextType.PARAGRAPH`` only the context string; ``None`` for any
         other type. NOTE(review): the two shapes differ; this is kept as is
         because callers may depend on it - confirm before unifying.
     """
     match_str = document[match_found.start():match_found.end()].strip()
     preceding_text = document[:match_found.start()]
     succeeding_text = document[match_found.end():]

     if context_config['type'] == ContextType.WORD:
         preceding_words = self.trim_boundaries(
             re.split(r'\s+', preceding_text))
         succeeding_words = self.trim_boundaries(
             re.split(r'\s+', succeeding_text))
         size = context_config['size']
         # Clamp at 0: when the window is larger than the number of
         # available words, the start index would become negative and the
         # slice would count from the end, silently dropping the earliest
         # words instead of keeping all of them.
         first_kept = max(0, len(preceding_words) - size)
         return match_str, ' '.join(
             preceding_words[first_kept:] +
             ['TARGETWORD'] +
             succeeding_words[:size])

     if context_config['type'] == ContextType.PARAGRAPH:
         preceding_lines = self.trim_boundaries(
             re.split(self.split_lines_regex, preceding_text))
         succeeding_lines = self.trim_boundaries(
             re.split(self.split_lines_regex, succeeding_text))

         # Whitespace-only lines are used as paragraph boundaries.
         blank_before = [
             index for index, line in enumerate(preceding_lines)
             if not line.strip()
         ]
         blank_after = [
             index for index, line in enumerate(succeeding_lines)
             if not line.strip()
         ]
         # Sentinels for "no boundary found": -1 makes the preceding slice
         # start at 0, len(...) makes the succeeding slice run to the end.
         if not blank_before:
             blank_before = [-1]
         if not blank_after:
             blank_after = [len(succeeding_lines)]

         size = context_config['size']
         # Pick the boundary ``size`` paragraphs away on each side. When
         # ``size`` is not smaller than the number of boundaries found, the
         # last recorded boundary is used instead.
         # NOTE(review): for the preceding side that fallback selects the
         # boundary nearest to the match rather than the farthest one -
         # confirm this is intended.
         before_slot = (len(blank_before) - size
                        if size < len(blank_before) else -1)
         after_slot = size - 1 if size < len(blank_after) else -1
         window_start = blank_before[before_slot] + 1
         window_end = blank_after[after_slot]

         return ' '.join(
             preceding_lines[window_start:] +
             ['TARGETWORD'] +
             succeeding_lines[:window_end])

Exemple #2

0

Afficher le fichier

Fichier : email-anonymizer.py Projet : bokysan/docker-postfix

    def is_message_id(self, match: re.Match, msg: str) -> bool:
        """
        Tell whether an e-mail-like match found in ``msg`` is a message id.

        The match counts as a message id when the matched text itself starts
        with ``self.MESSAGE_ID_LINE``, or when the text immediately before
        the match (ignoring any run of opening brackets / quotes) ends with
        ``self.MESSAGE_ID_LINE``.

        :param match: The regex match for the e-mail-like token
        :param msg: The full string the match was found in
        :return: ``True`` if the token is a message id, ``False`` otherwise
        """
        start = match.start()
        email = match.group()

        # Note that our regex will match things like "message-id=Issue1649523226559@postfix-mail.mail-system.svc.cluster.local"
        # so we need to filter / check for these first

        if email.startswith(self.MESSAGE_ID_LINE):
            return True

        # Not enough room before the match to hold the message-id prefix.
        if start < self.MESSAGE_ID_LINE_LEN:
            return False

        # Walk backwards over opening brackets / quotes. The scan is bounded
        # at index 0: without the bound a negative ``pos`` would wrap around
        # and inspect characters at the END of ``msg``.
        pos = start - 1
        while pos >= 0 and msg[pos] in '{<["\'':
            pos -= 1

        # The first non-bracket character must be the '=' that ends the prefix.
        if pos < 0 or msg[pos] != '=':
            return False

        check = msg[pos - self.MESSAGE_ID_LINE_LEN + 1:pos + 1]
        return check == self.MESSAGE_ID_LINE

Exemple #3

0

Afficher le fichier

Fichier : book.py Projet : VaysseB/autorerename

    def format(self, path: Path, match: re.match):
        """
        Build the renamed path from a successful match.

        Only the span covered by the match is rewritten; the text of the
        matched string before and after that span is carried over unchanged.
        The replacement is produced by ``file_formatter.format`` from
        ``self.renamer`` and the match's positional and named groups.
        """
        assert match is not None

        # Split the matched string around the captured span.
        source = match.string
        head = source[:match.start()]
        tail = source[match.end():]

        positional = match.groups()
        named = match.groupdict()
        replacement = file_formatter.format(
            self.renamer, None, *positional, **named)

        new_name = head + replacement + tail
        return self.untouched_root(path) / Path(new_name)

Exemple #4

0

Afficher le fichier

    def _wrap_date_match(order: str, match: re.match, pattern: str=None) -> dict or None:
        """

        Args:
            order: enums['MDY', 'DMY', 'YMD'] - order of the date
            match: re.match - a regex match object
            pattern: str - if user defined the pattern, record it here

        Returns:

        """
        return {
            'value': match.group(),
            'groups': match.groups(),
            'start': match.start(),
            'end': match.end(),
            'order': order,
            'pattern': pattern
        } if match else None