Example 1
0
        def builtin_entity_match(tokens, token_index):
            # Feature value: the tagging-scheme prefix when the current
            # token falls inside a parsed entity of type `builtin_entity`,
            # None otherwise.
            text = initial_string_from_tokens(tokens)
            current = tokens[token_index]

            matches = self.builtin_entity_parser.parse(
                text, scope=[builtin_entity], use_cache=True)
            # Keep only entities that overlap the current token.
            matches = [m for m in matches
                       if entity_filter(m, current.start, current.end)]
            if not matches:
                return None
            # 0 or 1 entity is the common case; with 2 or more we fall
            # back to the first one found.
            first = matches[0]
            ent_start = first[RES_MATCH_RANGE][START]
            ent_end = first[RES_MATCH_RANGE][END]
            covered = [
                i for i, tok in enumerate(tokens)
                if ent_start <= tok.start < ent_end
                and ent_start < tok.end <= ent_end
            ]
            return get_scheme_prefix(token_index, covered,
                                     self.tagging_scheme)
Example 2
0
 def entity_match(tokens, token_index):
     """Return the scheme prefix for the custom entity covering this token.

     Tokens are normalized through ``self._transform`` before parsing; the
     result is None when no entity of type `entity` overlaps the token.
     """
     transformed = self._transform(tokens)
     text = initial_string_from_tokens(transformed)
     tok = transformed[token_index]
     matches = self.custom_entity_parser.parse(text,
                                               scope=[entity],
                                               use_cache=True)
     # Restrict to entities (of type `entity`) overlapping the token.
     matches = [m for m in matches if entity_filter(m, tok.start, tok.end)]
     if not matches:
         return None
     # 0 or 1 entity is the usual case; fall back to the first otherwise.
     first = matches[0]
     covered = [i for i, t in enumerate(transformed)
                if entity_filter(first, t.start, t.end)]
     return get_scheme_prefix(token_index, covered, self.tagging_scheme)
Example 3
0
 def collection_match(tokens, token_index):
     """Return the scheme prefix of the widest collection ngram containing
     the token, or None when no ngram matches the collection."""
     normalized = [self._transform(tok) for tok in tokens]
     candidates = [ng for ng in get_all_ngrams(normalized)
                   if token_index in ng[TOKEN_INDEXES]]
     # Longest ngrams first, so the widest collection match wins.
     candidates.sort(key=lambda ng: len(ng[TOKEN_INDEXES]), reverse=True)
     for ng in candidates:
         if ng[NGRAM] in collection_set:
             return get_scheme_prefix(token_index,
                                      sorted(ng[TOKEN_INDEXES]),
                                      self.tagging_scheme)
     return None
Example 4
0
 def collection_match(tokens, token_index):
     # Match the current token against the collection via ngrams of the
     # normalized tokens, preferring the ngram spanning the most tokens.
     normalized_tokens = list(map(self._transform, tokens))
     by_length = sorted(get_all_ngrams(normalized_tokens),
                        key=lambda ng: len(ng[TOKEN_INDEXES]),
                        reverse=True)
     for ngram in by_length:
         # Skip ngrams that do not include the current token.
         if token_index not in ngram[TOKEN_INDEXES]:
             continue
         if ngram[NGRAM] in collection_set:
             return get_scheme_prefix(token_index,
                                      sorted(ngram[TOKEN_INDEXES]),
                                      self.tagging_scheme)
     return None
Example 5
0
 def entity_match(tokens, token_index):
     """Return the scheme prefix for the first custom entity overlapping
     the current (transformed) token, or None when nothing overlaps."""
     transformed = self._transform(tokens)
     text = initial_string_from_tokens(transformed)
     tok_start = transformed[token_index].start
     tok_end = transformed[token_index].end
     parsed = custom_entity_parser.parse(text,
                                         scope=[entity],
                                         use_cache=True)
     # Only entities overlapping the current token are relevant.
     overlapping = [m for m in parsed
                    if entity_filter(m, tok_start, tok_end)]
     if not overlapping:
         return None
     first = overlapping[0]
     indexes = [i for i, t in enumerate(transformed)
                if entity_filter(first, t.start, t.end)]
     return get_scheme_prefix(token_index, indexes, self.tagging_scheme)
Example 6
0
        def builtin_entity_match(tokens, token_index):
            # Tag the token with the scheme prefix of the first builtin
            # entity that overlaps it; None when no entity overlaps.
            text = initial_string_from_tokens(tokens)
            current = tokens[token_index]

            found = get_builtin_entities(
                text, self.language, scope=[builtin_entity])
            found = [e for e in found
                     if entity_filter(e, current.start, current.end)]
            if not found:
                return None
            first = found[0]
            span_start = first[RES_MATCH_RANGE][START]
            span_end = first[RES_MATCH_RANGE][END]
            # Indexes of all tokens fully contained in the entity span.
            inside = [
                i for i, tok in enumerate(tokens)
                if span_start <= tok.start < span_end
                and span_start < tok.end <= span_end
            ]
            return get_scheme_prefix(token_index, inside,
                                     self.tagging_scheme)
Example 7
0
        def builtin_entity_match(tokens, token_index):
            """Scheme prefix for the first builtin entity overlapping the
            current token; None when no builtin entity overlaps it."""
            text = initial_string_from_tokens(tokens)
            start, end = tokens[token_index].start, tokens[token_index].end

            candidates = get_builtin_entities(
                text, self.language, scope=[builtin_entity])
            for ent in candidates:
                # Ignore entities that do not overlap the current token.
                if not entity_filter(ent, start, end):
                    continue
                rng = ent[RES_MATCH_RANGE]
                e_start, e_end = rng[START], rng[END]
                token_ids = []
                for idx, tok in enumerate(tokens):
                    # Token must lie fully inside the entity span.
                    if e_start <= tok.start < e_end \
                            and e_start < tok.end <= e_end:
                        token_ids.append(idx)
                return get_scheme_prefix(token_index, token_ids,
                                         self.tagging_scheme)