コード例 #1
0
ファイル: conll_dataset.py プロジェクト: dmh43/entity-linking
def _get_mention_splits(doc, mention, seek, span):
  """Split the sentence around a mention into two overlapping token lists.

  The mention is looked up in ``doc[seek:]`` and the sentence is taken as
  ``doc[span[0]:span[1]]``.

  Args:
    doc: Full document text.
    mention: Mention text to locate.
    seek: Offset into ``doc`` from which the search for ``mention`` starts.
    span: Indexable pair giving the sentence's start and end offsets in
      ``doc``.

  Returns:
    A tuple ``([left_tokens, right_tokens], next_seek)``. ``left_tokens``
    are the tokens of the sentence text preceding the mention followed by
    the mention; ``right_tokens`` are the tokens of the mention followed by
    the rest of the sentence; ``next_seek`` is the document offset just past
    the mention.
  """
  # NOTE(review): the result of `_.index_of` is used without a "not found"
  # check -- presumably it returns -1 on a miss (pydash semantics); confirm.
  offset_from_seek = _.index_of(doc[seek:], mention)
  sentence_start = span[0]
  # Re-express the mention position relative to the start of the sentence.
  mention_begin = seek - sentence_start + offset_from_seek
  mention_end = mention_begin + len(mention)
  sentence = doc[sentence_start:span[1]]
  left_tokens = parse_for_tokens(sentence[:mention_begin] + mention)
  right_tokens = parse_for_tokens(mention + sentence[mention_end:])
  return [left_tokens, right_tokens], sentence_start + mention_end
コード例 #2
0
def get_mention_sentence_splits(page_content, sentence_spans, mention_info):
    """Tokenize the text up to and from a mention within its sentence.

    Args:
        page_content: Full page text.
        sentence_spans: Sentence spans, passed through to
            ``_merge_sentences_across_mention``.
        mention_info: Mapping with at least the keys ``'mention'`` (the
            mention text) and ``'offset'``.

    Returns:
        A two-element list: the tokens of the sentence from its start
        through the end of the mention, and the tokens of the sentence from
        the start of the mention to its end. The mention is in both halves.

    Raises:
        ValueError: If the mention text does not occur in the sentence.
    """
    mention_text = mention_info['mention']
    mention_len = len(mention_text)
    span = _merge_sentences_across_mention(sentence_spans,
                                           mention_info['offset'],
                                           mention_len)
    sentence = page_content[span[0]:span[1]]
    # NOTE(review): this picks the first occurrence of the mention text in
    # the sentence, which may not be the one at `offset` -- confirm intent.
    mention_start = sentence.index(mention_text)
    mention_stop = mention_start + mention_len
    through_mention = parse_for_tokens(sentence[:mention_stop])
    from_mention = parse_for_tokens(sentence[mention_start:])
    return [through_mention, from_mention]
コード例 #3
0
def get_mention_sentence(page_content, sentence_spans, mention_info, lim=None):
    """Return the tokens of the sentence that contains a mention.

    Args:
        page_content: Full page text.
        sentence_spans: Sentence spans, passed through to
            ``_merge_sentences_across_mention``.
        mention_info: Mapping with at least the keys ``'mention'`` and
            ``'offset'``.
        lim: Optional cap on the number of tokens returned; ``None`` keeps
            every token.

    Returns:
        The first ``lim`` tokens produced by ``parse_for_tokens`` for the
        sentence text.
    """
    mention_length = len(mention_info['mention'])
    span = _merge_sentences_across_mention(sentence_spans,
                                           mention_info['offset'],
                                           mention_length)
    sentence_text = page_content[span[0]:span[1]]
    tokens = parse_for_tokens(sentence_text)
    return tokens[:lim]
コード例 #4
0
def get_mention_sentence_splits(page_content,
                                sentence_spans,
                                mention_info,
                                lim=None):
    """Tokenize the text up to and from a mention, optionally truncated.

    Args:
        page_content: Full page text.
        sentence_spans: Sentence spans, passed through to
            ``_merge_sentences_across_mention``.
        mention_info: Mapping with at least the keys ``'mention'`` and
            ``'offset'``.
        lim: Optional total token budget. When given, the left half keeps
            its trailing tokens and the right half its leading tokens.

    Returns:
        A two-element list: tokens of the sentence through the end of the
        mention, and tokens of the sentence from the start of the mention.

    Raises:
        ValueError: If the mention text does not occur in the sentence.
    """
    mention_len = len(mention_info['mention'])
    span = _merge_sentences_across_mention(sentence_spans,
                                           mention_info['offset'],
                                           mention_len)
    sentence = page_content[span[0]:span[1]]
    mention_start = sentence.index(mention_info['mention'])
    through_mention = sentence[:mention_start + mention_len]
    from_mention = sentence[mention_start:]

    if lim is None:
        return [
            parse_for_tokens(through_mention),
            parse_for_tokens(from_mention)
        ]

    # The negation happens before the floor division, so an odd `lim` gives
    # the left half the extra token (lim=5 -> last 3 left, first 2 right).
    # With lim=0 the left slice is `[0:]` and therefore keeps every token.
    left_tokens = parse_for_tokens(through_mention)[(-lim) // 2:]
    right_tokens = parse_for_tokens(from_mention)[:lim // 2]
    return [left_tokens, right_tokens]
コード例 #5
0
ファイル: runner.py プロジェクト: dmh43/entity-linking
 def _get_entity_tokens(self, num_entities):
     """Build a padded tensor of token indices, one row per entity label.

     Text from ``get_entity_text()`` is tokenized with ``parse_for_tokens``
     and each token is mapped through ``self.lookups.token_idx_lookup``,
     with unknown tokens mapped to the ``'<UNK>'`` entry. Entities whose id
     is not in ``self.lookups.entity_labels`` are skipped.

     Args:
         num_entities: Number of rows to produce; row ``i`` corresponds to
             entity label ``i``.

     Returns:
         A tensor on ``self.device`` built from the rows after they are
         passed through ``pad_batch_list``.
     """

     def to_index(token):
         # Fall back to the '<UNK>' entry for tokens missing from the lookup.
         if token in self.lookups.token_idx_lookup:
             return self.lookups.token_idx_lookup[token]
         return self.lookups.token_idx_lookup['<UNK>']

     tokens_by_label = {}
     for entity_id, text in get_entity_text().items():
         if entity_id not in self.lookups.entity_labels:
             continue
         label = self.lookups.entity_labels[entity_id]
         tokens_by_label[label] = _.map_(parse_for_tokens(text), to_index)

     # Labels with no text get the placeholder row [1].
     # NOTE(review): presumably 1 is the '<UNK>' index and 0 the padding
     # value passed to pad_batch_list -- confirm against the lookup tables.
     rows = [tokens_by_label.get(label, [1]) for label in range(num_entities)]
     return torch.tensor(pad_batch_list(0, rows), device=self.device)
コード例 #6
0
def _get_mention_sentence(doc, mention, seek, span):
    """Tokenize the sentence holding a mention and advance the seek offset.

    Args:
        doc: Full document text.
        mention: Mention text to locate in ``doc[seek:]``.
        seek: Offset into ``doc`` from which the search starts.
        span: Indexable pair giving the sentence's start and end offsets in
            ``doc``.

    Returns:
        A tuple ``(tokens, next_seek)``: the tokens of
        ``doc[span[0]:span[1]]`` and the document offset just past the
        mention.
    """
    # NOTE(review): the result of `_.index_of` is used without a "not found"
    # check -- presumably it returns -1 on a miss (pydash semantics); confirm.
    offset_from_seek = _.index_of(doc[seek:], mention)
    sentence_start = span[0]
    # End of the mention, measured from the start of the sentence.
    mention_end = seek - sentence_start + offset_from_seek + len(mention)
    tokens = parse_for_tokens(doc[sentence_start:span[1]])
    return tokens, sentence_start + mention_end