Example #1
    def parse_semantic_dependencies(self, tokenized_sentence, tags, lemmas,
                                    heads, deprels, language):
        worker = self.get_worker(language)
        sent = NLPSentence()
        sent['words'] = tokenized_sentence
        sent['tags'] = tags
        sent['lemmas'] = lemmas
        sent['heads'] = heads
        # The conversion from 1-based indexing is disabled for
        # back-compatibility:
        # sent['heads'] = [h - 1 for h in heads]
        sent['dependency_relations'] = deprels
        sent.compute_semantic_dependencies(worker)
        # Build one row per token: predicates[i] holds the predicate name at
        # token i (or '_'), and argument_lists[i][k] holds token i's role
        # with respect to the k-th predicate (or '_').
        num_predicates = len(sent['predicate_names'])
        predicates = ['_' for token in tokenized_sentence]
        argument_lists = [['_' for k in range(num_predicates)]
                          for token in tokenized_sentence]
        for k in range(num_predicates):
            name = sent['predicate_names'][k]
            p = sent['predicate_indices'][k]
            predicates[p] = name
            for l in range(len(sent['argument_roles'][k])):
                role = sent['argument_roles'][k][l]
                a = sent['argument_indices'][k][l]
                argument_lists[a][k] = role
        return predicates, argument_lists
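A minimal standalone sketch of the output layout this method produces; the sentence, the predicate name 'sell.01', and the role labels are hypothetical values, not output from the real models:

    # Hypothetical output for "John sold the car ." with a single
    # predicate at token 1.
    tokens = ['John', 'sold', 'the', 'car', '.']
    predicates = ['_', 'sell.01', '_', '_', '_']
    argument_lists = [['A0'], ['_'], ['_'], ['A1'], ['_']]
    # One line per token: word, predicate column, then one role column
    # per predicate, in the style of CoNLL semantic dependency files.
    for token, pred, args in zip(tokens, predicates, argument_lists):
        print('\t'.join([token, pred] + args))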
Example #2
    def recognize_entities(self, tokenized_sentence, tags, language):
        worker = self.get_worker(language)
        sent = NLPSentence()
        sent['words'] = tokenized_sentence
        sent['tags'] = tags
        sent.compute_entities(worker)
        # One entity tag per token (e.g. a BIO-style label).
        return sent['entity_tags']
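The method returns one tag per token. As a self-contained sketch, assuming the tags use a BIO-style scheme (the actual tag inventory depends on the trained model), the tags can be grouped into (start, end, label) spans as below; bio_to_spans is a hypothetical helper, not part of the pipeline:

    def bio_to_spans(entity_tags):
        # Decode well-formed BIO tags into (start, end, label) spans.
        spans = []
        start, label = None, None
        for i, tag in enumerate(entity_tags + ['O']):  # 'O' sentinel
            if start is not None and not tag.startswith('I-'):
                spans.append((start, i - 1, label))
                start = None
            if tag.startswith('B-'):
                start, label = i, tag[2:]
        return spans

    assert bio_to_spans(['B-PER', 'I-PER', 'O', 'B-LOC', 'O']) == \
        [(0, 1, 'PER'), (3, 3, 'LOC')]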
Example #3
    def parse(self, tokenized_sentence, tags, lemmas, language):
        worker = self.get_worker(language)
        sent = NLPSentence()
        sent['words'] = tokenized_sentence
        sent['tags'] = tags
        sent['lemmas'] = lemmas
        sent.compute_syntactic_dependencies(worker)
        # The conversion to 1-based indexing is disabled for
        # back-compatibility:
        # heads = [h + 1 for h in sent['heads']]
        heads = sent['heads']
        deprels = sent['dependency_relations']
        return heads, deprels
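For reference, a standalone illustration of the two parallel lists this method returns. The values are hypothetical, and the indexing convention is whichever compute_syntactic_dependencies uses (the 1-based conversion above is commented out), so the snippet treats heads as opaque integers:

    tokens = ['John', 'sold', 'the', 'car', '.']
    heads = [2, 0, 4, 2, 2]  # hypothetical head indices
    deprels = ['nsubj', 'root', 'det', 'dobj', 'punct']
    # Print CoNLL-like columns: index, word, head, relation.
    for i, (token, head, rel) in enumerate(zip(tokens, heads, deprels), 1):
        print(i, token, head, rel, sep='\t')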
Example #4
    def tag(self, tokenized_sentence, language):
        worker = self.get_worker(language)
        sent = NLPSentence()
        sent['words'] = tokenized_sentence
        sent.compute_morphology(worker)
        tags = sent['tags']
        # Fall back to '_' placeholders when the tagger provides no
        # lemmas or morphological features.
        if sent['lemmas'] is not None:
            lemmas = sent['lemmas']
        else:
            lemmas = ['_' for token in tokenized_sentence]
        if sent['morphological_tags'] is not None:
            feats = ['|'.join(morph) if len(morph) > 0 else '_'
                     for morph in sent['morphological_tags']]
        else:
            feats = ['_' for token in tokenized_sentence]
        return tags, lemmas, feats
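The '|'-joining of morphological features and the '_' placeholders can be checked in isolation; this is just the feats branch of the method above, run on hypothetical feature values:

    morphological_tags = [['Case=Nom', 'Number=Sing'], [], ['Tense=Past']]
    feats = ['|'.join(morph) if len(morph) > 0 else '_'
             for morph in morphological_tags]
    assert feats == ['Case=Nom|Number=Sing', '_', 'Tense=Past']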
Example #5
    def resolve_coreferences(self, all_tokenized_sentences, all_tags,
                             all_lemmas, all_heads, all_deprels,
                             all_entity_tags, language):
        worker = self.get_worker(language)
        sents = []
        for j, tokenized_sentence in enumerate(all_tokenized_sentences):
            sent = NLPSentence()
            sent['words'] = tokenized_sentence
            sent['tags'] = all_tags[j]
            sent['lemmas'] = all_lemmas[j]
            sent['heads'] = all_heads[j]
            # The conversion from 1-based indexing is disabled for
            # back-compatibility:
            # sent['heads'] = [h - 1 for h in all_heads[j]]
            sent['dependency_relations'] = all_deprels[j]
            # For now the entity tags are unused (they would have to be
            # encoded as spans).
            sent['entity_tags'] = all_entity_tags[j]
            sents.append(sent)
        doc = NLPDocument(sents)
        doc.compute_coreferences(worker)

        # Convert the predicted spans to per-token coreference info.
        all_coref_info = []
        for sent in doc['sentences']:
            spans = []
            for start, end, name in sent['coreference_spans']:
                span = Span(start, end, name)
                spans.append(span)
            coref_info = nlp_utils.construct_coreference_info_from_spans(
                spans, len(sent['words']))
            all_coref_info.append(coref_info)

        return all_coref_info
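The final conversion relies on nlp_utils.construct_coreference_info_from_spans, whose exact output is not shown here. As a rough standalone sketch of what a span-to-column conversion can look like, here is a CoNLL-2012-style coreference column built from (start, end, entity-id) spans; spans_to_coref_column is a hypothetical stand-in, not the real helper:

    def spans_to_coref_column(spans, num_tokens):
        # Mark each span with '(id' at its start and 'id)' at its end;
        # single-token spans get '(id)'. Unmarked tokens get '_'.
        column = ['' for _ in range(num_tokens)]
        for start, end, name in spans:
            if start == end:
                column[start] += '(%s)' % name
            else:
                column[start] += '(%s' % name
                column[end] += '%s)' % name
        return [c if c else '_' for c in column]

    assert spans_to_coref_column([(0, 1, '7'), (3, 3, '7')], 5) == \
        ['(7', '7)', '_', '(7)', '_']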