def parse_semantic_dependencies(self, tokenized_sentence, tags, lemmas,
                                heads, deprels, language):
    worker = self.get_worker(language)
    sent = NLPSentence()
    sent['words'] = tokenized_sentence
    sent['tags'] = tags
    sent['lemmas'] = lemmas
    sent['heads'] = heads
    # Convert from 1-based indexing for back-compatibility.
    #sent['heads'] = [h-1 for h in heads]
    sent['dependency_relations'] = deprels
    sent.compute_semantic_dependencies(worker)
    num_predicates = len(sent['predicate_names'])
    predicates = ['_' for token in tokenized_sentence]
    argument_lists = [['_' for k in xrange(num_predicates)]
                      for token in tokenized_sentence]
    for k in xrange(num_predicates):
        name = sent['predicate_names'][k]
        p = sent['predicate_indices'][k]
        predicates[p] = name
        for l in xrange(len(sent['argument_roles'][k])):
            role = sent['argument_roles'][k][l]
            a = sent['argument_indices'][k][l]
            argument_lists[a][k] = role
    return predicates, argument_lists
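
# A hedged usage sketch (illustrative only: 'pipeline' stands for an
# instance of this class, the pre-tagged English inputs below are made
# up, and a trained semantic worker for the language is assumed):
#
#   words = ['John', 'sold', 'a', 'car', '.']
#   tags = ['NNP', 'VBD', 'DT', 'NN', '.']
#   lemmas = ['john', 'sell', 'a', 'car', '.']
#   heads = [2, 0, 4, 2, 2]
#   deprels = ['nsubj', 'root', 'det', 'dobj', 'punct']
#   predicates, argument_lists = pipeline.parse_semantic_dependencies(
#       words, tags, lemmas, heads, deprels, 'english')
#   # predicates has one entry per token ('_' for non-predicates);
#   # argument_lists[i][k] is token i's role for the k-th predicate.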
def recognize_entities(self, tokenized_sentence, tags, language):
    worker = self.get_worker(language)
    sent = NLPSentence()
    sent['words'] = tokenized_sentence
    sent['tags'] = tags
    sent.compute_entities(worker)
    entity_tags = sent['entity_tags']
    return entity_tags
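
# A hedged usage sketch ('pipeline' is illustrative; the exact tagset
# depends on the trained entity model, a BIO-style scheme is assumed):
#
#   entity_tags = pipeline.recognize_entities(
#       ['John', 'lives', 'in', 'Lisbon', '.'],
#       ['NNP', 'VBZ', 'IN', 'NNP', '.'],
#       'english')
#   # One tag per token, e.g. ['B-PER', 'O', 'O', 'B-LOC', 'O'].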
def parse(self, tokenized_sentence, tags, lemmas, language):
    worker = self.get_worker(language)
    sent = NLPSentence()
    sent['words'] = tokenized_sentence
    sent['tags'] = tags
    sent['lemmas'] = lemmas
    sent.compute_syntactic_dependencies(worker)
    # Convert to 1-based indexing for back-compatibility.
    #heads = [h+1 for h in sent['heads']]
    heads = sent['heads']
    deprels = sent['dependency_relations']
    return heads, deprels
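
# A hedged usage sketch ('pipeline' is illustrative; tags and lemmas
# are expected to come from tag() on the same tokens):
#
#   heads, deprels = pipeline.parse(words, tags, lemmas, 'english')
#   # heads[i] is the index of token i's syntactic head (whether it is
#   # 0- or 1-based follows whatever compute_syntactic_dependencies
#   # returns; see the indexing comment above), and deprels[i] is the
#   # matching dependency label.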
def tag(self, tokenized_sentence, language):
    worker = self.get_worker(language)
    sent = NLPSentence()
    sent['words'] = tokenized_sentence
    sent.compute_morphology(worker)
    tags = sent['tags']
    if sent['lemmas'] is not None:
        lemmas = sent['lemmas']
    else:
        lemmas = ['_' for token in tokenized_sentence]
    if sent['morphological_tags'] is not None:
        feats = ['|'.join(morph) if len(morph) > 0 else '_'
                 for morph in sent['morphological_tags']]
    else:
        feats = ['_' for token in tokenized_sentence]
    return tags, lemmas, feats
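
# A hedged usage sketch ('pipeline' is illustrative):
#
#   tags, lemmas, feats = pipeline.tag(
#       ['John', 'loves', 'Mary', '.'], 'english')
#   # One entry per token in each list; '_' marks lemmas or
#   # morphological features the model did not produce, and feats
#   # joins multiple morphological tags with '|'.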
def resolve_coreferences(self, all_tokenized_sentences, all_tags,
                         all_lemmas, all_heads, all_deprels,
                         all_entity_tags, language):
    worker = self.get_worker(language)
    sents = []
    for j, tokenized_sentence in enumerate(all_tokenized_sentences):
        sent = NLPSentence()
        sent['words'] = tokenized_sentence
        sent['tags'] = all_tags[j]
        sent['lemmas'] = all_lemmas[j]
        sent['heads'] = all_heads[j]
        # Convert from 1-based indexing for back-compatibility.
        #sent['heads'] = [h-1 for h in all_heads[j]]
        sent['dependency_relations'] = all_deprels[j]
        # For now, don't use this (must be coded as spans).
        sent['entity_tags'] = all_entity_tags[j]
        sents.append(sent)
    doc = NLPDocument(sents)
    doc.compute_coreferences(worker)
    # Convert from spans to coref info.
    all_coref_info = []
    for sent in doc['sentences']:
        spans = []
        for (start, end, name) in sent['coreference_spans']:
            span = Span(start, end, name)
            spans.append(span)
        coref_info = nlp_utils.construct_coreference_info_from_spans(
            spans, len(sent['words']))
        all_coref_info.append(coref_info)
    return all_coref_info
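
# A hedged usage sketch over a whole document ('pipeline' is
# illustrative; the per-sentence inputs are the outputs of tag(),
# parse() and recognize_entities() on each sentence):
#
#   all_coref_info = pipeline.resolve_coreferences(
#       all_words, all_tags, all_lemmas, all_heads, all_deprels,
#       all_entity_tags, 'english')
#   # all_coref_info[j] has one coreference annotation per token of
#   # sentence j, reconstructed from the predicted coreference spans.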