def __init__(self,
             word_processor=None,
             lang='en',
             corenlp: my_corenlp.StanfordCoreNLP = None,
             _wp_factory=word_processor.BaseWordProcessor):
    """Initialize the parser with a word processor and a CoreNLP backend.

    Args:
        word_processor: token pre/post-processor handed to the base class;
            defaults to a fresh ``BaseWordProcessor`` per instance. (The
            previous def-time default created ONE instance at import time
            and shared it across all instances — the classic mutable-default
            pitfall.)
        lang: language code passed to the CoreNLP server factory.
        corenlp: an already-running ``StanfordCoreNLP`` client to reuse;
            when ``None`` a new server is created via ``corenlp_factory``.
        _wp_factory: private def-time binding of ``BaseWordProcessor`` so
            the default can be constructed lazily inside the body (the
            ``word_processor`` parameter shadows the module of the same
            name here); not part of the public interface.
    """
    if word_processor is None:
        word_processor = _wp_factory()
    super().__init__(word_processor)
    # Explicit `is None` check instead of truthiness so a caller-supplied
    # client object is never second-guessed via __bool__.
    if corenlp is None:
        corenlp = corenlp_factory.create_corenlp_server(lang=lang)
    self.stanford_nlp = corenlp
# NOTE(review): fragment — the enclosing function definition and the loops
# that bind `i` and `j` start before this chunk, so the indentation relative
# to them is reconstructed best-effort; confirm against the full file.
# Presumably `coref_relation` is one coreference chain and each mention looks
# like (sentence_no, token_start, token_end) with 1-based indices — the
# `- 1` offsets below suggest this, but it is not visible here. TODO confirm.
first_word = coref_relation[i]
second_word = coref_relation[j]
for k in range(first_word[1], first_word[2]):
    # Map (sentence, token) coordinates to a flat index over all words.
    first_index = sentence_start_indices[first_word[0] - 1] + k - 1
    for l in range(second_word[1], second_word[2]):
        second_index = sentence_start_indices[second_word[0] - 1] + l - 1
        print(
            'first_word: %s second_word: %s' %
            (all_words[first_index], all_words[second_index]))
        # Undirected edge: mark the relation in both directions.
        word_relation_graph[first_index][second_index] = 1
        word_relation_graph[second_index][first_index] = 1
return word_relation_graph


if __name__ == '__main__':
    # Smoke test: build coref edges with CoreNLP, then inspect the spaCy
    # dependency parse of the same sentence and plot the DGL graph.
    sentence = 'When the food came, it was almost good.'
    core_nlp = corenlp_factory.create_corenlp_server()
    coref_graph = get_coref_edges(sentence, core_nlp)
    spacy_nlp = spacy.load("en_core_web_sm")
    doc = spacy_nlp(sentence)
    for token in doc:
        children = list(token.children)
        print(token)
    # neuralcoref.add_to_pipe(spacy_nlp)
    graph = create_dependency_graph_for_dgl(sentence, spacy_nlp)
    plot_dgl_graph(graph)
    print('')
def __init__(self, word_processor=None, lang='en',
             _wp_factory=word_processor.BaseWordProcessor):
    """Initialize the parser and start/connect a Stanford CoreNLP server.

    Args:
        word_processor: token pre/post-processor handed to the base class;
            defaults to a fresh ``BaseWordProcessor`` per instance. (The
            previous def-time default built ONE shared instance at import
            time — the mutable-default-argument pitfall.)
        lang: language code passed to the CoreNLP server factory.
        _wp_factory: private def-time binding of ``BaseWordProcessor`` so
            the default can be built lazily (the ``word_processor``
            parameter shadows the module of the same name inside the
            body); not part of the public interface.
    """
    if word_processor is None:
        word_processor = _wp_factory()
    super().__init__(word_processor)
    self.stanford_nlp = corenlp_factory.create_corenlp_server(lang=lang)
# NOTE(review): fragment — the enclosing method `def` and the two `if`
# headers that the `else:` branches below answer to are outside this view
# (presumably "if caching enabled" / "if key already cached"); relative
# indentation is reconstructed best-effort. Confirm against the full file.
# Cache hit: return the memoized parse for this sentence key.
words, postags, arcs, child_dict_list, format_parse_list = self.sentence_parse_result[
    dependency_key]
return words, postags, arcs, child_dict_list, format_parse_list
else:
    # Cache miss: parse, memoize, and evict the oldest entry (FIFO order
    # via cache_keys) once the cache exceeds max_cache_sentence_num.
    words, postags, arcs, child_dict_list, format_parse_list = self._inner_parser_main(
        sentence)
    self.sentence_parse_result[dependency_key] = [
        words, postags, arcs, child_dict_list, format_parse_list
    ]
    self.cache_keys.append(dependency_key)
    if len(self.cache_keys) > self.max_cache_sentence_num:
        # Drop the oldest cached sentence to bound memory use.
        cache_key = self.cache_keys.pop(0)
        self.sentence_parse_result.pop(cache_key)
    return words, postags, arcs, child_dict_list, format_parse_list
else:
    # Caching disabled (presumably): parse without memoization.
    words, postags, arcs, child_dict_list, format_parse_list = self._inner_parser_main(
        sentence)
    return words, postags, arcs, child_dict_list, format_parse_list


if __name__ == '__main__':
    # Smoke test: parse one Chinese sentence and print each parallel result
    # list with its length (all five lists should be token-aligned).
    parser = CorenlpParser(corenlp_factory.create_corenlp_server())
    # sentence = sys.argv[1]
    sentence = '以及公司内部的党派之争最终导致了尚阳科技在2006年梦碎当场。'
    words, postags, arcs, child_dict_list, format_parse_list = parser.parser_main(
        sentence)
    print(postags, len(postags))
    print(arcs, len(arcs))
    print(child_dict_list, len(child_dict_list))
    print(format_parse_list, len(format_parse_list))
# NOTE(review): fragment — the `def parse_corenlp_parse_result(...)` header
# is outside this view (the body recurses into it below); indentation is
# reconstructed best-effort.
# Recursive descent over a CoreNLP constituency bracketing such as
# "(ROOT (IP ...))": a token with no leading '(' is a leaf word.
if not constituency_parser_result.startswith('('):
    leaf_node = TreeNode(constituency_parser_result)
    return leaf_node
else:
    # Strip the outer parentheses, then split off the node label (the text
    # up to the first space) from the remaining child bracketings.
    constituency_parser_result = constituency_parser_result[1: -1]
    first_whitespace_index = constituency_parser_result.index(' ')
    value = constituency_parser_result[: first_whitespace_index]
    parent_node = TreeNode(value)
    constituency_parser_result = constituency_parser_result[first_whitespace_index + 1:]
    # Presumably yields each top-level child bracketing of the remainder —
    # helper defined elsewhere in the file; TODO confirm.
    sub_constituency_parser_results = sub_constituency_parser_result_generator(constituency_parser_result)
    children = []
    for sub_constituency_parser_result in sub_constituency_parser_results:
        # Recurse: each child bracketing becomes a subtree.
        parse_result = parse_corenlp_parse_result(sub_constituency_parser_result)
        children.append(parse_result)
    # Wire parent/child links in both directions.
    for child in children:
        child.parent = parent_node
    parent_node.children = children
    return parent_node


if __name__ == '__main__':
    # Smoke test: parse one Chinese sentence (parentheses removed first so
    # they cannot be confused with the bracketing) and look up one node.
    with corenlp_factory.create_corenlp_server(lang='zh', start_new_server=True) as nlp:
        sentence = '【多国隐形战机“暗战”巴黎航展】图片说明:航空工业展台的隐形战机模型引发关注。'
        sentence = re.sub('[\(\)]', '', sentence)
        result = nlp.parse(sentence)
        print(result)
        constituency_tree = parse_corenlp_parse_result(result)
        print(TreeNode.find_corresponding_node(constituency_tree, '模型', 20))
        print('end')