def get_tree_data_new(raw_data, character_to_index, word_to_index,
                      pos_to_index, lexicon_list):
    """
    Get tree-structured data from CoNLL-2012.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_pyramid_list = []
    ner_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)
    lexicon_hits = [0]

    for document in raw_data["gold"]:
        for part in raw_data["gold"][document]:
            # Regroup NER spans: (sentence, start, end) -> type becomes a
            # per-sentence {(start, end): type} mapping.
            ner_raw_data = defaultdict(lambda: {})
            for k, v in raw_data["gold"][document][part]["ner"].items():
                ner_raw_data[k[0]][(k[1], k[2])] = v

            for index, parse in enumerate(
                    raw_data["gold"][document][part]["parses"]):
                text_raw_data = raw_data["gold"][document][part]["text"][index]
                word_count += len(text_raw_data)

                if parse.subtrees[0].label == "NOPARSE":
                    continue
                head_raw_data = raw_data["gold"][document][part]["heads"][index]

                root_node = Node()
                span_to_node = {}
                nodes = construct_node(
                    root_node, parse, ner_raw_data[index], head_raw_data,
                    text_raw_data, character_to_index, word_to_index,
                    pos_to_index, lexicon_list, pos_count, ne_count,
                    pos_ne_count, lexicon_hits, span_to_node, False)
                root_node.nodes = nodes
                root_node.text_raw_data = text_raw_data  # YOLO

                # Dense (pyramid) nodes are disabled here; keep an empty list.
                additional_node_list = []
                """
                additional_node_list = create_dense_nodes(
                    ner_raw_data[index], text_raw_data,
                    pos_to_index, lexicon_list,
                    pos_count, ne_count, pos_ne_count, lexicon_hits,
                    span_to_node)
                """
                tree_pyramid_list.append((root_node, additional_node_list))
                ner_list.append(ner_raw_data[index])

    log(" %d sentences\n" % len(tree_pyramid_list))
    return (tree_pyramid_list, ner_list, word_count, pos_count, ne_count,
            pos_ne_count, lexicon_hits[0])
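# Illustrative sketch only (not part of the pipeline): the inner loop above
# regroups the flat CoNLL-2012 NER dict, which appears to be keyed by
# (sentence_index, span_start, span_end) -> entity_type, into a per-sentence
# mapping of (span_start, span_end) -> entity_type. The toy spans and labels
# below are hypothetical; only the regrouping pattern matches the code above.
def _sketch_group_ner_spans_by_sentence():
    from collections import defaultdict

    flat_ner = {
        (0, 0, 2): "PERSON",  # (sentence_index, start, end): type
        (0, 5, 6): "GPE",
        (1, 3, 5): "ORG",
    }
    ner_raw_data = defaultdict(lambda: {})
    for k, v in flat_ner.items():
        ner_raw_data[k[0]][(k[1], k[2])] = v
    assert ner_raw_data[0] == {(0, 2): "PERSON", (5, 6): "GPE"}
    assert ner_raw_data[1] == {(3, 5): "ORG"}
    return ner_raw_data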
def get_tree_data(raw_data, character_to_index, word_to_index, pos_to_index,
                  lexicon_list):
    """
    Get tree-structured data from CoNLL-2012.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_pyramid_list = []
    ner_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)
    lexicon_hits = [0]

    for document in raw_data["auto"]:
        for part in raw_data["auto"][document]:
            # Gold NER annotations are used even when iterating auto parses.
            ner_raw_data = defaultdict(lambda: {})
            for k, v in raw_data["gold"][document][part]["ner"].items():
                ner_raw_data[k[0]][(k[1], k[2])] = v

            for index, parse in enumerate(
                    raw_data["auto"][document][part]["parses"]):
                text_raw_data = raw_data["auto"][document][part]["text"][index]
                word_count += len(text_raw_data)

                if parse.subtrees[0].label == "NOPARSE":
                    continue
                head_raw_data = raw_data["auto"][document][part]["heads"][index]

                root_node = Node()
                span_to_node = {}
                nodes = construct_node(
                    root_node, parse, ner_raw_data[index], head_raw_data,
                    text_raw_data, character_to_index, word_to_index,
                    pos_to_index, lexicon_list, pos_count, ne_count,
                    pos_ne_count, lexicon_hits, span_to_node, False)
                root_node.nodes = nodes
                root_node.text_raw_data = text_raw_data  # YOLO

                # Dense (pyramid) nodes are disabled here; keep an empty list.
                additional_node_list = []
                """
                additional_node_list = create_dense_nodes(
                    ner_raw_data[index], text_raw_data,
                    pos_to_index, lexicon_list,
                    pos_count, ne_count, pos_ne_count, lexicon_hits,
                    span_to_node)
                """
                tree_pyramid_list.append((root_node, additional_node_list))
                ner_list.append(ner_raw_data[index])

    log(" %d sentences\n" % len(tree_pyramid_list))
    return (tree_pyramid_list, ner_list, word_count, pos_count, ne_count,
            pos_ne_count, lexicon_hits[0])
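# Illustrative sketch only: the nesting that the CoNLL-2012 readers above
# appear to assume for raw_data. The "auto"/"gold" split, the per-document and
# per-part nesting, and the "ner"/"parses"/"text"/"heads" fields are taken
# from the accesses in the code; the document key, the integer part key, and
# the concrete value types are assumptions for illustration only.
def _sketch_raw_data_layout():
    raw_data = {
        "auto": {                    # automatically produced annotations
            "doc_id": {
                0: {                 # part number (assumed integer)
                    "parses": [],    # one parse tree per sentence
                    "text": [],      # one token list per sentence
                    "heads": [],     # one head annotation per sentence
                    "ner": {},       # NER is read from the "gold" branch above
                },
            },
        },
        "gold": {                    # gold annotations, same nesting
            "doc_id": {
                0: {
                    "parses": [],
                    "text": [],
                    "heads": [],
                    # keyed (sentence_index, span_start, span_end) -> type
                    "ner": {(0, 0, 2): "PERSON"},
                },
            },
        },
    }
    return raw_data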
def get_tree_data(raw_data, character_to_index, word_to_index, pos_to_index):
    """
    Get tree-structured data from CoNLL-2012.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_list = []
    ner_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)

    for document in raw_data["auto"]:
        for part in raw_data["auto"][document]:
            # Gold NER annotations, regrouped per sentence index.
            ner_raw_data = defaultdict(lambda: {})
            for k, v in raw_data["gold"][document][part]["ner"].items():
                ner_raw_data[k[0]][(k[1], k[2])] = v

            for index, parse in enumerate(
                    raw_data["auto"][document][part]["parses"]):
                text_raw_data = raw_data["auto"][document][part]["text"][index]
                word_count += len(text_raw_data)

                if parse.subtrees[0].label == "NOPARSE":
                    continue
                head_raw_data = raw_data["auto"][document][part]["heads"][index]

                root_node = Node()
                nodes = construct_node(
                    root_node, parse, ner_raw_data[index], head_raw_data,
                    text_raw_data, character_to_index, word_to_index,
                    pos_to_index, pos_count, ne_count, pos_ne_count)
                root_node.nodes = nodes

                tree_list.append(root_node)
                ner_list.append(ner_raw_data[index])

    log(" %d sentences\n" % len(tree_list))
    return tree_list, ner_list, word_count, pos_count, ne_count, pos_ne_count
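# Illustrative sketch only: pos_count, ne_count, and pos_ne_count above are
# defaultdict counters that construct_node() is expected to update in place
# while it walks each parse tree, so the totals accumulate across all
# sentences and documents. The helper and labels below are hypothetical; only
# the shared-mutable-counter pattern matches the code above.
def _sketch_shared_counters():
    from collections import defaultdict

    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)

    def fake_construct_node(tokens, pos_count, ne_count, pos_ne_count):
        # Stand-in for construct_node(): bump the shared counters in place.
        for pos, ne in tokens:
            pos_count[pos] += 1
            if ne != "NONE":
                ne_count[ne] += 1
                pos_ne_count[pos] += 1

    fake_construct_node([("NNP", "PERSON"), ("VBD", "NONE")],
                        pos_count, ne_count, pos_ne_count)
    fake_construct_node([("NNP", "ORG")],
                        pos_count, ne_count, pos_ne_count)
    assert pos_count["NNP"] == 2 and ne_count["PERSON"] == 1
    return pos_count, ne_count, pos_ne_count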
def get_tree_data(sentence_list, parse_list, ner_list, character_to_index,
                  word_to_index, pos_to_index, index_to_lexicon):
    """
    Get tree-structured data from CoNLL-2003.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_pyramid_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)
    lexicon_hits = [0]

    for index, parse in enumerate(parse_list):
        text_raw_data = sentence_list[index]
        word_count += len(text_raw_data)

        span_to_node = {}
        head_raw_data = head_finder.collins_find_heads(parse)

        root_node = Node()
        nodes = construct_node(
            root_node, parse, ner_list[index], head_raw_data, text_raw_data,
            character_to_index, word_to_index, pos_to_index, index_to_lexicon,
            pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node)
        root_node.nodes = nodes
        root_node.tokens = len(text_raw_data)

        additional_node_list = create_dense_nodes(
            ner_list[index], text_raw_data, pos_to_index, index_to_lexicon,
            pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node)

        tree_pyramid_list.append((root_node, additional_node_list))

    log(" %d sentences\n" % len(tree_pyramid_list))
    return (tree_pyramid_list, word_count, pos_count, ne_count, pos_ne_count,
            lexicon_hits[0])
def get_tree_data(raw_data, character_to_index, word_to_index, pos_to_index):
    """
    Get tree-structured data from CoNLL-2012.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_list = []
    ner_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)

    for document in raw_data["auto"]:
        for part in raw_data["auto"][document]:
            # Gold NER annotations, regrouped per sentence index.
            ner_raw_data = defaultdict(lambda: {})
            for k, v in raw_data["gold"][document][part]["ner"].items():
                ner_raw_data[k[0]][(k[1], k[2])] = v

            for index, parse in enumerate(
                    raw_data["auto"][document][part]["parses"]):
                text_raw_data = raw_data["auto"][document][part]["text"][index]
                word_count += len(text_raw_data)

                if parse.subtrees[0].label == "NOPARSE":
                    continue
                head_raw_data = raw_data["auto"][document][part]["heads"][index]

                root_node = Node()
                nodes = construct_node(
                    root_node, parse, ner_raw_data[index], head_raw_data,
                    text_raw_data, character_to_index, word_to_index,
                    pos_to_index, pos_count, ne_count, pos_ne_count)
                root_node.nodes = nodes

                tree_list.append(root_node)
                ner_list.append(ner_raw_data[index])

    log(" %d sentences\n" % len(tree_list))
    return tree_list, ner_list, word_count, pos_count, ne_count, pos_ne_count
def get_tree_data(sentence_list, parse_list, ner_list, character_to_index,
                  word_to_index, pos_to_index, index_to_lexicon):
    """
    Get tree-structured data from CoNLL-2003.
    Stores it into the Node data structure.
    """
    log("get_tree_data()...")
    tree_pyramid_list = []
    word_count = 0
    pos_count = defaultdict(lambda: 0)
    ne_count = defaultdict(lambda: 0)
    pos_ne_count = defaultdict(lambda: 0)
    lexicon_hits = [0]

    for index, parse in enumerate(parse_list):
        text_raw_data = sentence_list[index]
        word_count += len(text_raw_data)

        span_to_node = {}
        head_raw_data = head_finder.collins_find_heads(parse)

        root_node = Node()
        nodes = construct_node(
            root_node, parse, ner_list[index], head_raw_data, text_raw_data,
            character_to_index, word_to_index, pos_to_index, index_to_lexicon,
            pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node)
        root_node.nodes = nodes
        root_node.tokens = len(text_raw_data)

        additional_node_list = create_dense_nodes(
            ner_list[index], text_raw_data, pos_to_index, index_to_lexicon,
            pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node)

        tree_pyramid_list.append((root_node, additional_node_list))

    log(" %d sentences\n" % len(tree_pyramid_list))
    return (tree_pyramid_list, word_count, pos_count, ne_count, pos_ne_count,
            lexicon_hits[0])
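# Illustrative sketch only: lexicon_hits is passed around above as a
# one-element list ([0]) so that construct_node() and create_dense_nodes()
# can increment a single shared counter in place, and the caller then reads
# lexicon_hits[0] in the return statement. The helper below is hypothetical
# and only demonstrates that mutable-container-as-counter pattern.
def _sketch_lexicon_hits_counter():
    lexicon_hits = [0]

    def fake_construct_node(hits_in_sentence, lexicon_hits):
        # Stand-in for the real helpers: bump the shared counter in place.
        lexicon_hits[0] += hits_in_sentence

    fake_construct_node(2, lexicon_hits)
    fake_construct_node(3, lexicon_hits)
    assert lexicon_hits[0] == 5
    return lexicon_hits[0]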