def data_inference(self, unlabeled_document_name, output_name, w):
    """Run MST inference over every sentence of an unlabeled CoNLL-style file.

    Sentences are separated by blank lines; each non-blank line is a
    tab-separated token row where field 0 is the token index, field 1 the
    word form and field 3 the POS tag.  The inferred arches of each
    sentence are written to *output_name* via ``self.write_lines``.

    :param unlabeled_document_name: path of the input file to parse.
    :param output_name: path of the output file the arches are written to.
    :param w: weight vector used to score the graph features.
    """
    sentence_words_pos = dict()
    sentence_num = 0
    # NOTE(review): a sentence is flushed only when a blank line follows it,
    # so an input file without a trailing blank line silently drops its last
    # sentence -- confirm the corpus format guarantees the separator.
    with open(unlabeled_document_name, 'r') as f1:
        with open(output_name, 'w+') as f2:
            for line in f1:
                if line == '\n':
                    # End of sentence: run inference and reset the buffer.
                    self._infer_sentence(sentence_words_pos, w, f2)
                    sentence_num += 1
                    print('Done sentence number ' + str(sentence_num))
                    sentence_words_pos = dict()
                else:
                    split_line = line.split('\t')
                    # (counter)->(token,pos)
                    sentence_words_pos[int(
                        split_line[0])] = (split_line[1], split_line[3])
    # Both files are closed by the 'with' blocks; the original's explicit
    # f1.close()/f2.close() calls were redundant and have been removed.

def _infer_sentence(self, sentence_words_pos, w, f2):
    """Build the weighted full graph of one buffered sentence, run Edmonds'
    MST rooted at the artificial root node, and write the resulting arches
    to the open output file *f2*.
    """
    data_for_full_graph = set()
    words_tags = dict()
    for counter, (word, pos) in sentence_words_pos.items():
        data_for_full_graph.add((word, pos, counter))
        words_tags[counter] = pos
    # The artificial root node anchors the spanning tree at index 0.
    data_for_full_graph.add(('root', 'root', 0))
    words_tags[0] = 'root'
    # (The original also wrote the root entry into sentence_words_pos; that
    # entry was never read before the dict was discarded, so it is dropped.)
    full_unweighted_g = AuxFunctions.make_full_graph(
        list(data_for_full_graph))
    g_features = AuxFunctions.get_features_for_graph(
        self.features, full_unweighted_g, words_tags, self.is_improved)
    full_weighted_g = AuxFunctions.get_weighted_graph(
        full_unweighted_g, g_features, w)
    g_inference = edmonds.mst(('root', 'root', 0), full_weighted_g)
    inference_arches = self.get_arches_in_order(g_inference)
    self.write_lines(inference_arches, f2)
def make_graphs(self):
    """Convert each entry of ``self.arches_data_list`` into a graph pair.

    Each ``arches_data`` entry is ``(arches tuples, full data)``.  Every
    arch tuple is turned into an adjacency dict ``g`` mapping a head node
    ``(word, pos, index)`` to a dict of its modifier nodes (all weights
    initialised to 0).  The pair ``(g, full graph of the sentence)`` is
    appended to ``self.graphs``.
    """
    # (The original's unused local 'counter' has been removed.)
    for arches_data in self.arches_data_list:
        # Get (arches tuples, full data)
        g = dict()
        for arch_tuple in arches_data[0]:
            head = (arch_tuple[0][0], arch_tuple[0][1], arch_tuple[1])
            modifier = (arch_tuple[0][2], arch_tuple[0][3], arch_tuple[2])
            # setdefault replaces the original check-then-insert branch:
            # create the head's adjacency dict on first sight, then record
            # the modifier with weight 0.
            g.setdefault(head, dict())[modifier] = 0
        self.graphs.append(
            (g, AuxFunctions.make_full_graph(arches_data[1])))