def calculate_features_for_all_graphs(self):
    """Pre-compute feature data for every graph pair in self.graphs.

    For each pair ``data = (graph_a, graph_b)`` the feature data of both
    graphs is computed via ``AuxFunctions.get_features_for_graph`` and a
    5-tuple ``(graph_a, features_a, graph_b, features_b, sentence_number)``
    is appended to ``self.scored_graphs``.  Sentence numbers are 1-based
    and index into ``self.sentence_tags`` (0-based, hence the ``- 1``).
    """
    # enumerate(..., start=1) replaces the manual graph_number counter;
    # the tags lookup is hoisted so it happens once per pair, not twice.
    for graph_number, data in enumerate(self.graphs, start=1):
        tags = self.sentence_tags[graph_number - 1]
        self.scored_graphs.append((
            data[0],
            AuxFunctions.get_features_for_graph(
                self.features, data[0], tags, self.is_improved),
            data[1],
            AuxFunctions.get_features_for_graph(
                self.features, data[1], tags, self.is_improved),
            graph_number,
        ))
def get_f_vector(self, g, g_num):
    """Return the feature-count vector of graph *g*.

    Computes the graph's feature data, then counts how often each feature
    index (third element of every feature-data entry) occurs.  The result
    is an int vector of length ``self.feature_num``.
    """
    feature_data = AuxFunctions.get_features_for_graph(
        self.features, g, self.sentence_tags[g_num - 1], self.is_improved)
    # Flatten all feature indices, then accumulate in one vectorized call;
    # np.add.at handles repeated indices, so counts match a manual loop.
    indices = [i for entry in feature_data for i in entry[2]]
    counts = np.zeros(self.feature_num, dtype=int)
    np.add.at(counts, indices, 1)
    return counts
def data_inference(self, unlabeled_document_name, output_name, w):
    """Run MST dependency inference over an unlabeled document.

    Reads *unlabeled_document_name* — tab-separated lines of the form
    ``index \t token \t _ \t pos ...`` with sentences separated by blank
    lines (CoNLL-style; assumed format — confirm against the data files)
    — infers a dependency tree for each sentence using weight vector *w*,
    and writes the predicted arcs to *output_name*.

    :param unlabeled_document_name: path of the input document
    :param output_name: path of the file to write predictions to
    :param w: weight vector passed to ``AuxFunctions.get_weighted_graph``
    """
    sentence_words_pos = dict()
    sentence_num = 0
    # A single `with` closes both files; the explicit close() calls in
    # the previous version were redundant under the context managers.
    with open(unlabeled_document_name, 'r') as f1, \
            open(output_name, 'w+') as f2:
        for line in f1:
            if line == '\n':
                sentence_num += 1
                self._infer_sentence(sentence_words_pos, w, f2)
                print('Done sentence number ' + str(sentence_num))
                sentence_words_pos = dict()
            else:
                split_line = line.split('\t')
                # (counter)->(token,pos)
                sentence_words_pos[int(split_line[0])] = (
                    split_line[1], split_line[3])
        # Robustness: the previous version silently dropped the last
        # sentence when the file did not end with a blank line.
        if sentence_words_pos:
            sentence_num += 1
            self._infer_sentence(sentence_words_pos, w, f2)
            print('Done sentence number ' + str(sentence_num))

def _infer_sentence(self, sentence_words_pos, w, out_file):
    """Infer one sentence's dependency tree and write its arcs.

    :param sentence_words_pos: dict mapping word index -> (token, pos)
    :param w: weight vector for edge scoring
    :param out_file: open writable file to append the predicted arcs to
    """
    data_for_full_graph = set()
    words_tags = dict()
    for counter, word_tuple in sentence_words_pos.items():
        data_for_full_graph.add((word_tuple[0], word_tuple[1], counter))
        words_tags[counter] = word_tuple[1]
    # Add the artificial root node; the old write of
    # sentence_words_pos[0] was dead (the dict is discarded afterwards).
    data_for_full_graph.add(('root', 'root', 0))
    words_tags[0] = 'root'
    full_unweighted_g = AuxFunctions.make_full_graph(
        list(data_for_full_graph))
    g_features = AuxFunctions.get_features_for_graph(
        self.features, full_unweighted_g, words_tags, self.is_improved)
    full_weighted_g = AuxFunctions.get_weighted_graph(
        full_unweighted_g, g_features, w)
    g_inference = edmonds.mst(('root', 'root', 0), full_weighted_g)
    inference_arches = self.get_arches_in_order(g_inference)
    self.write_lines(inference_arches, out_file)