def data_inference(self, unlabeled_document_name, output_name, w):
    """Parse every sentence of an unlabeled file and write the predicted arcs.

    The input is read line by line. Each non-blank line is split on tabs:
    column 0 is the integer token index, column 1 the token and column 3 its
    POS tag. A line consisting of a single newline ends the current
    sentence. For every sentence a full graph is built over its tokens plus
    an artificial ``('root', 'root', 0)`` node, weighted with ``w``, and
    ``edmonds.mst`` is run from the root; the resulting arcs are ordered by
    ``self.get_arches_in_order`` and handed to ``self.write_lines`` together
    with the open output file.

    A last sentence that is not followed by a blank line is parsed as well;
    previously it was silently dropped.

    Args:
        unlabeled_document_name: Path of the tab-separated input file.
        output_name: Path of the result file; opened with mode ``'w+'``, so
            existing content is truncated.
        w: Weight vector forwarded to ``AuxFunctions.get_weighted_graph``.

    Raises:
        ValueError: If column 0 of a non-blank line is not an integer.
        IndexError: If a non-blank line has fewer than four columns.
    """
    root = ('root', 'root', 0)

    def read_sentences(lines):
        # Yield one {index: (token, pos)} dict per sentence.
        tokens = dict()
        for line in lines:
            if line == '\n':
                yield tokens
                tokens = dict()
            else:
                split_line = line.split('\t')
                # (counter)->(token,pos)
                tokens[int(split_line[0])] = (split_line[1], split_line[3])
        # Flush a trailing sentence that has no terminating blank line.
        if tokens:
            yield tokens

    def infer_and_write(tokens, out_file):
        # Build the weighted full graph for one sentence and write its arcs.
        data_for_full_graph = set()
        words_tags = dict()
        for counter, (token, pos) in tokens.items():
            data_for_full_graph.add((token, pos, counter))
            words_tags[counter] = pos
        data_for_full_graph.add(root)
        words_tags[0] = 'root'

        full_unweighted_g = AuxFunctions.make_full_graph(
            list(data_for_full_graph))
        g_features = AuxFunctions.get_features_for_graph(
            self.features, full_unweighted_g, words_tags, self.is_improved)
        full_weighted_g = AuxFunctions.get_weighted_graph(
            full_unweighted_g, g_features, w)
        g_inference = edmonds.mst(root, full_weighted_g)
        self.write_lines(self.get_arches_in_order(g_inference), out_file)

    # The context managers close both files, so no explicit close() is needed.
    with open(unlabeled_document_name, 'r') as f1, \
            open(output_name, 'w+') as f2:
        for sentence_num, tokens in enumerate(read_sentences(f1), 1):
            infer_and_write(tokens, f2)
            print('Done sentence number ' + str(sentence_num))
def perceptron(self, n):
    """Run ``n`` perceptron passes over ``self.scored_graphs``.

    The weight vector starts as an integer zero vector of length
    ``self.feature_num``. In every pass the stored graphs are visited in a
    random order; for each one the full graph is weighted with the current
    weights, ``edmonds.mst`` is run from the ``('root', 'root', 0)`` node,
    and the weights are updated by adding the saved feature vector and
    subtracting the feature vector of the predicted tree.

    After each pass the elapsed time is printed and the weights are saved
    through ``ImprovedFunctions.save_w`` or ``BasicFunctions.save_w``,
    depending on ``self.is_improved``.

    Args:
        n: Number of passes over the training graphs.

    Returns:
        None. The weights are only persisted through ``save_w``.
    """
    root_node = ('root', 'root', 0)
    weights = np.zeros(self.feature_num, dtype=int)

    for epoch in range(1, n + 1):
        started = datetime.now()

        # Shuffle by sorting the indices on one random key per index.
        visit_order = sorted(range(len(self.scored_graphs)),
                             key=lambda _: random.random())

        for position in visit_order:
            sample = self.scored_graphs[position]
            scored_graph = AuxFunctions.get_weighted_graph(
                sample[2], sample[3], weights)
            predicted_tree = edmonds.mst(root_node, scored_graph)

            saved_vec = self.get_saved_f_vector(sample[0], sample[4])
            predicted_vec = self.get_f_vector(predicted_tree, sample[4])
            weights = weights + saved_vec - predicted_vec

        elapsed = datetime.now() - started
        print('Done ' + str(epoch) + ' iteration at ' + str(elapsed))

        # NOTE(review): the original layout was collapsed onto one line;
        # saving is assumed to happen once per pass - confirm.
        saver = ImprovedFunctions if self.is_improved else BasicFunctions
        saver.save_w(weights, epoch)