def perceptron(self):
    flag = True
    if not self.iter_set:
        self.iter += 1
    for idx, sentence in enumerate(self.all_data, start=1):
        # Decode the best tree for this sentence under the current weights.
        full_graph = build_full_graph(len(sentence))
        get_score_func = GetScore(idx, self.w, sentence, self.feature_extractor)
        digraph = Digraph(full_graph, get_score_func)
        graph = build_real_graph(sentence)
        mst = digraph.mst().successors
        # Edges in the gold tree but missing from the prediction, and vice versa.
        add_graph = {}
        rm_graph = {}
        for k in graph.keys():
            add_graph[k] = [v for v in graph[k] if v not in mst[k]]
            rm_graph[k] = [v for v in mst[k] if v not in graph[k]]
        if any(add_graph.values()):
            flag = False
            # Perceptron update: add gold-edge features, subtract predicted-edge features.
            temp_w = plus(
                self.w, self.graph_feature_extractor(idx, add_graph, sentence))
            self.w = minus(
                temp_w, self.graph_feature_extractor(idx, rm_graph, sentence))
        progress_bar(idx, len(self.all_data), "sentences")
    return flag
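# A minimal sketch of the plus/minus helpers the update step above relies on,
# assuming the weight vector and the extracted feature counts are sparse dicts
# mapping feature -> float; the project's real representation may differ
# (e.g. a numpy array), in which case these would be element-wise operations.
def plus(w, feats):
    # Return a new weight dict with each feature count added to w.
    result = dict(w)
    for f, v in feats.items():
        result[f] = result.get(f, 0) + v
    return result


def minus(w, feats):
    # Return a new weight dict with each feature count subtracted from w.
    result = dict(w)
    for f, v in feats.items():
        result[f] = result.get(f, 0) - v
    return result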
def test_create_full_graph(self):
    a = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
    graph = Digraph(a)
    new_graph = graph.mst()
    suc = new_graph.successors
    self.make_vaild(suc)
    suc = {0: suc}
    m = self.Model(suc)
    perc = StructPerceptron(m, directory=directory)
    # Pass the callable and its argument separately so assertRaises can
    # intercept the expected exception.
    self.assertRaises(Exception, GraphUtil.create_full_graph, suc)
def test_perceptron(self):
    n = 10
    a = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
    graph = Digraph(a)
    new_graph = graph.mst()
    suc = new_graph.successors
    suc = self.make_vaild(suc)
    suc = {0: suc}
    m = self.Model(suc)
    perc = StructPerceptron(m, directory=directory)
    # Pass the callable and its argument separately so assertRaises can
    # intercept the expected exception.
    self.assertRaises(Exception, perc.perceptron, n)
    print(perc.perceptron(n).A)
def test_identical_dependency_tree(self):
    a = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
    graph = Digraph(a)
    new_graph = graph.mst()
    suc = new_graph.successors
    self.make_vaild(suc)
    suc = {0: suc}
    m = self.Model(suc)
    perc = StructPerceptron(m, directory=directory)
    GraphUtil.create_full_graph(suc)
    full_graph = perc.full_graph[0]
    self.assertEqual(True, GraphUtil.identical_dependency_tree(full_graph, a))
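# A hedged sketch of what GraphUtil.identical_dependency_tree is assumed to
# check in the test above: two successor maps describe the same structure iff
# they induce the same set of (head, modifier) edges. The real GraphUtil
# helper may be implemented differently.
def identical_dependency_tree(graph_a, graph_b):
    def edges(g):
        return {(h, m) for h, ms in g.items() for m in ms}
    return edges(graph_a) == edges(graph_b)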
def infer(self, sentence):
    # Decode the highest-scoring dependency tree and attach the predicted
    # head to each word. namedtuple comes from the standard-library
    # collections module.
    full_graph = build_full_graph(len(sentence))
    get_score_func = GetScore(-1, self.w, sentence, self.feature_extractor)
    digraph = Digraph(full_graph, get_score_func)
    mst = digraph.mst().successors
    # Invert head -> modifiers into modifier -> head for direct lookup.
    mst = inverse_graph(mst)
    Word = namedtuple('Word', 'counter token pos head')
    return [
        Word(counter=word.counter, token=word.token, pos=word.pos,
             head=mst[word.counter])
        for word in sentence
    ]
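# A minimal sketch of inverse_graph as infer() uses it, assuming the MST
# successors map head -> [modifiers] and the inverted map gives each modifier
# its single head; the project's actual helper may differ.
def inverse_graph(graph):
    inverted = {}
    for head, modifiers in graph.items():
        for modifier in modifiers:
            # In a tree every modifier has exactly one head.
            inverted[modifier] = head
    return inverted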
# score lookup for Chu-Liu/Edmonds decoding
def get_score(h, m):
    return curr_G_wieghts.get((h, m), 0)

t_init = time.time()

######### run perceptron #########
for n in range(NUM_OF_PERCEPTRON_STEPS):
    print('Starting Perceptron Step - ' + str(n + 1))
    for i in range(len(sent_word_list)):
        curr_G = sent_graph_list[i]
        # create a weight for each edge
        curr_G_wieghts = turn_edge_feats_to_wights(
            edge_dict=sent_graph_edges_feats[i],
            wights_vec=basic_feature_weights_vec)
        graph = Digraph(curr_G, get_score)
        if TRAIN_WITH_MST:
            mst = graph.mst()
        else:
            mst = graph.greedy()
        pred_heads = convert_chi_lui_output_to_list_of_heads(
            sent_len=len(sent_word_list[i]), mst=mst)
        if len(pred_heads) != len(sent_head_list[i]):
            print('Perceptron prediction problem at sent ' + str(i) +
                  ' pred len ' + str(pred_heads) +
                  ' real heads len ' + str(sent_head_list[i]))
        if tuple(pred_heads) != tuple(sent_head_list[i]):
            # perceptron correction step
            pred_feat_idxs = get_all_feature_idxes_for_sent_and_head(
                featurs_obj=featurs_basic_obj,
                sent_words=sent_word_list[i],
                sent_pos=sent_pos_list[i],
def __predict__(self):
    MST = Digraph(self.graph.get_full_successors(), self.get_score).mst()
    return MST.successors, self.__generate_edges_set__(MST.successors)
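# A hedged sketch of __generate_edges_set__ as used above, assuming it
# flattens a head -> [modifiers] successor map into a set of (head, modifier)
# edges; the actual private helper may be implemented differently.
def __generate_edges_set__(self, successors):
    return {(head, modifier)
            for head, modifiers in successors.items()
            for modifier in modifiers}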