Example no. 1
    def data_inference(self, unlabeled_document_name, output_name, w):
        # Read an unlabeled, blank-line-separated token file, decode a
        # dependency tree for each sentence with the weight vector w, and
        # write the predicted arcs to output_name.
        sentence_words_pos = dict()
        sentence_num = 0
        with open(unlabeled_document_name, 'r') as f1:
            with open(output_name, 'w+') as f2:
                for line in f1:

                    if line == '\n':
                        # A blank line marks the end of a sentence: collect its
                        # (token, POS, position) triples and a position -> POS
                        # lookup for feature extraction.
                        data_for_full_graph = set()
                        words_tags = dict()
                        for counter in sentence_words_pos:
                            word_tuple = sentence_words_pos[counter]
                            data_for_full_graph.add(
                                (word_tuple[0], word_tuple[1], counter))
                            words_tags[counter] = word_tuple[1]

                        # Add the artificial root node that every predicted
                        # tree is rooted at.
                        data_for_full_graph.add(('root', 'root', 0))
                        sentence_words_pos[0] = ('root', 'root', 0)
                        words_tags[0] = 'root'

                        # Build the fully connected graph over the sentence and
                        # weight every candidate arc with the current model w.
                        full_unweighted_g = AuxFunctions.make_full_graph(
                            list(data_for_full_graph))
                        g_features = AuxFunctions.get_features_for_graph(
                            self.features, full_unweighted_g, words_tags,
                            self.is_improved)
                        full_weighted_g = AuxFunctions.get_weighted_graph(
                            full_unweighted_g, g_features, w)

                        # Decode the highest-scoring dependency tree with the
                        # Chu-Liu/Edmonds maximum-spanning-tree algorithm.
                        g_inference = edmonds.mst(('root', 'root', 0),
                                                  full_weighted_g)

                        # Write the predicted arcs for this sentence, in order.
                        inference_arches = self.get_arches_in_order(
                            g_inference)
                        self.write_lines(inference_arches, f2)

                        sentence_num += 1
                        print('Done sentence number ' + str(sentence_num))
                        sentence_words_pos = dict()
                    else:
                        # Token line: map position -> (token, POS) from the
                        # tab-separated fields.
                        split_line = line.split('\t')
                        sentence_words_pos[int(split_line[0])] = (
                            split_line[1], split_line[3])

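A note on the input the loop above expects: tab-separated token lines in which field 0 is the token position, field 1 the token, and field 3 its POS tag (matching the split_line[0/1/3] accesses), with a blank line ending each sentence. Below is a minimal sketch of such a file and of the dictionary the reader builds from it; the tokens, tags, extra placeholder columns, and file name are made up for illustration and are not taken from the original code.

# Illustrative input sketch; 'tiny.unlabeled' and its contents are assumptions.
sample = (
    '1\tThe\t_\tDT\t_\t_\n'
    '2\tdog\t_\tNN\t_\t_\n'
    '3\tbarks\t_\tVBZ\t_\t_\n'
    '\n'  # blank line ends the sentence and triggers inference
)
with open('tiny.unlabeled', 'w') as f:
    f.write(sample)

# After the three token lines are read, sentence_words_pos holds:
# {1: ('The', 'DT'), 2: ('dog', 'NN'), 3: ('barks', 'VBZ')}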
Example no. 2
    def perceptron(self, n):
        # Structured perceptron training: for n epochs, decode every training
        # sentence with the current weights and push w towards the gold tree.
        w = np.zeros(self.feature_num, dtype=int)
        for i in range(n):
            iteration_time = datetime.now()
            shuffled_scored_graph_index = list(range(len(self.scored_graphs)))
            random.shuffle(shuffled_scored_graph_index)
            for index in shuffled_scored_graph_index:
                data = self.scored_graphs[index]
                weighted_full_graph = AuxFunctions.get_weighted_graph(
                    data[2], data[3], w)
                g_tag = edmonds.mst(('root', 'root', 0), weighted_full_graph)
                # The update is a zero vector whenever the predicted tree
                # equals the gold tree, so the explicit equality check is
                # skipped for speed.
                w = w + self.get_saved_f_vector(
                    data[0], data[4]) - self.get_f_vector(g_tag, data[4])
            print('Done iteration ' + str(i + 1) + ' in ' +
                  str(datetime.now() - iteration_time))
            if self.is_improved:
                ImprovedFunctions.save_w(w, i + 1)
            else:
                BasicFunctions.save_w(w, i + 1)
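The inner-loop update is the standard structured perceptron rule, w <- w + f(gold tree) - f(predicted tree). Below is a tiny self-contained illustration of that rule on made-up feature vectors; the numbers only stand in for get_saved_f_vector / get_f_vector outputs and are not taken from the original code.

import numpy as np

# Toy structured-perceptron update on assumed feature vectors.
w = np.zeros(4, dtype=int)
f_gold = np.array([1, 0, 2, 0])       # feature counts of the gold tree
f_predicted = np.array([0, 1, 2, 0])  # feature counts of the decoded tree

w = w + f_gold - f_predicted
print(w)  # [ 1 -1  0  0]: features of gold-only arcs gain weight,
          # features of wrongly predicted arcs lose weight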