예제 #1
0
 def calculate_features_for_all_graphs(self):
     """Compute features for both graphs of every entry in ``self.graphs``.

     Each entry is indexed as ``entry[0]`` and ``entry[1]`` (two graphs).
     For every entry one tuple is appended to ``self.scored_graphs``::

         (entry[0], features of entry[0],
          entry[1], features of entry[1], graph_number)

     ``graph_number`` is the 1-based position of the entry in
     ``self.graphs``; both feature extractions use the tags found at
     ``self.sentence_tags[graph_number - 1]``.
     """
     # The 1-based number is stored with each result; per the original
     # author's note it is used later to save f_vector results in a dict.
     for graph_number, entry in enumerate(self.graphs, start=1):
         first_graph = entry[0]
         second_graph = entry[1]
         # Both graphs of an entry share the same sentence tags.
         tags = self.sentence_tags[graph_number - 1]

         first_features = AuxFunctions.get_features_for_graph(
             self.features, first_graph, tags, self.is_improved)
         second_features = AuxFunctions.get_features_for_graph(
             self.features, second_graph, tags, self.is_improved)

         self.scored_graphs.append(
             (first_graph, first_features,
              second_graph, second_features, graph_number))
예제 #2
0
 def get_f_vector(self, g, g_num):
     """Return a count vector of the feature indices that fire on ``g``.

     The result is an integer NumPy array of length ``self.feature_num``.
     Features are obtained from ``AuxFunctions.get_features_for_graph``
     using the tags at ``self.sentence_tags[g_num - 1]``; for every
     returned item, each index in ``item[2]`` increments the matching
     slot of the vector by one.
     """
     counts = np.zeros(self.feature_num, dtype=int)
     sentence_tags = self.sentence_tags[g_num - 1]
     graph_features = AuxFunctions.get_features_for_graph(
         self.features, g, sentence_tags, self.is_improved)
     # Walk every index listed in the third field of each feature item.
     fired_indices = (
         index for item in graph_features for index in item[2])
     for index in fired_indices:
         counts[index] += 1
     return counts
예제 #3
0
    def data_inference(self, unlabeled_document_name, output_name, w):
        """Infer a tree for every sentence of a file and write the result.

        The input file is read line by line.  Non-blank lines are split on
        tabs: column 0 is the integer token index, column 1 the token and
        column 3 its POS tag.  A blank line ends the current sentence.

        For each sentence a full graph over its tokens plus a
        ``('root', 'root', 0)`` node is built, weighted with ``w`` via
        ``AuxFunctions.get_weighted_graph``, and passed to ``edmonds.mst``
        rooted at the root node.  The resulting arcs are ordered with
        ``self.get_arches_in_order`` and written with ``self.write_lines``.

        Args:
            unlabeled_document_name: Path of the input file to read.
            output_name: Path of the output file (opened with ``'w+'``).
            w: Weights handed to ``AuxFunctions.get_weighted_graph``.
        """
        root_node = ('root', 'root', 0)

        def infer_and_write(words_pos, out_file):
            """Run inference for one buffered sentence and write its arcs."""
            nodes = {(token, pos, index)
                     for index, (token, pos) in words_pos.items()}
            words_tags = {index: pos
                          for index, (_, pos) in words_pos.items()}
            # The artificial root always takes index 0.
            nodes.add(root_node)
            words_tags[0] = 'root'

            full_unweighted_g = AuxFunctions.make_full_graph(list(nodes))
            g_features = AuxFunctions.get_features_for_graph(
                self.features, full_unweighted_g, words_tags,
                self.is_improved)
            full_weighted_g = AuxFunctions.get_weighted_graph(
                full_unweighted_g, g_features, w)

            g_inference = edmonds.mst(root_node, full_weighted_g)
            self.write_lines(
                self.get_arches_in_order(g_inference), out_file)

        # (counter) -> (token, pos) for the sentence being collected.
        sentence_words_pos = {}
        sentence_num = 0
        # The with-statement closes both files, so no explicit close().
        with open(unlabeled_document_name, 'r') as f1, \
                open(output_name, 'w+') as f2:
            for line in f1:
                # Whitespace-only lines count as sentence separators too;
                # previously they reached int() and raised ValueError.
                if line.strip():
                    split_line = line.split('\t')
                    sentence_words_pos[int(split_line[0])] = (
                        split_line[1], split_line[3])
                    continue

                infer_and_write(sentence_words_pos, f2)
                sentence_num += 1
                print('Done sentence number ' + str(sentence_num))
                sentence_words_pos = {}

            # A file that does not end with a blank line still has a
            # pending sentence; process it instead of dropping it.
            if sentence_words_pos:
                infer_and_write(sentence_words_pos, f2)
                sentence_num += 1
                print('Done sentence number ' + str(sentence_num))