Beispiel #1
0
    def predict(self, dest_data_type=DataType.Test):
        """
        Label the relations of the `dest_data_type` collection with the
        network output, save one opinion collection per news id and return
        the result of `self._evaluate`.

        dest_data_type: key of `self.relation_collections`
            selects the collection being labeled (DataType.Test by default).
        """

        def first_label(labels):
            # Callback for fill_opinion_collection: keep the first label.
            return labels[0]

        relations = self.relation_collections[dest_data_type]
        relations.reset_labels()

        batch_size = self.Settings.BatchSize
        for relation_groups in relations.iter_by_linked_relations_groups(batch_size):

            batch = Batch(relation_groups, self.Settings.GroupSize)
            feed_dict = self.create_feed_dict(batch, dest_data_type)

            # Only the labels are fetched, so the result holds one element.
            uint_labels = self.sess.run([self.network.Labels],
                                        feed_dict=feed_dict)[0]

            # One predicted label per group, shared by all of its relations.
            for group_index, group in enumerate(batch.iter_groups):
                for relation in group:
                    assert (isinstance(relation, ExtractedRelation))
                    relations.apply_label(
                        label=Label.from_uint(int(uint_labels[group_index])),
                        relation_id=relation.relation_id)

        # Write the labeled relations out as one opinion collection per news id.
        for news_ID in self.io.get_data_indices(dest_data_type):
            collection = OpinionCollection(None, self.synonyms,
                                           self.Settings.Stemmer)
            relations.fill_opinion_collection(
                collection,
                news_ID,
                first_label,
                debug_check_collection=False)

            model_root = self.io.get_model_root(dest_data_type)
            output_filepath = self.io.get_opinion_output_filepath(news_ID,
                                                                  model_root)
            collection.save(output_filepath)

        return self._evaluate(dest_data_type, self.Settings.Stemmer)
Beispiel #2
0
    def apply_labels(self, uint_labels, minibatch):
        """
        Assign a label to the relation behind every sample of the minibatch.

        uint_labels: list of int
            one label per sample: 0 -- neutral, 1 -- positive, 2 -- negative
        minibatch: MiniBatch
            its bags are walked in order, and the samples of each bag in
            order, so labels are expected to follow that same ordering.
        """
        assert (isinstance(uint_labels, list))
        assert (isinstance(minibatch, MiniBatch))

        # Flatten bags into a single stream of samples, keeping their order.
        ordered_samples = (sample
                           for bag in minibatch.bags
                           for sample in bag.samples)

        for label_index, sample in enumerate(ordered_samples):
            label = Label.from_uint(uint_labels[label_index])
            self._find_relation_and_set_label(sample.position, label)
Beispiel #3
0
    def predict(self, dest_data_type=DataType.Test):
        """
        Label the relations of the `dest_data_type` collection with the
        network output, save the results and return `self._evaluate(...)`.

        Steps performed:
          1. reset the labels of the relation collection;
          2. run the network over minibatches of bags and apply the predicted
             label (and store the raw output) for every sample's relation;
          3. save the relation collection and the prediction collection to
             the paths given by `self.io` for the last fit epoch;
          4. save one opinion collection per news id.

        dest_data_type: unicode
            key of the relation/bag collections to process
            (DataType.Test by default).
        """

        def calculate_label(relation_labels):
            """
            Reduce the labels gathered for one relation to a single label,
            according to `self.Settings.RelationLabelCalculationMode`.
            """
            assert(isinstance(relation_labels, list))

            mode = self.Settings.RelationLabelCalculationMode

            # NOTE: label stays None for any mode other than the two below.
            label = None
            if mode == LabelCalculationMode.FIRST_APPEARED:
                label = relation_labels[0]
            elif mode == LabelCalculationMode.AVERAGE:
                # Sign of the summed integer labels.
                label = Label.from_int(
                    np.sign(sum(lbl.to_int() for lbl in relation_labels)))

            if DebugKeys.PredictLabel:
                # Parenthesized single-argument form prints the same text
                # whether `print` is a statement or a function.
                print([lbl.to_int() for lbl in relation_labels])
                print("Result: {}".format(label.to_int()))

            return label

        assert(isinstance(dest_data_type, unicode))

        relations = self._relations_collections[dest_data_type]
        relations.reset_labels()
        prediction_collection = RelationPredictionResultCollection(len(relations))

        for bags_group in self.bags_collection[dest_data_type].iter_by_groups(self.Settings.BagsPerMinibatch):

            minibatch = MiniBatch(bags_group)
            feed_dict = self.create_feed_dict(minibatch, data_type=dest_data_type)

            # Fetch labels and raw output first; log parameters follow them.
            log_names, log_params = self.network.Log
            result = self.sess.run([self.network.Labels, self.network.Output] + log_params, feed_dict=feed_dict)
            uint_labels = result[0]
            output = result[1]

            if DebugKeys.PredictBatchDisplayLog:
                self._display_log(log_names, result[2:])

            # Apply labels: the i-th sample of the minibatch takes the i-th
            # predicted label and the i-th output row.
            sample_indices_count = 0
            for sample_index, sample in enumerate(minibatch.iter_by_samples()):
                label = Label.from_uint(int(uint_labels[sample_index]))
                relations.apply_label(label, sample.RelationID)
                prediction_collection.add(sample.RelationID, RelationPredictionResult(output[sample_index]))
                sample_indices_count += 1

            # Every predicted label must have been consumed by a sample.
            assert(sample_indices_count == len(uint_labels))

        assert(relations.debug_check_all_relations_has_labels())

        relations.debug_labels_statistic(dest_data_type)

        # Compose Result
        relations.save(
            self.io.get_relations_filepath(data_type=dest_data_type,
                                           epoch=self._last_fit_epoch_index))

        prediction_collection.save(
            self.io.get_relations_prediction_filepath(data_type=dest_data_type,
                                                      epoch=self._last_fit_epoch_index))

        for news_ID in self.io.get_data_indices(dest_data_type):
            # Use `self.Settings`, as everywhere else in this method.
            collection = OpinionCollection(None, self.synonyms, self.Settings.Stemmer)
            relations.fill_opinion_collection(collection, news_ID, calculate_label)

            collection.save(self.io.get_opinion_output_filepath(news_ID, self.io.get_model_root(dest_data_type)))

        return self._evaluate(dest_data_type, self.Settings.Stemmer)