def __relations_labeling(self, relations, dest_data_type):
    """Run the network over every bag of `dest_data_type` and write the
    predicted label of each bag onto the related relations.

    relations      -- ExtractedRelationsCollection to receive labels.
    dest_data_type -- unicode key selecting which bags collection to score.
    Returns a PredictVariables log of the fetched network variables per batch.
    """
    assert(isinstance(dest_data_type, unicode))
    assert(isinstance(relations, ExtractedRelationsCollection))

    log = PredictVariables()
    names, tensors = self.network.Variables

    bags = self.get_bags_collection(dest_data_type)
    for group in bags.iter_by_groups(self.Settings.BagsPerMinibatch):

        batch = self.create_batch_by_bags_group(group)
        fetched = self.sess.run(
            [self.network.Labels] + tensors,
            feed_dict=self.create_feed_dict(batch, data_type=dest_data_type))

        # First fetch is the label tensor; the rest are the logged variables.
        labels_uint = fetched[0]
        extra = fetched[1:]

        log.add(names=names,
                results=extra,
                relation_ids=[s.RelationID for s in batch.iter_by_samples()])

        if DebugKeys.PredictBatchDisplayLog:
            display_log(names, extra)

        # Propagate each bag's predicted label to its samples' relations;
        # negative RelationID marks padding/invalid samples and is skipped.
        for i, bag in enumerate(batch.iter_by_bags()):
            predicted = self.get_labels_helper().create_label_from_uint(
                label_uint=int(labels_uint[i]))
            for s in bag:
                if s.RelationID >= 0:
                    relations.apply_label(predicted, s.RelationID)

    return log
# NOTE(review): the two lines below ("Beispiel #2" / "0") are scraping
# artifacts (an example separator), not Python code; commented out so the
# file parses. Original text preserved:
# Beispiel #2
# 0
    def prepare_sources(self):

        train_files, test_files = self.IO.get_train_test_paths()
        print "Train files: ", train_files
        print "Test files: ", test_files

        self.__train_helper = ModelInitHelper(
            samples_filepaths=train_files,
            word_embedding_filepath=self.IO.get_word_embedding_filepath(),
            settings=self.Settings,
            data_type=DataType.Train)

        self.__test_helper = ModelInitHelper(
            samples_filepaths=test_files,
            word_embedding_filepath=self.IO.get_word_embedding_filepath(),
            settings=self.Settings,
            data_type=DataType.Test)

        print "Saving train collections ..."
        self.__save_etalon(self.__train_helper.RelationsCollectionHelper)
        print "Saving test collections ..."
        self.__save_etalon(self.__test_helper.RelationsCollectionHelper)

        norm, _ = self.get_relations_collection_helper(
            DataType.Train).get_statistic()
        self.Settings.set_class_weights(norm)

        keys, values = self.Settings.get_parameters()
        display_log(keys, values)
 def __print_statistic(self):
     keys, values = self.Settings.get_parameters()
     display_log(keys, values)
     self.get_relations_collection_helper(DataType.Train).debug_labels_statistic()
     self.get_relations_collection_helper(DataType.Train).debug_unique_relations_statistic()
     self.get_relations_collection_helper(DataType.Test).debug_labels_statistic()
     self.get_relations_collection_helper(DataType.Test).debug_unique_relations_statistic()
     self.get_bags_collection_helper(DataType.Train).print_log_statistics()
     self.get_bags_collection_helper(DataType.Test).print_log_statistics()
    def fit(self):
        """Train the network for `self.Settings.Epochs` epochs over the
        train-split minibatches, reporting averaged cost/accuracy to the
        optional callback after every epoch.

        Requires an open TF session (`self.sess`). A callback may stop
        training early by cancelling `operation_cancel`.
        """
        assert(self.sess is not None)

        operation_cancel = OperationCancellation()
        # Materialize all bag groups once; the list is reshuffled in place
        # at the start of every epoch.
        minibatches = list(self.get_bags_collection(DataType.Train).iter_by_groups(self.Settings.BagsPerMinibatch))
        print "Minibatches passing per epoch count: {}".format(len(minibatches))

        for epoch_index in range(self.Settings.Epochs):

            # Checked at epoch granularity: a callback from the previous
            # epoch may have requested cancellation.
            if operation_cancel.IsCancelled:
                break

            self.get_bags_collection_helper(DataType.Train).print_log_statistics()

            total_cost = 0
            total_acc = 0
            groups_count = 0

            np.random.shuffle(minibatches)

            for bags_group in minibatches:

                minibatch = self.create_batch_by_bags_group(bags_group)
                feed_dict = self.create_feed_dict(minibatch, data_type=DataType.Train)

                # Fetch order: [optimiser, cost, accuracy, *debug variables];
                # result indices below depend on this exact ordering.
                var_names, var_tensors = self.network.Variables
                result = self.sess.run([self.optimiser, self.network.Cost, self.network.Accuracy] + var_tensors,
                                       feed_dict=feed_dict)
                cost = result[1]

                if DebugKeys.FitBatchDisplayLog:
                    display_log(var_names, result[3:])

                # np.mean: presumably cost may be a per-bag vector — TODO confirm.
                total_cost += np.mean(cost)
                total_acc += result[2]
                groups_count += 1

            # Optionally checkpoint the model after each epoch.
            if DebugKeys.FitSaveTensorflowModelState:
                self.save_model(save_path=self.IO.get_model_filepath())

            if self.callback is not None:
                self.callback.on_epoch_finished(avg_cost=total_cost / groups_count,
                                                avg_acc=total_acc / groups_count,
                                                epoch_index=epoch_index,
                                                operation_cancel=operation_cancel)

        if self.callback is not None:
            self.callback.on_fit_finished()