예제 #1
0
    def _get_dev_metric(self, inputs, targets):
        """
        Compute the F1 score of the model's predictions on a development set.

        Every sentence is converted into two binary vectors of length
        ``len(sentence) - 1``. Position ``i`` of the prediction vector is 1
        when ``i`` is contained in the result of ``self.predict``; position
        ``i`` of the annotation vector is 1 when ``i`` is contained in the
        matching entry of ``targets``. All other positions are 0.

        :param list[Sentence] inputs: sentences to run the model on
        :param list[list[int]] targets: target indices per sentence, paired
            with ``inputs`` by position
        :return: the value returned by ``MetricHelper.f1()`` for the
            collected prediction and annotation vectors
        """
        predicted_rows = []
        annotated_rows = []
        # Prediction runs under no_grad, exactly as evaluation code should.
        with torch.no_grad():
            for sentence, wanted in zip(inputs, targets):
                hits = self.predict(sentence)
                positions = range(len(sentence) - 1)
                predicted_rows.append(
                    [1 if pos in hits else 0 for pos in positions])
                annotated_rows.append(
                    [1 if pos in wanted else 0 for pos in positions])
        scorer = MetricHelper(predicted=predicted_rows, target=annotated_rows)
        return scorer.f1()
예제 #2
0
    def train_model_dynamic(self,
                            train_sets,
                            dev_sets,
                            max_tries=5,
                            print_interval=500,
                            save_path="",
                            teacher_forcing=0.5,
                            max_epochs=20):
        """
        Train one epoch at a time with early stopping on the dev-set F1.

        After each epoch the model is scored on the dev data. Whenever the
        score is at least as high as the best score so far, the model
        parameters are saved and the patience counter is reset; otherwise
        the counter is incremented. Training stops once the counter reaches
        ``max_tries`` or ``max_epochs`` epochs have been run.

        :param train_sets: passed to ``self.make_training_data`` to build
            the training inputs and targets
        :param dev_sets: passed to ``self.make_training_data`` to build the
            dev inputs, targets and dataset
        :param max_tries: number of consecutive epochs without a new best
            score that ends training
        :param print_interval: forwarded to ``self.model.train``
        :param save_path: forwarded to ``self.model.save_model_parameters``
        :param teacher_forcing: forwarded to ``self.model.train``
        :param max_epochs: upper bound on the number of epochs
        """
        train_input, train_target, _ = self.make_training_data(train_sets)
        dev_input, dev_target, dev_ds = self.make_training_data(dev_sets)
        print("Prepared Data")

        top_f1 = 0
        top_epoch = 0
        epochs_done = 0
        stale_epochs = 0

        while stale_epochs < max_tries and epochs_done < max_epochs:
            epochs_done += 1

            # Exactly one training epoch per loop iteration.
            self.model.train(inputs=train_input,
                             targets=train_target,
                             epochs=1,
                             print_interval=print_interval,
                             teacher_forcing=teacher_forcing,
                             dev_inputs=dev_input,
                             dev_targets=dev_target)

            # Score the current model on the dev data.
            with torch.no_grad():
                dev_vectors = []
                for sentence in dev_input:
                    hits = self.model.predict(sentence)
                    dev_vectors.append(
                        [1 if pos in hits else 0
                         for pos in range(len(sentence) - 1)])
                scorer = MetricHelper(predicted=dev_vectors,
                                      target=dev_ds.get_merged_annotations())
                epoch_f1 = scorer.f1()

            # Ties count as an improvement (>=), so a plateau resets patience.
            improved = epoch_f1 >= top_f1
            if not improved:
                stale_epochs += 1
                print("f1={}\n".format(epoch_f1))
            else:
                top_f1 = epoch_f1
                stale_epochs = 0
                self.model.save_model_parameters(save_path)
                print("New best model with f1={} saved\n".format(epoch_f1))
                top_epoch = epochs_done
            print("{} epochs remaining".format(max_epochs - epochs_done))

        summary = "Finished training after {} epochs, {} seconds. Best model achieved after {} epochs"
        print(summary.format(epochs_done, self.model.train_time, top_epoch))
예제 #3
0
    def test_ner(self, datasets):
        """
        Annotate all documents of the given datasets and print the scores.

        The first entry of ``datasets`` is used to construct the ``DataSet``
        and the full list is then handed to ``read_multiple``. The results
        of ``self.annotate_doc`` for every document are concatenated and
        compared against the dataset's merged annotations.

        :param datasets: sequence of dataset sources; must contain at least
            one entry because ``datasets[0]`` is read
        """
        corpus = DataSet(datasets[0])
        corpus.read_multiple(datasets)

        all_predictions = []
        for document in corpus.documents:
            all_predictions.extend(self.annotate_doc(document))

        scorer = MetricHelper(predicted=all_predictions,
                              target=corpus.get_merged_annotations())
        report = "Recall: {}, Precision: {}, F1: {}".format(
            scorer.recall(), scorer.precision(), scorer.f1())
        print(report)
예제 #4
0
    def test_model(self, data_files):
        """
        Evaluate the model on the given data files and report the scores.

        The first entry of ``data_files`` is used to construct the
        ``DataSet`` and the full list is then handed to ``read_multiple``.
        The data built by ``self.data_helper.build_train_data`` is fed to
        ``self.model.predict`` in a single call.

        :param data_files: sequence of data file paths; must contain at
            least one entry because ``data_files[0]`` is read
        :return: tuple ``(recall, precision, f1)`` as computed by
            ``MetricHelper``
        """
        test_ds = DataSet(path=data_files[0])
        test_ds.read_multiple(data_files)
        t_data, target = self.data_helper.build_train_data(test_ds)

        predicted = self.model.predict(t_data)
        # Prediction and target are each wrapped in a one-element list
        # before being handed to MetricHelper.
        mh = MetricHelper(predicted=[predicted], target=[target])

        # Compute every metric exactly once so the printed and the returned
        # values are guaranteed to be the same and no work is repeated.
        recall, precision, f1 = mh.recall(), mh.precision(), mh.f1()
        print("Recall: {}, Precision: {}, F1: {}".format(
            recall, precision, f1))
        return recall, precision, f1
예제 #5
0
 def test_model(self, datasets):
     """
     Evaluate the model on the given datasets and report the scores.

     Each input sentence is converted into a binary vector of length
     ``len(sentence) - 1`` whose position ``i`` is 1 when ``i`` is
     contained in the result of ``self.model.predict`` and 0 otherwise.
     The vectors are compared against the dataset's merged annotations.

     :param datasets: passed to ``self.make_training_data`` to build the
         inputs and the dataset object
     :return: tuple ``(recall, precision, f1)`` as computed by
         ``MetricHelper``
     """
     # The targets returned by make_training_data are not needed here; the
     # reference labels come from the dataset's merged annotations.
     input_list, _, ds = self.make_training_data(datasets)
     print("Prepared Test data")

     predicted = []
     with torch.no_grad():
         for input_sent in input_list:
             res_indices = self.model.predict(input_sent)
             predicted.append(
                 [1 if i in res_indices else 0
                  for i in range(len(input_sent) - 1)])

     mh = MetricHelper(predicted=predicted,
                       target=ds.get_merged_annotations())

     # Compute every metric exactly once so the printed and the returned
     # values are guaranteed to be the same and no work is repeated.
     recall, precision, f1 = mh.recall(), mh.precision(), mh.f1()
     print("Recall: {}, Precision: {}, F1: {}".format(
         recall, precision, f1))
     return recall, precision, f1