import torch

# DataSet, MetricHelper and Sentence are imported from elsewhere in this
# repo; their module paths are not shown in this excerpt.


def _get_dev_metric(self, inputs, targets):
    """Compute the model's F1 score on a development set.

    :param list[Sentence] inputs: development sentences
    :param list[list[int]] targets: gold annotation indices per sentence
    :return: F1 score over the binary per-token labels
    """
    with torch.no_grad():
        predicted = []
        target_anno = []
        for input_sent, t in zip(inputs, targets):
            res_indices = self.predict(input_sent)
            cur_predicted = []
            cur_anno = []
            # Convert predicted and gold index sets into binary label
            # sequences (1 = annotated token, 0 = any other token).
            for i in range(len(input_sent) - 1):
                cur_predicted.append(1 if i in res_indices else 0)
                cur_anno.append(1 if i in t else 0)
            predicted.append(cur_predicted)
            target_anno.append(cur_anno)
        mh = MetricHelper(predicted=predicted, target=target_anno)
        return mh.f1()
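# The index-to-binary conversion above recurs in train_model_dynamic and
# test_model below. A minimal standalone sketch of that step; the helper
# name is hypothetical and not part of this repo:
def indices_to_binary(sentence_length, indices):
    """Map predicted token indices to a 0/1 label sequence of length
    sentence_length - 1, mirroring the loops used in this section."""
    return [1 if i in indices else 0 for i in range(sentence_length - 1)]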
def train_model_dynamic(self, train_sets, dev_sets, max_tries=5,
                        print_interval=500, save_path="",
                        teacher_forcing=0.5, max_epochs=20):
    """Train with early stopping: stop after max_tries epochs without
    improvement of the dev F1, or after max_epochs epochs overall."""
    train_input, train_target, _ = self.make_training_data(train_sets)
    dev_input, dev_target, dev_ds = self.make_training_data(dev_sets)
    print("Prepared Data")
    best_f1 = 0
    n_tries = 0
    n_epochs = 0
    best_epoch = 0
    while n_tries < max_tries and n_epochs < max_epochs:
        n_epochs += 1
        # Train the model for one epoch.
        self.model.train(inputs=train_input, targets=train_target, epochs=1,
                         print_interval=print_interval,
                         teacher_forcing=teacher_forcing,
                         dev_inputs=dev_input, dev_targets=dev_target)
        # Evaluate the model on the dev set.
        with torch.no_grad():
            predicted = []
            for input_sent in dev_input:
                res_indices = self.model.predict(input_sent)
                # Convert predicted indices into a binary label sequence.
                cur_predicted = [1 if i in res_indices else 0
                                 for i in range(len(input_sent) - 1)]
                predicted.append(cur_predicted)
            mh = MetricHelper(predicted=predicted,
                              target=dev_ds.get_merged_annotations())
            cur_f1 = mh.f1()
        if cur_f1 >= best_f1:
            # New best model: reset the patience counter and save.
            best_f1 = cur_f1
            n_tries = 0
            self.model.save_model_parameters(save_path)
            print("New best model with f1={} saved\n".format(cur_f1))
            best_epoch = n_epochs
        else:
            n_tries += 1
            print("f1={}\n".format(cur_f1))
        print("{} epochs remaining".format(max_epochs - n_epochs))
    print("Finished training after {} epochs, {} seconds. "
          "Best model achieved after {} epochs"
          .format(n_epochs, self.model.train_time, best_epoch))
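# A minimal usage sketch of the early-stopping loop above. The class name
# `SequenceAnnotator` and the file paths are placeholders (assumptions for
# illustration, not part of this repo); only the keyword arguments match
# the signature of train_model_dynamic.
#
#   annotator = SequenceAnnotator()
#   annotator.train_model_dynamic(
#       train_sets=["data/train.txt"],
#       dev_sets=["data/dev.txt"],
#       max_tries=5,           # patience: epochs without dev-F1 improvement
#       print_interval=500,
#       save_path="models/best_model.pt",
#       teacher_forcing=0.5,
#       max_epochs=20)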
def test_ner(self, datasets):
    ds = DataSet(datasets[0])
    ds.read_multiple(datasets)
    predicted_list = []
    for doc in ds.documents:
        predicted = self.annotate_doc(doc)
        predicted_list += predicted
    mh = MetricHelper(predicted=predicted_list,
                      target=ds.get_merged_annotations())
    print("Recall: {}, Precision: {}, F1: {}".format(
        mh.recall(), mh.precision(), mh.f1()))
def test_model(self, data_files):
    test_ds = DataSet(path=data_files[0])
    test_ds.read_multiple(data_files)
    t_data, target = self.data_helper.build_train_data(test_ds)
    predicted = self.model.predict(t_data)
    # MetricHelper expects lists of label sequences, so the single
    # predicted/target sequences are wrapped in one-element lists.
    mh = MetricHelper(predicted=[predicted], target=[target])
    print("Recall: {}, Precision: {}, F1: {}".format(
        mh.recall(), mh.precision(), mh.f1()))
    return mh.recall(), mh.precision(), mh.f1()
def test_model(self, datasets):
    input_list, target_list, ds = self.make_training_data(datasets)
    print("Prepared test data")
    predicted = []
    with torch.no_grad():
        for input_sent in input_list:
            res_indices = self.model.predict(input_sent)
            # Convert predicted indices into a binary label sequence.
            cur_predicted = [1 if i in res_indices else 0
                             for i in range(len(input_sent) - 1)]
            predicted.append(cur_predicted)
    mh = MetricHelper(predicted=predicted,
                      target=ds.get_merged_annotations())
    print("Recall: {}, Precision: {}, F1: {}".format(
        mh.recall(), mh.precision(), mh.f1()))
    return mh.recall(), mh.precision(), mh.f1()
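# MetricHelper itself is defined elsewhere in this repo; the sketch below
# only mirrors the interface used throughout this section (predicted and
# target as lists of binary label sequences, plus recall(), precision()
# and f1()). The micro-averaged token-level counting is an assumption,
# not the repo's actual implementation.
class MetricHelperSketch:
    def __init__(self, predicted, target):
        # Count true positives, false positives and false negatives over
        # all sentences' binary token labels.
        self.tp = self.fp = self.fn = 0
        for pred_seq, gold_seq in zip(predicted, target):
            for p, g in zip(pred_seq, gold_seq):
                if p == 1 and g == 1:
                    self.tp += 1
                elif p == 1 and g == 0:
                    self.fp += 1
                elif p == 0 and g == 1:
                    self.fn += 1

    def precision(self):
        denom = self.tp + self.fp
        return self.tp / denom if denom else 0.0

    def recall(self):
        denom = self.tp + self.fn
        return self.tp / denom if denom else 0.0

    def f1(self):
        p, r = self.precision(), self.recall()
        return 2 * p * r / (p + r) if p + r else 0.0


# Example: MetricHelperSketch(predicted=[[1, 0, 1]], target=[[1, 1, 1]])
# gives recall 2/3, precision 1.0 and f1 0.8.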