Code example #1
	def dump_errors(self, checkpoint_path, output_file="mistakes.txt", task_name=SNLITask.NAME):
		if task_name == SNLITask.NAME:
			_, test_dataset, _ = DatasetHandler.load_SNLI_datasets()
		elif task_name == MNLITask.NAME:
			_, test_dataset, _ = DatasetHandler.load_MultiNLI_datasets()
		else:
			print("Unsupported task: " + str(task_name))
			sys.exit(1)

		evaluation_dict = load_model(checkpoint_path)["evaluation_dict"]
		task_evaluation = evaluation_dict[max(evaluation_dict.keys())][task_name]
		predictions = task_evaluation["predictions"]
		labels = task_evaluation["labels"]

		mistakes = [i for i, p, l in zip(range(len(predictions)), predictions.tolist(), labels.tolist()) if p != l]
		print("Number of mistakes: " + str(len(mistakes)) + " | " + str(len(test_dataset.data_list)) + " (%4.2f%%)" % (len(mistakes)*100.0/len(test_dataset.data_list)))
		print("Confusions:")
		for l in set(labels):
			for p in set(predictions):
				if l == p:
					continue
				print("\t- Label %s, pred %s: %i" % (test_dataset.label_to_string(l), test_dataset.label_to_string(p), len([m for m in mistakes if predictions[m]==p and labels[m]==l])))
		file_text = ""
		for example_index in mistakes:
			file_text += "-"*50 + "\n"
			file_text += "Label: " + str(test_dataset.label_to_string(labels[example_index])) + ", Prediction: " + str(test_dataset.label_to_string(predictions[example_index])) + "\n"
			file_text += "Premise: " + " ".join(test_dataset.data_list[example_index].premise_words) + "\n"
			file_text += "Hypothesis: " + " ".join(test_dataset.data_list[example_index].hypothesis_words) + "\n"
		with open(output_file, "w") as f:
			f.write(file_text)
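A minimal invocation sketch (the `evaluator` object and both paths are placeholders, not taken from the original project):

# Hypothetical call: write every misclassified SNLI test example, together
# with its gold label and the model's prediction, to a plain-text file.
evaluator.dump_errors("checkpoints/run_01",
                      output_file="snli_mistakes.txt",
                      task_name=SNLITask.NAME)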
Code example #2
	def evaluate_all_models(self, checkpoint_path):
		checkpoint_files = sorted(glob(os.path.join(checkpoint_path, "*.tar")))

		model_results = dict()

		for i in range(len(checkpoint_files)):
			checkpoint_dict = load_model(checkpoint_files[i], model=self.model, tasks=self.tasks)
			epoch = checkpoint_dict["epoch"]
			model_results[epoch] = dict()
			model_results[epoch]["checkpoint_file"] = checkpoint_files[i]
			model_results[epoch]["train"] = self.eval(dataset=self.train_dataset)
			model_results[epoch]["val"] = self.eval(dataset=self.val_dataset)
			model_results[epoch]["test"] = self.eval(dataset=self.test_dataset)
			print("Model at epoch %i achieved %4.2f%% on validation and %4.2f%% on test dataset" % (epoch, 100.0 * model_results[epoch]["val"], 100.0 * model_results[epoch]["test"]))

		best_acc = {
			"train": {"acc": 0, "epoch": 0},
			"val": {"acc": 0, "epoch": 0},
			"test": {"acc": 0, "epoch": 0}
		}
		for epoch, epoch_dict in model_results.items():
			for data in ["train", "val", "test"]:
				if epoch_dict[data] > best_acc[data]["acc"]:
					best_acc[data]["epoch"] = epoch
					best_acc[data]["acc"] = epoch_dict[data] 

		print("Best train accuracy: %4.2f%% (epoch %i)" % (100.0 * best_acc["train"]["acc"], best_acc["train"]["epoch"]))
		print("Best validation accuracy: %4.2f%% (epoch %i)" % (100.0 * best_acc["val"]["acc"], best_acc["val"]["epoch"]))
		print("Best test accuracy: %4.2f%% (epoch %i)" % (100.0 * best_acc["test"]["acc"], best_acc["test"]["epoch"]))
		return model_results, best_acc
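The two returned dictionaries can be combined to look up the strongest checkpoint; a short sketch, assuming a `trainer` instance that owns this method and a checkpoint directory with this layout:

model_results, best_acc = trainer.evaluate_all_models("checkpoints/run_01")
best_val_epoch = best_acc["val"]["epoch"]
# model_results maps each epoch to its checkpoint file and per-split accuracies
best_checkpoint_file = model_results[best_val_epoch]["checkpoint_file"]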
Code example #3
def create_model(checkpoint_path, model_type, model_params):
	_, _, wordvec_tensor = load_word2vec_from_file()
	model = MultiTaskEncoder(model_type, model_params, wordvec_tensor)
	_ = load_model(checkpoint_path, model=model)
	for param in model.parameters():
		param.requires_grad = False
	model.eval()
	return model
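A usage sketch (the checkpoint directory, `model_type`, and `model_params` are placeholders for whatever the training run produced):

model = create_model("checkpoints/run_01", model_type, model_params)
# All parameters are frozen and the module is in eval mode, so the returned
# encoder can be used directly for inference, e.g. to embed sentences.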
Code example #4
	def visualize_tensorboard(self, checkpoint_path, optimizer_params=None, replace_old_files=False, additional_datasets=None):
		if replace_old_files:
			for old_tf_file in sorted(glob(os.path.join(checkpoint_path, "events.out.tfevents.*"))):
				print("Removing " + old_tf_file + "...")
				os.remove(old_tf_file)
		
		writer = SummaryWriter(log_dir=checkpoint_path)
		
		# dummy_embeds, dummy_length, _ = self.train_dataset.get_batch(self.batch_size, loop_dataset=False, toTorch=True, bidirectional=self.model.is_bidirectional())
		# writer.add_graph(self.model, (dummy_embeds[0], dummy_length[0], dummy_embeds[1], dummy_length[1]))
		
		final_dict = load_model(checkpoint_path)
		for batch in range(len(final_dict["loss_avg_list"])):
			writer.add_scalar("train/loss", final_dict["loss_avg_list"][batch], batch*50+1)

		for epoch in range(len(final_dict["eval_accuracies"])):
			writer.add_scalar("eval/accuracy", final_dict["eval_accuracies"][epoch], epoch+1)

		if optimizer_params is not None:
			lr = optimizer_params["lr"]
			lr_decay_step = optimizer_params["lr_decay_step"]
			for epoch in range(len(final_dict["eval_accuracies"])):
				writer.add_scalar("train/learning_rate", lr, epoch+1)
				if epoch in final_dict["lr_red_step"]:
					lr *= lr_decay_step

		# model_results, best_acc = self.evaluate_all_models(checkpoint_path)
		# for epoch, result_dict in model_results.items():
		# 	for data in ["train", "val", "test"]:
		# 		writer.add_scalar("eval/" + data + "_accuracy", result_dict[data], epoch+1)

		max_acc = max(final_dict["eval_accuracies"])
		best_epoch = final_dict["eval_accuracies"].index(max_acc) + 1
		load_model(os.path.join(checkpoint_path, "checkpoint_" + str(best_epoch).zfill(3) + ".tar"), model=self.model)

		visualize_tSNE(self.model, self.test_easy_dataset, writer, embedding_name="Test set easy", add_reduced_version=True)
		visualize_tSNE(self.model, self.test_hard_dataset, writer, embedding_name="Test set hard", add_reduced_version=True)
		if additional_datasets is not None:
			for dataset_name, dataset in additional_datasets.items():
				print("Adding embeddings for dataset " + str(dataset_name))
				visualize_tSNE(self.model, dataset, writer, embedding_name=dataset_name, add_reduced_version=True)

		writer.close()
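For the learning-rate curve the method only needs the initial rate and the multiplicative decay factor; a sketch of the expected `optimizer_params` layout (the values and the `trainer` instance are placeholders):

optimizer_params = {"lr": 0.1, "lr_decay_step": 0.2}  # initial LR and multiplicative decay
trainer.visualize_tensorboard("checkpoints/run_01",
                              optimizer_params=optimizer_params,
                              replace_old_files=True)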
Code example #5
File: eval_diversity.py  Project: elnaz776655/P2_Net
def load_our_model(checkpoint_path):
    global OUR_MODEL
    if OUR_MODEL is None:
        args = load_args(checkpoint_path)

        print("-> Loading model...")
        model_params, _ = unsupervised_args_to_params(args)

        _, _, wordvec_tensor = load_word2vec_from_file()
        model = ModelUnsupervisedContextParaphrasingTemplate(
            model_params, wordvec_tensor)

        print(checkpoint_path)
        _ = load_model(checkpoint_path, model=model, load_best_model=True)
        model = model.to(get_device())

        model.eval()

        OUR_MODEL = model
    return OUR_MODEL
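Because the loaded model is cached in the module-level `OUR_MODEL`, only the first call deserializes the checkpoint; a small sketch (the path is illustrative):

model_a = load_our_model("checkpoints/p2net_run")  # loads args, weights, moves to device
model_b = load_our_model("checkpoints/p2net_run")  # returns the cached instance, no reload
assert model_a is model_b
# Note: a later call with a *different* path would still return the cached model.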
Code example #6
	def test_best_model(self, checkpoint_path, main_task=None, delete_others=False, run_standard_eval=True, run_training_set=False, run_sent_eval=True, run_extra_eval=True, light_senteval=True, final_eval_dict=None):
		
		if final_eval_dict is None:
			final_eval_dict = dict()

		if main_task is None:
			for t in self.tasks:
				self.test_best_model(checkpoint_path=checkpoint_path, main_task=t, delete_others=delete_others, 
									 run_standard_eval=run_standard_eval, run_training_set=run_training_set, 
									 run_sent_eval=False, run_extra_eval=run_extra_eval, 
									 light_senteval=True, final_eval_dict=final_eval_dict)
			main_task = self.tasks[0]
		else:
			print("Evaluating with main task " + main_task.name)

		def iter_to_file(iteration):
			return os.path.join(checkpoint_path, "checkpoint_" + str(iteration).zfill(7) + ".tar")

		final_dict = load_model(checkpoint_path)
		best_acc, best_iter = -1, -1
		for eval_iter, eval_dict in final_dict["evaluation_dict"].items():
			if main_task.eval_metric(eval_dict[main_task.name]) > best_acc and os.path.isfile(iter_to_file(eval_iter)):
				best_iter = eval_iter
				best_acc = main_task.eval_metric(eval_dict[main_task.name])

		s = "Best iteration: " + str(best_iter) + " with metric value %4.2f%%" % (best_acc * 100.0) + " on task " + str(main_task.name) + "\n"
		print(s)

		best_checkpoint_path = iter_to_file(best_iter)
		load_model(best_checkpoint_path, model=self.model, tasks=self.tasks)
		for param in self.model.parameters():
			param.requires_grad = False
		self.model.eval()

		if run_standard_eval and (main_task.name not in final_eval_dict):
			acc_dict = {'train' : dict(), 'val' : dict(), 'test' : dict()}
			if run_training_set:
				# For training, we evaluate on the very last checkpoint as we expect to have the best training performance there
				load_model(checkpoint_path, model=self.model, tasks=self.tasks)
				for t in self.tasks:
					t_acc, _ = t.eval(dataset=t.train_dataset)
					acc_dict['train'][t.name] = t_acc
				# Load best checkpoint again
				load_model(best_checkpoint_path, model=self.model, tasks=self.tasks)
			
			for t in self.tasks:
				val_acc, detailed_val_acc = t.eval(dataset=t.val_dataset)
				if t.name == main_task.name and abs(main_task.eval_metric(detailed_val_acc) - best_acc) > 0.0005:
					print("[!] ERROR: Found different accuracy then reported in the final state dict. Difference: %f" % (100.0 * abs(val_acc - max_acc)) ) 
					return 

				test_acc, detailed_acc = t.eval(dataset=t.test_dataset)
				
				acc_dict['val'][t.name] = val_acc
				acc_dict['test'][t.name] = test_acc 
				acc_dict['test'][t.name + "_detailed"] = detailed_acc
				
			final_eval_dict[main_task.name] = acc_dict

			with open(os.path.join(checkpoint_path, "evaluation.pik"), "wb") as f:
				pickle.dump(final_eval_dict, f)

		# if run_extra_eval:
		# 	test_easy_acc = self.eval(dataset=self.test_easy_dataset)
		# 	test_hard_acc = self.eval(dataset=self.test_hard_dataset)
		# 	s = "Test easy accuracy: %4.2f%%\n Test hard accuracy: %4.2f%%\n" % (test_easy_acc*100.0, test_hard_acc*100.0)
		# 	with open(os.path.join(checkpoint_path, "extra_evaluation.txt"), "w") as f:
		# 		f.write(s)

		if run_sent_eval:
			self.model.eval()
			res = perform_SentEval(self.model, fast_eval=light_senteval)
			with open(os.path.join(checkpoint_path, "sent_eval.pik"), "wb") as f:
				pickle.dump(res, f)
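The standard-evaluation results are pickled next to the checkpoints; a sketch of reading them back afterwards (the directory name is assumed):

import os
import pickle

with open(os.path.join("checkpoints/run_01", "evaluation.pik"), "rb") as f:
    final_eval_dict = pickle.load(f)
# One entry per main task, each holding 'train'/'val'/'test' accuracy dicts.
print(final_eval_dict.keys())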
Code example #7
    def train_model(self,
                    max_iterations=1e6,
                    loss_freq=50,
                    eval_freq=2000,
                    save_freq=1e5,
                    max_gradient_norm=10.0,
                    no_model_checkpoints=False):

        # Setup training parameters
        parameters_to_optimize = self._get_all_parameters()
        print("Trainable model parameters: " + str([
            name
            for name, p in self.model.named_parameters() if p.requires_grad
        ]))
        checkpoint_dict = self.load_recent_model()
        start_iter = get_dict_val(checkpoint_dict, "iteration", 0)
        evaluation_dict = get_dict_val(checkpoint_dict, "evaluation_dict",
                                       dict())
        best_save_dict = get_dict_val(checkpoint_dict, "best_save_dict", {
            "file": None,
            "metric": -1,
            "detailed_metrics": None
        })
        best_save_iter = best_save_dict["file"]
        last_save = None if start_iter == 0 else self.get_checkpoint_filename(
            start_iter)
        if last_save is not None and not os.path.isfile(last_save):
            print(
                "[!] WARNING: Could not find last checkpoint file specified as "
                + last_save)
            last_save = None

        writer = SummaryWriter(self.checkpoint_path)

        # Function for saving model. Add here in the dictionary necessary parameters that should be saved
        def save_train_model(iteration, only_weights=True):
            if no_model_checkpoints:
                return
            checkpoint_dict = {
                "iteration": iteration,
                "best_save_dict": best_save_dict
            }
            if only_weights:
                self.save_model(iteration,
                                checkpoint_dict,
                                save_optimizer=False)
            else:
                self.save_model(iteration,
                                checkpoint_dict,
                                save_optimizer=True)

        def export_weight_parameters(iteration):
            # Export weight distributions
            for name, param in self.model.named_parameters():
                if not param.requires_grad:
                    continue
                writer.add_histogram(name,
                                     param.data.view(-1),
                                     global_step=iteration)

        time_per_step = np.zeros((2, ), dtype=np.float32)
        train_losses, train_accs = [], []

        if start_iter == 0 and writer is not None:
            export_weight_parameters(0)
        # Try-catch if user terminates; index_iter is initialized up-front so the
        # KeyboardInterrupt handler below has a valid value even if the loop never runs.
        index_iter = start_iter
        try:
            print("=" * 50 + "\nStarting training...\n" + "=" * 50)
            self.model.train()

            for index_iter in range(start_iter, int(max_iterations)):

                # Training step
                start_time = time.time()
                loss, acc = self.task.train_step(self.batch_size,
                                                 iteration=index_iter)
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(parameters_to_optimize,
                                               max_gradient_norm)
                self.optimizer.step()
                self.lr_scheduler.step()
                end_time = time.time()
                time_per_step[0] += end_time - start_time
                time_per_step[1] += 1
                train_losses.append(loss.item())
                train_accs.append(acc.item())

                # Debug loss printing
                if (index_iter + 1) % loss_freq == 0:
                    loss_avg, acc_avg = sum(train_losses) / len(
                        train_losses), sum(train_accs) / len(train_accs)
                    print("Training iteration %i|%i. Loss: %6.5f" %
                          (index_iter + 1, max_iterations, loss_avg))
                    writer.add_scalar("train/loss", loss_avg, index_iter + 1)
                    writer.add_scalar("train/acc", acc_avg, index_iter + 1)
                    writer.add_scalar("train/learning_rate",
                                      self.optimizer.param_groups[0]['lr'],
                                      index_iter + 1)
                    writer.add_scalar(
                        "train/training_time",
                        time_per_step[0] / max(1e-5, time_per_step[1]),
                        index_iter + 1)
                    self.task.add_summary(writer, index_iter + 1)
                    time_per_step[:] = 0
                    train_losses, train_accs = [], []

                # Evaluation
                if (index_iter + 1) % eval_freq == 0:

                    self.model.eval()
                    eval_BLEU, detailed_scores = self.task.eval(
                        batch_size=self.batch_size)
                    self.model.train()

                    write_dict_to_tensorboard(writer,
                                              detailed_scores,
                                              base_name="eval",
                                              iteration=index_iter + 1)
                    if (index_iter + 1) % (eval_freq * 5) == 0:
                        export_weight_parameters(index_iter + 1)

                    if best_save_dict["metric"] < 0 or eval_BLEU > best_save_dict[
                            "metric"]:  # TODO: Test whether this is new best score or not
                        best_save_iter = self.get_checkpoint_filename(
                            index_iter + 1)
                        if not os.path.isfile(best_save_iter):
                            print("Saving model at iteration " +
                                  str(index_iter + 1))
                            save_train_model(index_iter + 1)
                            if best_save_dict[
                                    "file"] is not None and os.path.isfile(
                                        best_save_dict["file"]):
                                os.remove(best_save_dict["file"])
                            if last_save is not None and os.path.isfile(
                                    last_save):
                                os.remove(last_save)
                            best_save_dict["file"] = best_save_iter
                            last_save = best_save_iter
                        best_save_dict["metric"] = eval_BLEU
                        best_save_dict["detailed_metrics"] = detailed_scores
                        self.task.export_best_results(self.checkpoint_path,
                                                      index_iter + 1)
                    evaluation_dict[index_iter + 1] = best_save_dict["metric"]

                # Saving
                if (index_iter + 1) % save_freq == 0 and not os.path.isfile(
                        self.get_checkpoint_filename(index_iter + 1)):
                    save_train_model(index_iter + 1)
                    if last_save is not None and os.path.isfile(
                            last_save) and last_save != best_save_iter:
                        os.remove(last_save)
                    last_save = self.get_checkpoint_filename(index_iter + 1)

            eval_BLEU, detailed_scores = self.task.eval(
                batch_size=self.batch_size)
            print("Before reloading, the model achieved a score of %f" %
                  eval_BLEU)
            if not no_model_checkpoints and best_save_iter is not None:
                load_model(best_save_iter,
                           model=self.model,
                           optimizer=self.optimizer,
                           lr_scheduler=self.lr_scheduler)
                eval_BLEU, detailed_scores = self.task.eval(
                    batch_size=self.batch_size)
                print("Best model achieved %s" % str(eval_BLEU))
                if eval_BLEU != best_save_dict["metric"]:
                    print(
                        "[!] WARNING: new evaluation differs from saved one (%s vs %s)!"
                        % (str(eval_BLEU), str(best_save_dict["metric"])))
                self.task.finalize_summary(writer, max_iterations,
                                           self.checkpoint_path)
            else:
                print("Skipping finalizing the summary because %s..." %
                      ("no model checkpoints were saved"
                       if no_model_checkpoints else "best_save_iter was None"))

        except KeyboardInterrupt:
            print(
                "User keyboard interrupt detected. Saving model at step %i..."
                % (index_iter))
            save_train_model(index_iter + 1)
            if last_save is not None and os.path.isfile(last_save) and not any(
                [val == last_save for _, val in best_save_dict.items()]):
                os.remove(last_save)

        with open(os.path.join(self.checkpoint_path, "results.txt"), "w") as f:
            for eval_iter, eval_dict in evaluation_dict.items():
                f.write("Iteration %i: " % (eval_iter))
                f.write("BLEU: %s" % str(best_save_dict["metric"]))
                f.write("\n")

        writer.close()
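A call sketch (the `trainer` instance and the numbers are placeholders): losses are logged every `loss_freq` steps, evaluation and best-checkpoint tracking happen every `eval_freq` steps, and a results.txt summary is written to the checkpoint directory at the end:

trainer.train_model(max_iterations=1e5,
                    loss_freq=50,      # TensorBoard loss/accuracy logging interval
                    eval_freq=2000,    # evaluation (and best-checkpoint) interval
                    save_freq=1e4)     # periodic checkpoint interval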
Code example #8
    def load_recent_model(self):
        checkpoint_dict = load_model(self.checkpoint_path,
                                     model=self.model,
                                     optimizer=self.optimizer,
                                     lr_scheduler=self.lr_scheduler)
        return checkpoint_dict
Code example #9
	def load_recent_model(self):
		checkpoint_dict = load_model(self.checkpoint_path, model=self.model, optimizer=self.optimizer, lr_scheduler=self.lr_scheduler)
		if len(checkpoint_dict.keys()) > 0: # If checkpoint is not empty, load heads as well
			for t in self.tasks:
				t.load_from_dict(checkpoint_dict)
		return checkpoint_dict
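Together with train_model above, this gives the usual resume pattern; a sketch assuming the same checkpoint layout and a `trainer` instance:

checkpoint_dict = trainer.load_recent_model()
# An empty dict means no checkpoint was found and training starts from scratch.
start_iter = checkpoint_dict.get("iteration", 0)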