def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, keep_results=False):
    # Build an evaluator for the given split and log its metrics as tab-separated rows
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size,
                                                           device, keep_results=keep_results)
    scores, metric_names = saved_model_evaluator.get_scores()
    logger.info('Evaluation metrics for {}'.format(split_name))
    logger.info('\t'.join([' '] + metric_names))
    logger.info('\t'.join([split_name] + list(map(str, scores))))
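A possible call site, sketched with hypothetical names: model, dev_loader, test_loader, and args are assumed to exist already (for example, built the way the standalone evaluation script further down builds them).

# hedged sketch: dev_loader / test_loader are placeholders for loaders built elsewhere
evaluate_dataset('dev', dataset_cls, model, embedding, dev_loader, args.batch_size, args.device)
evaluate_dataset('test', dataset_cls, model, embedding, test_loader, args.batch_size, args.device,
                 keep_results=True)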
def other_evaluate(split_name, dataset_cls, model, embedding, loader, batch_size, device):
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    ALL_PAIRS, scores, metric_names = saved_model_evaluator.final_evaluation()
    logger.info('Evaluation metrics for {}'.format(split_name))
    logger.info('\t'.join([' '] + metric_names))
    logger.info('\t'.join([split_name] + list(map(str, scores))))
    return ALL_PAIRS
def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, single_label):
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    saved_model_evaluator.ignore_lengths = True
    saved_model_evaluator.single_label = single_label
    scores, metric_names = saved_model_evaluator.get_scores()
    print('Evaluation metrics for', split_name)
    print(metric_names)
    print(scores)
def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, single_label):
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    if hasattr(saved_model_evaluator, 'single_label'):
        saved_model_evaluator.single_label = single_label
    scores, metric_names = saved_model_evaluator.get_scores()
    logger.info('Evaluation metrics for {}'.format(split_name))
    print(metric_names)
    print(scores)
def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, pred_scores, args, topic):
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, args.batch_size,
                                                           args.gpu)
    if args.model in {'HAN', 'HR-CNN'}:
        saved_model_evaluator.ignore_lengths = True
    dev_acc, dev_precision, dev_ap, dev_f1, dev_loss = saved_model_evaluator.get_scores()[0]
    if split_name == 'test':
        pred_scores[topic] = (saved_model_evaluator.y_pred, saved_model_evaluator.docid)
    else:
        dev_header = 'Dev/Loss Dev/Acc. Dev/Pr. Dev/APr.'
        dev_log_template = '{:4.4f} {:>8.4f} {:>4.4f} {:4.4f}'
        print('Evaluation metrics for %s split from topic %s' % (split_name, topic))
        print(dev_header)
        print(dev_log_template.format(dev_loss, dev_acc, dev_precision, dev_ap) + '\n')
    return saved_model_evaluator.y_pred
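A hedged sketch of how a caller might aggregate per-topic test predictions with this variant; topic_loaders is a hypothetical mapping from topic name to its data loader and is not part of the source.

pred_scores = {}
for topic, topic_loader in topic_loaders.items():  # topic_loaders is assumed, not from the source
    evaluate_dataset('test', dataset_cls, model, embedding, topic_loader, pred_scores, args, topic)
# after the loop, pred_scores[topic] holds (y_pred, docid) for each topic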
def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, run_label, result_loc, dataset):
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    if dataset == 'sts2014' and split_name == 'test':
        scores, metric_names = saved_model_evaluator.get_sts_test_scores()
    else:
        scores, metric_names = saved_model_evaluator.get_scores()
    logger.info('Evaluation metrics for {}'.format(split_name))
    logger.info('\t'.join([' '] + metric_names))
    logger.info('\t'.join([split_name] + list(map(str, scores))))
    # Append the same metrics to a per-run results file
    with open(os.path.join(result_loc, dataset + "_" + run_label), "a+") as results_file:
        results_file.write('Evaluation metrics for {}\n'.format(split_name))
        results_file.write('\t'.join([' '] + metric_names) + "\n")
        results_file.write('\t'.join([split_name] + list(map(str, scores))) + "\n")
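Calling this variant once per split appends both metric blocks to the same file at result_loc/<dataset>_<run_label>. A hedged sketch; run_label ('run1') and results_dir are hypothetical values, not from the source.

evaluate_dataset('dev', dataset_cls, model, embedding, dev_loader, args.batch_size,
                 args.device, 'run1', results_dir, args.dataset)
evaluate_dataset('test', dataset_cls, model, embedding, test_loader, args.batch_size,
                 args.device, 'run1', results_dir, args.dataset)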
if args.resume_snapshot:
    if args.cuda:
        model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
    else:
        model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage)
else:
    model = CharCNN(config)

if args.cuda:
    model.cuda()
    print('Shift model to GPU')

# Only optimize parameters that require gradients
parameter = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay)

if args.dataset not in dataset_map:
    raise ValueError('Unrecognized dataset')
else:
    train_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, train_iter,
                                                     args.batch_size, args.gpu)
    test_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, test_iter,
                                                    args.batch_size, args.gpu)
    dev_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, dev_iter,
                                                   args.batch_size, args.gpu)
    train_evaluator.single_label = args.single_label
    test_evaluator.single_label = args.single_label
    dev_evaluator.single_label = args.single_label
    dev_evaluator.ignore_lengths = True
    test_evaluator.ignore_lengths = True

trainer_config = {
    'optimizer': optimizer,
    'batch_size': args.batch_size,
    'log_interval': args.log_every,
    'dev_log_interval': args.dev_every,
    'patience': args.patience,
    'model_outfile': args.save_path,  # actually a directory, using model_outfile to conform to Trainer naming convention
if args.resume_snapshot:
    if args.cuda:
        model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
    else:
        model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage)
else:
    model = KimCNN(config)

if args.cuda:
    model.cuda()
    print('Shift model to GPU')

parameter = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adadelta(parameter, lr=args.lr, weight_decay=args.weight_decay)

if args.dataset == 'SST-1':
    train_evaluator = EvaluatorFactory.get_evaluator(SST1, model, None, train_iter, args.batch_size, args.gpu)
    test_evaluator = EvaluatorFactory.get_evaluator(SST1, model, None, test_iter, args.batch_size, args.gpu)
    dev_evaluator = EvaluatorFactory.get_evaluator(SST1, model, None, dev_iter, args.batch_size, args.gpu)
elif args.dataset == 'SST-2':
    train_evaluator = EvaluatorFactory.get_evaluator(SST2, model, None, train_iter, args.batch_size, args.gpu)
    test_evaluator = EvaluatorFactory.get_evaluator(SST2, model, None, test_iter, args.batch_size, args.gpu)
    dev_evaluator = EvaluatorFactory.get_evaluator(SST2, model, None, dev_iter, args.batch_size, args.gpu)
elif args.dataset == 'Reuters':
    train_evaluator = EvaluatorFactory.get_evaluator(Reuters, model, None, train_iter, args.batch_size, args.gpu)
    test_evaluator = EvaluatorFactory.get_evaluator(Reuters, model, None, test_iter, args.batch_size, args.gpu)
    dev_evaluator = EvaluatorFactory.get_evaluator(Reuters, model, None, dev_iter, args.batch_size, args.gpu)
else:
    raise ValueError('Unrecognized dataset')

trainer_config = {
    'optimizer': optimizer,
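The SST-1 / SST-2 / Reuters branches differ only in the dataset class, so the selection could be table-driven, matching the dataset_map pattern the CharCNN snippet above already uses. A sketch under the assumption that only these three datasets are supported:

dataset_map = {'SST-1': SST1, 'SST-2': SST2, 'Reuters': Reuters}
if args.dataset not in dataset_map:
    raise ValueError('Unrecognized dataset')
dataset_cls = dataset_map[args.dataset]
# one evaluator per split, as in the branches above
train_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, None, train_iter, args.batch_size, args.gpu)
test_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, None, test_iter, args.batch_size, args.gpu)
dev_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, None, dev_iter, args.batch_size, args.gpu)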
if args.optimizer == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.regularization, eps=args.epsilon)
elif args.optimizer == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.regularization)
else:
    raise ValueError('optimizer not recognized: it should be either adam or sgd')

train_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, train_loader, args.batch_size,
                                                 args.device)
test_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, test_loader, args.batch_size,
                                                args.device)
dev_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, dev_loader, args.batch_size,
                                               args.device)

trainer_config = {
    'optimizer': optimizer,
    'batch_size': args.batch_size,
    'log_interval': args.log_interval,
    'model_outfile': args.model_outfile,
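The optimizer choice could likewise be expressed as a lookup; a sketch assuming only the two optimizers handled above:

optimizer_builders = {
    'adam': lambda: optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=args.regularization, eps=args.epsilon),
    'sgd': lambda: optim.SGD(model.parameters(), lr=args.lr,
                             momentum=args.momentum, weight_decay=args.regularization),
}
if args.optimizer not in optimizer_builders:
    raise ValueError('optimizer not recognized: it should be either adam or sgd')
optimizer = optimizer_builders[args.optimizer]()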
                    help='GPU device, -1 for CPU (default: 0)')
args = parser.parse_args()

with open(args.model, 'rb') as file:
    pickle_model = pickle.load(file)
print(pickle_model)
model = pickle_model.to(args.device)

print("Loading data...")
dataset_cls, embedding, _, test_loader, _ \
    = DatasetFactory.get_dataset(args.dataset, args.word_vectors_dir, args.word_vectors_file,
                                 args.batch_size, args.device, args.castor_dir)
embedding = embedding.to(args.device)

saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, test_loader,
                                                       args.batch_size, args.device)
# conv_pairs, div_pairs = saved_model_evaluator.DIV_test()

print("EVALUATING.........")
ALL_PAIRS, SCORES, TITLES = saved_model_evaluator.final_evaluation()
print(TITLES)
print(SCORES)

print("Writing to file>>>")
with open(args.model + "_" + args.dataset + "_classify.txt", 'w') as f_out:
    for pair in ALL_PAIRS:
        SENT_1 = pair[0][0]
        SENT_2 = pair[0][1]
        sco = pair[1]
        lbl = pair[2]