def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, is_multilabel):
    """Score a trained model on one dataset split and print the results.

    Builds an evaluator for the given split, flags it as multi-label when the
    evaluator supports that mode, then prints the metric names and scores.
    """
    evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    # Not every evaluator implementation exposes a multi-label switch.
    if hasattr(evaluator, 'is_multilabel'):
        evaluator.is_multilabel = is_multilabel
    scores, metric_names = evaluator.get_scores()
    print('Evaluation metrics for', split_name)
    print(metric_names)
    print(scores)
def evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, is_multilabel, save_file):
    """Score a trained model on one split, print the results, and save them as JSON.

    Args:
        split_name: Label used only for the printed header (e.g. 'dev', 'test').
        dataset_cls: Dataset class passed through to EvaluatorFactory.
        model: Trained model to evaluate.
        embedding: Embedding object for the evaluator (may be None).
        loader: Batch iterator for the split.
        batch_size: Evaluation batch size.
        device: Torch device / GPU id the evaluator should use.
        is_multilabel: Whether to put the evaluator in multi-label mode
            (applied only if the evaluator exposes that flag).
        save_file: Path to write the {metric_name: score} JSON mapping to.
    """
    saved_model_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, batch_size, device)
    # Not every evaluator implementation exposes a multi-label switch.
    if hasattr(saved_model_evaluator, 'is_multilabel'):
        saved_model_evaluator.is_multilabel = is_multilabel
    scores, score_names = saved_model_evaluator.get_scores()
    print('Evaluation metrics for', split_name)
    print(score_names)
    print(scores)
    scores_dict = dict(zip(score_names, scores))
    # json.dump writes straight to the file handle — no intermediate
    # string as with f.write(json.dumps(...)).
    with open(save_file, 'w') as f:
        json.dump(scores_dict, f)
def evaluate_dataset(split, dataset_cls, model, embedding, loader, pred_scores, args, topic):
    """Evaluate one topic's split: log metrics for dev, collect predictions for test.

    On the 'test' split, stores (predictions, doc ids) into pred_scores[topic]
    instead of printing. Returns the evaluator's predicted labels either way.
    """
    evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, loader, args.batch_size, args.gpu)
    # HAN / HR-CNN batches do not carry sequence-length tensors.
    if args.model in {'HAN', 'HR-CNN'}:
        evaluator.ignore_lengths = True
    accuracy, precision, recall, f1, avg_loss = evaluator.get_scores()[0]
    if split != 'test':
        print('\n' + LOG_HEADER)
        print(LOG_TEMPLATE.format(topic, accuracy, precision, recall, f1, avg_loss) + '\n')
    else:
        # Test time: stash per-topic predictions and document ids for the caller.
        pred_scores[topic] = (evaluator.y_pred, evaluator.docid)
    return evaluator.y_pred
def run_main(args):
    """End-to-end driver: seed RNGs, build dataset iterators, train (or load)
    a FastText model, and evaluate on the dev and/or test split, saving the
    metrics as JSON next to args.metrics_json.
    """
    print('Args: ', args)
    # Output paths for the per-split JSON metric files.
    metrics_dev_json = args.metrics_json + '_dev'
    metrics_test_json = args.metrics_json + '_test'
    # Set random seed for reproducibility across python, numpy and torch.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if not args.cuda:
        args.gpu = -1
    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
    if torch.cuda.is_available() and args.cuda:
        print('Note: You are using GPU for training')
        torch.cuda.manual_seed(args.seed)
        # NOTE(review): args.device is only assigned on this GPU branch, yet it
        # is read unconditionally below (iters_dev/iters_test, evaluators).
        # Presumably args already carries a 'device' attribute on the CPU path
        # — confirm against the argument parser.
        args.device = torch.device('cuda:%d' % args.gpu)
    if torch.cuda.is_available() and not args.cuda:
        print('Warning: Using CPU for training')
    dataset_map = {
        'Reuters': ReutersBOW,
        'CongressionalHearing': CongressionalHearingBOW,
        'AAPD': AAPD,
        'IMDB': IMDB,
        'Yelp2014': Yelp2014
    }
    if args.dataset not in dataset_map:
        raise ValueError('Unrecognized dataset')
    else:
        dataset_class = dataset_map[args.dataset]
    # A non-negative fold number selects a cross-validation fold subdirectory.
    if args.fold_num >= 0:
        dataset_name = os.path.join(dataset_class.NAME + 'Folds', 'fold' + str(args.fold_num))
    else:
        dataset_name = dataset_class.NAME
    # NOTE(review): train_iter (and dev_iter/test_iter) are bound only when the
    # corresponding evaluate_* flag is set; if neither flag is set the code
    # below raises NameError on train_iter.
    if args.evaluate_dev:
        train_iter, dev_iter = dataset_map[args.dataset].iters_dev(args.data_dir, dataset_name, args.word_vectors_file,
                                                                   args.word_vectors_dir,
                                                                   batch_size=args.batch_size, device=args.device,
                                                                   unk_init=UnknownWordVecCache.unk)
    if args.evaluate_test:
        train_iter, test_iter = dataset_map[args.dataset].iters_test(args.data_dir, dataset_name, args.word_vectors_file,
                                                                     args.word_vectors_dir,
                                                                     batch_size=args.batch_size, device=args.device,
                                                                     unk_init=UnknownWordVecCache.unk)
    # Model config: a copy of the CLI args augmented with dataset-derived sizes.
    config = deepcopy(args)
    config.dataset = train_iter.dataset
    config.target_class = train_iter.dataset.NUM_CLASSES
    config.words_num = len(train_iter.dataset.TEXT_FIELD.vocab)
    print('Dataset:', args.dataset)
    print('No. of target classes:', train_iter.dataset.NUM_CLASSES)
    print('No. of train instances', len(train_iter.dataset))
    if args.evaluate_dev:
        print('No. of dev instances', len(dev_iter.dataset))
    if args.evaluate_test:
        print('No. of test instances', len(test_iter.dataset))
    if args.resume_snapshot:
        # Resume a previously saved snapshot, remapping storages to GPU or CPU.
        if args.cuda:
            model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.device))
        else:
            model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage)
    else:
        model = FastText(config)
    model.to(device)
    if not args.trained_model:
        # Training run: make sure the snapshot directory exists.
        save_path = os.path.join(args.save_path, dataset_name)
        os.makedirs(save_path, exist_ok=True)
    # Optimize only parameters that require gradients.
    parameter = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay)
    train_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, train_iter,
                                                     args.batch_size, args.device)
    if args.evaluate_dev:
        dev_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, dev_iter,
                                                       args.batch_size, args.device)
        if hasattr(dev_evaluator, 'is_multilabel'):
            dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if args.evaluate_test:
        test_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, test_iter,
                                                        args.batch_size, args.device)
        if hasattr(test_evaluator, 'is_multilabel'):
            test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if hasattr(train_evaluator, 'is_multilabel'):
        train_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_every,
        'patience': args.patience,
        'model_outfile': args.save_path,
        'is_multilabel': dataset_class.IS_MULTILABEL
    }
    # NOTE(review): if both evaluate flags are set, the test trainer silently
    # replaces the dev trainer; if neither is set, 'trainer' is unbound below.
    if args.evaluate_dev:
        trainer = TrainerFactory.get_trainer_dev(args.dataset, model, None, train_iter, trainer_config,
                                                 train_evaluator, dev_evaluator, args)
    if args.evaluate_test:
        trainer = TrainerFactory.get_trainer_test(args.dataset, model, None, train_iter, trainer_config,
                                                  train_evaluator, test_evaluator, args)
    if not args.trained_model:
        trainer.train(args.epochs)
    else:
        # Evaluation-only run: load the supplied trained model instead of training.
        if args.cuda:
            model = torch.load(args.trained_model, map_location=lambda storage, location: storage.cuda(args.device))
        else:
            model = torch.load(args.trained_model, map_location=lambda storage, location: storage)
    # Calculate dev and test metrics using the best snapshot when the trainer kept one.
    if hasattr(trainer, 'snapshot_path'):
        model = torch.load(trainer.snapshot_path)
    if args.evaluate_dev:
        evaluate_dataset('dev', dataset_map[args.dataset], model, None, dev_iter, args.batch_size,
                         is_multilabel=dataset_class.IS_MULTILABEL, device=args.device,
                         save_file=metrics_dev_json)
    if args.evaluate_test:
        evaluate_dataset('test', dataset_map[args.dataset], model, None, test_iter, args.batch_size,
                         is_multilabel=dataset_class.IS_MULTILABEL, device=args.device,
                         save_file=metrics_test_json)
model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu)) else: model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage) else: model = XmlCNN(config) if args.cuda: model.cuda() if not args.trained_model: save_path = os.path.join(args.save_path, dataset_map[args.dataset].NAME) os.makedirs(save_path, exist_ok=True) parameter = filter(lambda p: p.requires_grad, model.parameters()) optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay) train_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, train_iter, args.batch_size, args.gpu) test_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, test_iter, args.batch_size, args.gpu) dev_evaluator = EvaluatorFactory.get_evaluator(dataset_map[args.dataset], model, None, dev_iter, args.batch_size, args.gpu) if hasattr(train_evaluator, 'is_multilabel'): train_evaluator.is_multilabel = dataset_class.IS_MULTILABEL if hasattr(test_evaluator, 'is_multilabel'): test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL if hasattr(dev_evaluator, 'is_multilabel'): dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL trainer_config = { 'optimizer': optimizer, 'batch_size': args.batch_size, 'log_interval': args.log_every, 'patience': args.patience,
def start(self):
    """End-to-end driver: seed RNGs, build train/dev/test iterators, train (or
    load) a CharCNN model, then evaluate on dev and test via
    self.evaluate_dataset.
    """
    args = self.args
    # Set default configuration in args.py
    #args = get_args()
    logger = self.get_logger()
    # Set random seed for reproducibility across torch, numpy and python.
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    np.random.seed(args.seed)
    random.seed(args.seed)
    if not args.cuda:
        args.gpu = -1
    if torch.cuda.is_available() and args.cuda:
        print('Note: You are using GPU for training')
        torch.cuda.set_device(args.gpu)
        torch.cuda.manual_seed(args.seed)
    if torch.cuda.is_available() and not args.cuda:
        print('Warning: Using CPU for training')
    dataset_map = {
        'Reuters': Reuters,
        'AAPD': AAPD,
        'IMDB': IMDB,
        'Yelp2014': Yelp2014
    }
    if args.dataset not in dataset_map:
        raise ValueError('Unrecognized dataset')
    else:
        dataset_class = dataset_map[args.dataset]
    train_iter, dev_iter, test_iter = dataset_class.iters(args.data_dir, args.word_vectors_file,
                                                          args.word_vectors_dir,
                                                          batch_size=args.batch_size, device=args.gpu,
                                                          unk_init=UnknownWordVecCache.unk)
    # Model config: a copy of the CLI args augmented with dataset-derived sizes.
    config = deepcopy(args)
    config.dataset = train_iter.dataset
    config.target_class = train_iter.dataset.NUM_CLASSES
    print('Dataset:', args.dataset)
    print('No. of target classes:', train_iter.dataset.NUM_CLASSES)
    print('No. of train instances', len(train_iter.dataset))
    print('No. of dev instances', len(dev_iter.dataset))
    print('No. of test instances', len(test_iter.dataset))
    if args.resume_snapshot:
        # Resume a previously saved snapshot, remapping storages to GPU or CPU.
        if args.cuda:
            model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
        else:
            model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage)
    else:
        model = CharCNN(config)
        if args.cuda:
            model.cuda()
    if not args.trained_model:
        # Training run: make sure the snapshot directory exists.
        save_path = os.path.join(args.save_path, dataset_map[args.dataset].NAME)
        os.makedirs(save_path, exist_ok=True)
    # Optimize only parameters that require gradients.
    parameter = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay)
    train_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, train_iter, args.batch_size, args.gpu)
    test_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, test_iter, args.batch_size, args.gpu)
    dev_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, dev_iter, args.batch_size, args.gpu)
    # Configure evaluators: multi-label mode where supported; CharCNN batches
    # carry no sequence-length tensors, so lengths are ignored.
    if hasattr(train_evaluator, 'is_multilabel'):
        train_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if hasattr(dev_evaluator, 'is_multilabel'):
        dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if hasattr(dev_evaluator, 'ignore_lengths'):
        dev_evaluator.ignore_lengths = True
    if hasattr(test_evaluator, 'is_multilabel'):
        test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if hasattr(test_evaluator, 'ignore_lengths'):
        test_evaluator.ignore_lengths = True
    trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_every,
        'patience': args.patience,
        'model_outfile': args.save_path,
        'logger': logger,
        'is_multilabel': dataset_class.IS_MULTILABEL,
        'ignore_lengths': True
    }
    trainer = TrainerFactory.get_trainer(args.dataset, model, None, train_iter, trainer_config,
                                         train_evaluator, test_evaluator, dev_evaluator)
    if not args.trained_model:
        trainer.train(args.epochs)
    else:
        # Evaluation-only run: load the supplied trained model instead of training.
        if args.cuda:
            model = torch.load(args.trained_model, map_location=lambda storage, location: storage.cuda(args.gpu))
        else:
            model = torch.load(args.trained_model, map_location=lambda storage, location: storage)
    # Calculate dev and test metrics using the best snapshot when the trainer kept one.
    if hasattr(trainer, 'snapshot_path'):
        model = torch.load(trainer.snapshot_path)
    self.evaluate_dataset('dev', dataset_map[args.dataset], model, None, dev_iter, args.batch_size,
                          is_multilabel=dataset_class.IS_MULTILABEL, device=args.gpu)
    self.evaluate_dataset('test', dataset_map[args.dataset], model, None, test_iter, args.batch_size,
                          is_multilabel=dataset_class.IS_MULTILABEL, device=args.gpu)