###############################################################################
# Load data
###############################################################################
# Build the train/dev/test corpora for every task in this run. For the
# combined 'allnli' task the SNLI and MultiNLI data are parsed into the same
# Corpus objects.
train_corpus = data.Corpus(args.tokenize)
dev_corpus = data.Corpus(args.tokenize)
test_corpus = data.Corpus(args.tokenize)

task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
for task in task_names:
    if 'IMDB' in task:
        #######################################################################
        # Load Learning to Skim paper's Pickle file
        #######################################################################
        train_d, dev_d, test_d = helper.get_splited_imdb_data(
            args.output_base_path + 'IMDB/' + 'imdb.p', SAG=args.SAG)
        train_corpus.parse(train_d, task, args.max_example)
        dev_corpus.parse(dev_d, task, args.max_example)
        test_corpus.parse(test_d, task, args.max_example)
    else:
        train_corpus.parse(args.data + task + '/train.txt', task,
                           args.max_example)
        # BUG FIX: Corpus.parse takes max_example as its third argument (see
        # the IMDB branch and the train.txt call above); the original passed
        # args.tokenize here, silently misusing the tokenize flag as an
        # example cap.
        if task == 'multinli':
            dev_corpus.parse(args.data + task + '/dev_matched.txt', task,
                             args.max_example)
            test_corpus.parse(args.data + task + '/test_matched.txt', task,
                              args.max_example)
        else:
            dev_corpus.parse(args.data + task + '/dev.txt', task,
                             args.max_example)
            # NOTE(review): the source was truncated mid-call here; the final
            # argument is reconstructed to match the parallel calls above —
            # confirm against the original file.
            test_corpus.parse(args.data + task + '/test.txt', task,
                              args.max_example)
# Move the model to the selected GPU (if requested) and restore its weights,
# then evaluate each task's test partition(s).
if args.cuda:
    torch.cuda.set_device(args.gpu)
    model = model.cuda()

print('loading model')
helper.load_model(model, model_path, 'state_dict', args.cuda)
print('vocabulary size = ', len(dictionary))

task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
for task in task_names:
    test_corpus = data.Corpus(args.tokenize)
    if 'IMDB' in args.task:
        #######################################################################
        # Load Learning to Skim paper's Pickle file
        #######################################################################
        train_d, dev_d, test_d = helper.get_splited_imdb_data(
            args.output_base_path + task + '/' + 'imdb.p')
        test_corpus.parse(test_d, task, args.max_example)
    elif task == 'multinli' and args.test != 'train':
        for partition in ['_matched', '_mismatched']:
            # BUG FIX: build a fresh corpus for every partition. The original
            # reused one Corpus across the loop, so the '_mismatched' pass
            # also contained all '_matched' examples (the per-partition
            # "dataset size" print below shows per-partition data is intended).
            test_corpus = data.Corpus(args.tokenize)
            test_corpus.parse(
                args.data + task + '/' + args.test + partition + '.txt',
                task, args.max_example)
            print('[' + partition[1:] + '] dataset size = ',
                  len(test_corpus.data))
            test_batches = helper.batchify(test_corpus.data,
                                           args.eval_batch_size)
            if args.test == 'test':
                # Dump predictions to a CSV for the blind test set.
                evaluate(model, test_batches, dictionary,
                         args.save_path + args.task + partition + '.csv')
            else:
                test_accuracy, test_f1 = evaluate(model, test_batches,
                                                  dictionary)
                print('[' + partition[1:] + '] accuracy: %.2f%%'
                      % test_accuracy)
                print('[' + partition[1:] + '] f1: %.2f%%' % test_f1)
# Build the BCN model, restore the best checkpoint, and evaluate each task's
# test partition(s).
model = BCN(dictionary, embeddings_index, class_distributions, args)
if args.cuda and torch.cuda.is_available():
    torch.cuda.set_device(args.gpu)
    model = model.cuda()

helper.load_model_states_from_checkpoint(
    model, args.save_path + 'model_best.pth.tar', 'state_dict', args.cuda)
print('vocabulary size = ', len(dictionary))

task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
for task in task_names:
    test_corpus = data.Corpus(args.tokenize)
    if 'imdb' in args.task:
        #######################################################################
        # Load Learning to Skim paper's Pickle file
        #######################################################################
        train_d, dev_d, test_d = helper.get_splited_imdb_data(
            args.save_path + 'data/' + 'imdb.p')
        test_corpus.parse(test_d, task, args.max_example)
    elif task == 'multinli' and args.test != 'train':
        for partition in ['_matched', '_mismatched']:
            # BUG FIX: build a fresh corpus for every partition. The original
            # reused one Corpus across the loop, so the '_mismatched' pass
            # also contained all '_matched' examples.
            test_corpus = data.Corpus(args.tokenize)
            test_corpus.parse(
                args.data + task + '/' + args.test + partition + '.txt',
                task, args.max_example)
            print('[' + partition[1:] + '] dataset size = ',
                  len(test_corpus.data))
            test_batches = helper.batchify(test_corpus.data, args.batch_size)
            if args.test == 'test':
                # Dump predictions to a CSV for the blind test set.
                evaluate(model, test_batches, dictionary,
                         args.save_path + args.task + partition + '.csv')
            else:
                # NOTE(review): the source was truncated right after this
                # 'else:'; the body below is reconstructed from the parallel
                # evaluation script in this project — confirm against the
                # original file.
                test_accuracy, test_f1 = evaluate(model, test_batches,
                                                  dictionary)
                print('[' + partition[1:] + '] accuracy: %.2f%%'
                      % test_accuracy)
                print('[' + partition[1:] + '] f1: %.2f%%' % test_f1)
###############################################################################
# load train and dev dataset
###############################################################################
# Variant of the loader that parses the training data into a temporary corpus
# so the original (pre-filtering) training-set size can be recorded.
train_corpus = data.Corpus(args.tokenize)
train_corpus_temp = data.Corpus(args.tokenize)
dev_corpus = data.Corpus(args.tokenize)
test_corpus = data.Corpus(args.tokenize)
ori_train_size = -1  # sentinel until a task's train split is parsed

task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
for task in task_names:
    if 'IMDB' in task:
        #######################################################################
        # Load Learning to Skim paper's Pickle file
        #######################################################################
        train_d, dev_d, test_d = helper.get_splited_imdb_data(
            args.output_base_path + task + '/' + 'imdb.p', SAG=args.SAG)
        train_corpus_temp.parse(train_d, task, args.max_example)
        dev_corpus.parse(dev_d, task, args.max_example)
        test_corpus.parse(test_d, task, args.max_example)
        ori_train_size = len(train_corpus_temp.data)
    else:
        train_corpus_temp.parse(args.output_base_path + task + '/train.txt',
                                task, args.max_example)
        ori_train_size = len(train_corpus_temp.data)
        if task == 'multinli':
            # BUG FIX: Corpus.parse takes max_example as its third argument
            # (see the IMDB branch and the train.txt call above); the original
            # passed args.tokenize here, silently misusing the tokenize flag
            # as an example cap.
            dev_corpus.parse(
                args.output_base_path + task + '/dev_matched.txt',
                task, args.max_example)
            test_corpus.parse(
                args.output_base_path + task + '/test_matched.txt',
                task, args.max_example)