def train(args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = copy.deepcopy(model)  # fallback in case dev F1 never improves

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch)
        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
            c = (i + 1) // args.print_freq
            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                  f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    return best_model
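# The EMA helper used above (ema.register / ema.update) is not defined in this
# snippet. The sketch below shows what such a helper is assumed to look like: an
# exponential moving average of parameter tensors keyed by name. The project's
# real implementation may differ in detail.
class EMA:
    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, param):
        # keep an initial copy of the parameter tensor
        self.shadow[name] = param.clone()

    def update(self, name, param):
        # shadow = (1 - decay) * param + decay * shadow
        assert name in self.shadow
        new_average = (1.0 - self.decay) * param + self.decay * self.shadow[name]
        self.shadow[name] = new_average.clone()

    def get(self, name):
        return self.shadow[name]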
def train_pipeline(args):
    field = data.SpanFieldCollection()
    train_loader = data.SpanDataLoader(args.pre_train_path, field)
    dev_loader = data.SpanDataLoader(args.pre_dev_path, field)
    field.build_vocab(None, train_loader.dataset, dev_loader.dataset)

    model = BiDAF(**argument_wrapper.get_model_args(field))
    if args.pretrained_wv_path is not None:
        pretrained = load_pretrained_wv(args.pretrained_wv_path)
        model.set_word_embedding(pretrained)

    train_iter = train_loader.get_batchiter(args.batch_size, args.gpu_id)
    dev_iter = dev_loader.get_batchiter(args.batch_size, args.gpu_id)

    test_func = rc.create_test_func('foo')
    best_model = rc.train_by_span(model, train_iter, dev_iter,
                                  RawCorpus(args.dev_path).get_answers(),
                                  test_func,
                                  args.epoch_num, args.learning_rate,
                                  args.exp_decay_rate, args.save_freq)
    best_model.dump('model.pt')
def load_mf_from_run_folder(folder_path):
    model_path = f'{folder_path}/model.pt'
    field_path = f'{folder_path}/field.pkl'
    model = BiDAF.load_model(model_path)
    field = SpanFieldCollection()
    field.load_fileds(field_path)
    return model, field
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--char-emb-dim', default=8, type=int)
    parser.add_argument('--char-channel-width', default=5, type=int)
    parser.add_argument('--char-channel-num', default=100, type=int)
    parser.add_argument('--dev-batch-size', default=60, type=int)
    parser.add_argument('--disable-c2q', type=ast.literal_eval)
    parser.add_argument('--disable-q2c', type=ast.literal_eval)
    parser.add_argument('--dropout', default=0.2, type=float)
    parser.add_argument('--epoch', default=12, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=100, type=int)
    parser.add_argument('--learning-rate', default=0.5, type=float)
    parser.add_argument('--moving-average-decay', default=0.999, type=float)
    parser.add_argument('--squad-version', default='1.1')
    parser.add_argument('--train-batch-size', default=60, type=int)
    parser.add_argument('--word-vec-dim', default=100, type=int)
    parser.add_argument('--validation-freq', default=500, type=int)
    args = parser.parse_args()

    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    print("Device is set to", device)

    print(f"Load SQuAD {args.squad_version}")
    data = SQuAD(device=device,
                 squad_version=args.squad_version,
                 word_vec_dim=args.word_vec_dim,
                 train_batch_size=args.train_batch_size,
                 dev_batch_size=args.dev_batch_size)
    setattr(args, 'char_vocab_size', len(data.CHAR_NESTING.vocab))
    setattr(args, 'word_vocab_size', len(data.WORD.vocab))

    print("Load BiDAF Model")
    model = BiDAF(pretrain_embedding=data.WORD.vocab.vectors,
                  char_vocab_size=len(data.CHAR_NESTING.vocab),
                  consider_na=(args.squad_version == "2.0"),
                  enable_c2q=not args.disable_c2q if args.disable_c2q is not None else True,
                  enable_q2c=not args.disable_q2c if args.disable_q2c is not None else True,
                  hidden_size=args.hidden_size,
                  char_emb_dim=args.char_emb_dim,
                  char_channel_num=args.char_channel_num,
                  char_channel_width=args.char_channel_width,
                  dropout=args.dropout)

    print("Training start")
    trained_model = train(device=device,
                          data=data,
                          model=model,
                          epoch=args.epoch,
                          lr=args.learning_rate,
                          moving_average_decay=args.moving_average_decay,
                          validation_freq=args.validation_freq)

    model_time = int(time.time())
    # create the directory the checkpoint is actually written to
    if not os.path.exists('trained_models'):
        os.makedirs('trained_models')
    torch.save(trained_model.state_dict(), f'trained_models/BiDAF_{model_time}.pt')
setattr(options, 'dataset_file', '.data/squad/{}'.format(options.dev_file))
setattr(options, 'prediction_file', os.path.join(root_dir, 'prediction.json'))
setattr(options, 'model_time', strftime('%H:%M:%S', gmtime()))
logger.info('data loading complete!')

options.old_model = best_model_file_name
options.old_ema = best_ema
answer_append_sentences = joblib.load('sampled_perturb_answer_sentences.pkl')
question_append_sentences = joblib.load('sampled_perturb_question_sentences.pkl')

model = BiDAF(options, data.WORD.vocab.vectors).to(device)
if options.old_model is not None:
    model.load_state_dict(
        torch.load(options.old_model, map_location="cuda:{}".format(options.gpu)))

if options.old_ema is not None:
    # ema = pickle.load(open(options.old_ema, "rb"))
    ema = torch.load(options.old_ema, map_location=device)
else:
    ema = EMA(options.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

torch.manual_seed(args.seed)
if torch.cuda.is_available():
def train(args, data):
    device = torch.device("cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = copy.deepcopy(model)  # fallback in case dev F1 never improves

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch)
        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print('train loss: {} / dev loss: {}'.format(loss, dev_loss)
                  + ' / dev EM: {} / dev F1: {}'.format(dev_exact, dev_f1))

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print('max dev EM: {} / max dev F1: {}'.format(max_dev_exact, max_dev_f1))
    return best_model
def train(args, data):
    if args.load_model != "":
        model = BiDAF(args, data.WORD.vocab.vectors)
        model.load_state_dict(torch.load(args.load_model))
    else:
        model = BiDAF(args, data.WORD.vocab.vectors)
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    for name, i in model.named_parameters():
        if not i.is_leaf:
            print(name, i)

    writer = SummaryWriter(log_dir='runs/' + args.model_name)
    best_model = None

    for iterator, dev_iter, dev_file_name, index, print_freq, lr in zip(
            data.train_iter, data.dev_iter, args.dev_files,
            range(len(data.train)), args.print_freq, args.learning_rate):
        # debugging leftovers, disabled:
        # print(iterator[0])
        # embed()
        # exit(0)
        optimizer = optim.Adadelta(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()

        model.train()
        loss, last_epoch = 0, 0
        max_dev_exact, max_dev_f1 = -1, -1

        print(f"Training with {dev_file_name}")
        print()
        for i, batch in tqdm(enumerate(iterator), total=len(iterator) * args.epoch[index], ncols=100):
            present_epoch = int(iterator.epoch)
            eva = False
            if present_epoch == args.epoch[index]:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
                eva = True
            last_epoch = present_epoch

            p1, p2 = model(batch)
            optimizer.zero_grad()
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if param.requires_grad:
                    ema.update(name, param.data)
            torch.cuda.empty_cache()

            if (i + 1) % print_freq == 0 or eva:
                dev_loss, dev_exact, dev_f1 = test(model, ema, args, data, dev_iter, dev_file_name)
                c = (i + 1) // print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print()
                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                      f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    print("testing with test batch on best model")
    test_loss, test_exact, test_f1 = test(best_model, ema, args, data,
                                          list(data.test_iter)[-1], args.test_files[-1])
    print(f'test loss: {test_loss:.3f}'
          f' / test EM: {test_exact:.3f} / test F1: {test_f1:.3f}')
    return best_model
def train(args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.CONTEXT_WORD.vocab.vectors).to(device)
    num = count_parameters(model)
    print(f'parameter count: {num}')
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = copy.deepcopy(model)  # fallback in case dev F1 never improves
    print('training for {} epochs in total'.format(args.epoch))
    sys.stdout.flush()

    iterator = data.train_iter
    iterator.repeat = True
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            print('present_epoch value:', present_epoch)
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch.c_char, batch.q_char,
                       batch.c_word[0], batch.q_word[0],
                       batch.c_word[1], batch.q_word[1])
        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_exact, dev_f1, dev_hasans_exact, dev_hasans_f1, dev_noans_exact, dev_noans_f1 = \
                test(model, ema, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                  f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}'
                  f' / dev hasans EM: {dev_hasans_exact} / dev hasans F1: {dev_hasans_f1}'
                  f' / dev noans EM: {dev_noans_exact} / dev noans F1: {dev_noans_f1}')

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()
            sys.stdout.flush()

    writer.close()
    args.max_f1 = max_dev_f1
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    return best_model
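# count_parameters() is called above but not defined in this snippet. A minimal
# sketch of the assumed helper (total number of trainable parameters) follows;
# the project's actual helper may differ.
def count_parameters(model):
    # sum the element counts of all parameters that require gradients
    return sum(p.numel() for p in model.parameters() if p.requires_grad)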
def predict():
    parser = argparse.ArgumentParser()
    parser.add_argument('--char-dim', default=8, type=int)
    parser.add_argument('--char-channel-width', default=5, type=int)
    parser.add_argument('--char-channel-size', default=100, type=int)
    parser.add_argument('--context-threshold', default=400, type=int)
    parser.add_argument('--dev-batch-size', default=100, type=int)
    parser.add_argument('--test-batch-size', default=100, type=int)
    parser.add_argument('--dev-file', default='dev-v1.1.json')
    parser.add_argument('--test-file', default='test1.json')
    parser.add_argument('--dropout', default=0.2, type=float)
    parser.add_argument('--epoch', default=12, type=int)
    parser.add_argument('--exp-decay-rate', default=0.999, type=float)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=200, type=int)
    parser.add_argument('--learning-rate', default=0.5, type=float)
    parser.add_argument('--print-freq', default=250, type=int)
    parser.add_argument('--train-batch-size', default=60, type=int)
    parser.add_argument('--train-file', default='train-v1.1.json')
    parser.add_argument('--word-dim', default=300, type=int)
    args = parser.parse_args()

    print('loading SQuAD data...')
    current_dir = os.getcwd()
    current_dir = os.path.join(current_dir, 'BiDAF')
    path = os.path.join(current_dir, 'testing_files')
    data = SQuAD(args, path)
    setattr(args, 'char_vocab_size', len(data.CHAR.vocab))
    setattr(args, 'word_vocab_size', len(data.WORD.vocab))
    setattr(args, 'dataset_file', f'testing_files/{args.dev_file}')
    setattr(args, 'prediction_file', f'output/prediction{time.time()}.out')
    setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))
    print('data loading complete!')

    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    # load the trained parameters into the model
    model_path = os.path.join(current_dir, 'saved_models')
    model.load_state_dict(torch.load(os.path.join(model_path, 'BiDAF_tl_new.pt')))
    model.eval()

    # run the model and collect answers, keyed by question id
    answers = dict()
    with torch.set_grad_enabled(False):
        for batch in iter(data.test_iter):
            # print(batch)
            p1, p2 = model(batch)
            # batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            # loss += batch_loss.item()

            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            # mask out spans whose end position comes before the start position
            mask = (torch.ones(c_len, c_len) * float('-inf')).to(device)\
                .tril(-1).unsqueeze(0).expand(batch_size, -1, -1)
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()

            for i in range(batch_size):
                id = batch.id[i]
                answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                answer = ' '.join([data.WORD.vocab.itos[idx] for idx in answer])
                answers[id] = answer

    print(answers)
    return answers
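# predict() only prints and returns the answers dict, even though
# args.prediction_file is set above. A minimal sketch of writing the
# predictions in the {question_id: answer_text} JSON format expected by the
# official SQuAD evaluation script is shown below; dump_predictions is a
# hypothetical helper, not part of the original script.
import json
import os

def dump_predictions(answers, prediction_file):
    # create the output directory if the path contains one
    dirname = os.path.dirname(prediction_file)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with open(prediction_file, 'w', encoding='utf-8') as f:
        json.dump(answers, f)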
def train(args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1

    iterator = data.train_iter
    num_batch = len(iterator)
    for present_epoch in range(args.epoch):
        print('epoch', present_epoch + 1)
        for i, batch in enumerate(iterator):
            # legacy iterator-driven epoch tracking, disabled here:
            # present_epoch = int(iterator.epoch)
            # if present_epoch == args.epoch:
            #     print(present_epoch)
            #     print()
            #     print(args.epoch)
            #     break
            # if present_epoch > last_epoch:
            #     print('epoch:', present_epoch + 1)
            # last_epoch = present_epoch

            p1, p2 = model(batch)
            optimizer.zero_grad()
            # debug prints, disabled:
            # print(p1)
            # print()
            # print(batch.s_idx)

            # a single-example batch can come back squeezed to 1-D; restore the batch dimension
            if len(p1.size()) == 1:
                p1 = p1.reshape(1, -1)
            if len(p2.size()) == 1:
                p2 = p2.reshape(1, -1)
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if param.requires_grad:
                    ema.update(name, param.data)

            best_model = copy.deepcopy(model)
            if i + 1 == num_batch:
                dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
                c = (i + 1) // args.print_freq

                writer.add_scalar('loss/train', loss / num_batch, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print(f'train loss: {loss/num_batch:.3f} / dev loss: {dev_loss:.3f}'
                      f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    return best_model
def train(args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args).to(device)
    D_batch = args.train_batch_size

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()
    # writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1

    i = 0
    # iterator = data.train_iter
    while i + D_batch < len(data.data):
        b_id = i
        e_id = i + D_batch
        # present_epoch = int(iterator.epoch)
        # if present_epoch == args.epoch:
        #     break
        # if present_epoch > last_epoch:
        #     print('epoch:', present_epoch + 1)
        # last_epoch = present_epoch

        p1, p2 = model(data, b_id, e_id)
        optimizer.zero_grad()
        s_idx, e_idx = data.get_targ(b_id, e_id)
        batch_loss = criterion(p1, s_idx) + criterion(p2, e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        # if (i + 1) % args.print_freq == 0:
        #     dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
        #     c = (i + 1) // args.print_freq
        #     # writer.add_scalar('loss/train', loss, c)
        #     # writer.add_scalar('loss/dev', dev_loss, c)
        #     # writer.add_scalar('exact_match/dev', dev_exact, c)
        #     # writer.add_scalar('f1/dev', dev_f1, c)
        #     # print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
        #     #       f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')
        #     if dev_f1 > max_dev_f1:
        #         max_dev_f1 = dev_f1
        #         max_dev_exact = dev_exact
        #         best_model = copy.deepcopy(model)
        #     loss = 0
        #     model.train()

        i += D_batch

    # writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    # dev evaluation is disabled above, so fall back to the final model
    best_model = model
    return best_model
def train(args, data):
    device = torch.device("cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")
    model = BiDAF(args).to(device)

    # If some of the loaded parameters should not be updated (i.e. stay fixed and
    # take no part in training), their requires_grad attribute has to be set to
    # False manually, and those parameters have to be filtered out when passing
    # parameters to the optimizer:
    # ema = EMA(args.exp_decay_rate)
    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(logdir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = model  # dev evaluation below is disabled, so fall back to the trained model

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        try:
            p1, p2 = model(batch)
        except OSError:
            # skip this batch entirely; p1/p2 would be undefined otherwise
            continue
        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss = batch_loss.item()
        batch_loss.backward()
        optimizer.step()
        print("loss", loss)

        # for name, param in model.named_parameters():
        #     if param.requires_grad:
        #         ema.update(name, param.data)

        # if (i + 1) % args.print_freq == 0:
        #     dev_loss, dev_exact, dev_f1 = test(model, args, data)
        #     c = (i + 1) // args.print_freq
        #     writer.add_scalar('loss/train', loss, c)
        #     writer.add_scalar('loss/dev', dev_loss, c)
        #     writer.add_scalar('exact_match/dev', dev_exact, c)
        #     writer.add_scalar('f1/dev', dev_f1, c)
        #     print('train loss: {:.3f} / dev loss: {:.3f} dev EM: {:.3f} dev F1: {:.3f}'.format(loss, dev_loss, dev_exact, dev_f1))
        #     if dev_f1 > max_dev_f1:
        #         max_dev_f1 = dev_f1
        #         max_dev_exact = dev_exact
        #         best_model = copy.deepcopy(model)
        #     loss = 0
        #     model.train()

    # writer.close()
    # print('max dev EM: {:.3f} / max dev F1: {:.3f}'.format(max_dev_exact, max_dev_f1))
    return best_model
parser.add_argument('--context-threshold', default=400, type=int)
parser.add_argument('--dev-batch-size', default=100, type=int)
parser.add_argument('--dev-file', default='dev-v1.1.json')
parser.add_argument('--dropout', default=0.2, type=float)
parser.add_argument('--epoch', default=12, type=int)
parser.add_argument('--exp-decay-rate', default=0.999, type=float)
parser.add_argument('--gpu', default=0, type=int)
parser.add_argument('--hidden-size', default=100, type=int)
parser.add_argument('--learning-rate', default=0.5, type=float)
parser.add_argument('--print-freq', default=250, type=int)
parser.add_argument('--train-batch-size', default=60, type=int)
parser.add_argument('--train-file', default='train-v1.1.json')
parser.add_argument('--word-dim', default=100, type=int)
args = parser.parse_args()

print('loading SQuAD data...')
data = SQuAD(args)
setattr(args, 'char_vocab_size', len(data.CHAR.vocab))
setattr(args, 'word_vocab_size', len(data.WORD.vocab))
setattr(args, 'dataset_file', f'.data/squad/{args.dev_file}')
setattr(args, 'prediction_file', f'prediction{args.gpu}.out')
setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))
print('data loading complete!')

device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
model = BiDAF(args, data.WORD.vocab.vectors).to(device)
# load the trained parameters into the model
# model.load_state_dict(torch.load(r'saved_models/BiDAF_08:03:17.pt'))
model.eval()

# show the model structure
print(model)
parser.add_argument('--word-dim', default=100, type=int)
args = parser.parse_args()

path = r'testing_files'
print('loading SQuAD data...')
data = SQuAD(args, path)
setattr(args, 'char_vocab_size', len(data.CHAR.vocab))
setattr(args, 'word_vocab_size', len(data.WORD.vocab))
setattr(args, 'dataset_file', f'testing_files/{args.dev_file}')
setattr(args, 'prediction_file', f'output/prediction{time.time()}.out')
setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))
print('data loading complete!')

device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
model = BiDAF(args, data.WORD.vocab.vectors).to(device)
# load the trained parameters into the model
model.load_state_dict(torch.load(r'saved_models/BiDAF_08:03:17.pt'))
model.eval()

# run the model and collect answers, keyed by question id
answers = dict()
with torch.set_grad_enabled(False):
    for batch in iter(data.dev_iter):
        # batch.to(device)
        p1, p2 = model(batch)
        # batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        # loss += batch_loss.item()

        # p1, p2: (batch, c_len); the joint span score will be (batch, c_len, c_len)
        batch_size, c_len = p1.size()