def train(config_path):
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading dataset...')
    dataset = Dataset(global_config)

    logger.info('constructing model...')
    dataset_h5_path = global_config['data']['dataset_h5']
    model = MatchLSTMPlus(dataset_h5_path)
    model = model.to(device)
    criterion = MyNLLLoss()
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    model_rerank = None
    rank_k = global_config['global']['rank_k']
    if global_config['global']['enable_rerank']:
        model_rerank = ReRanker(dataset_h5_path)
        model_rerank = model_rerank.to(device)
        criterion = torch.nn.NLLLoss()
        optimizer_param = filter(lambda p: p.requires_grad, model_rerank.parameters())

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    # load existing model weight if it exists
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    rerank_weight_path = global_config['data']['rerank_model_path']
    if global_config['global']['enable_rerank'] and os.path.exists(rerank_weight_path):
        logger.info('loading existing rerank weight...')
        if enable_cuda:
            weight = torch.load(rerank_weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(rerank_weight_path, map_location=lambda storage, loc: storage)
        model_rerank.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']
    best_avg = 0.

    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        if global_config['global']['enable_rerank']:
            model_rerank.train()
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  model_rerank=model_rerank,
                                  rank_k=rank_k)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            if global_config['global']['enable_rerank']:
                model_rerank.eval()
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       model_rerank=model_rerank,
                                                                       rank_k=rank_k)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model on best average score
        if valid_avg > best_avg:
            if model_rerank is not None:
                save_model(model_rerank,
                           epoch=epoch,
                           model_weight_path=global_config['data']['rerank_model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving rerank model weight on epoch=%d" % epoch)
            else:
                save_model(model,
                           epoch=epoch,
                           model_weight_path=global_config['data']['model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg

    logger.info('finished.')
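# Usage sketch (hypothetical, not part of the original script): train() only
# needs a config path, so a minimal CLI wrapper could look like the following.
# The default path 'config/global_config.yaml' is an assumption.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='train MatchLSTMPlus with optional re-ranker')
    parser.add_argument('--config', default='config/global_config.yaml',
                        help='path to the global config yaml file')
    args = parser.parse_args()
    train(args.config)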
def main(config_path):
    logger.info('------------Match-LSTM Train--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    # load existing model weight if it exists
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    # batch_train_data = dataset.get_dataloader_train(train_batch_size)
    # batch_dev_data = dataset.get_dataloader_dev(valid_batch_size)
    batch_train_data = list(dataset.get_batch_train(train_batch_size))
    batch_dev_data = list(dataset.get_batch_dev(valid_batch_size))

    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = global_config['model']['encoder']['enable_char']
    best_valid_f1 = None

    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  enable_char=enable_char,
                                  batch_char_func=dataset.gen_batch_with_char)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       enable_char=enable_char,
                                                                       batch_char_func=dataset.gen_batch_with_char)
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model on best f1 score
        if best_valid_f1 is None or valid_score_f1 > best_valid_f1:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_valid_f1 = valid_score_f1

    logger.info('finished.')
def test(config_path, out_path):
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    torch.set_grad_enabled(False)  # make sure all tensors below have requires_grad=False

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path, global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    model.eval()  # let training = False, make sure right dropout

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), \
        "not found model weight file on '%s'" % model_weight_path

    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')
    batch_size = global_config['test']['batch_size']
    num_workers = global_config['global']['num_data_workers']

    if 'test_path' not in global_config['data']['dataset']:
        batch_test_data = dataset.get_dataloader_dev(batch_size, num_workers)
    else:
        batch_test_data = dataset.get_dataloader_test(batch_size, num_workers)

    # either just evaluate the score or write answers to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_test_data,
                                                     epoch=None,
                                                     device=device)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (score_em, score_f1, sum_loss))
    else:
        # context_right_space = dataset.get_all_ct_right_space_dev()
        context_right_space = dataset.get_all_ct_right_space_test()
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_test_data,
                                       device=device,
                                       id_to_word_func=dataset.sentence_id2word,
                                       right_space=context_right_space)
        # samples_id = dataset.get_all_samples_id_dev()
        samples_id = dataset.get_all_samples_id_test()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
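# The answer file written by test() maps each sample id to its predicted
# answer string. A quick sanity check on the dump (illustrative snippet; the
# file name 'predictions.json' is an assumption):
import json

with open('predictions.json') as f:
    ans_with_id = json.load(f)
print('%d answers' % len(ans_with_id))
print(next(iter(ans_with_id.items())))  # one (id, answer) pair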
def train(config_path):
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    # load existing model weight if it exists
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']
    best_avg = 0.

    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model on best average score
        if valid_avg > best_avg:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg

    logger.info('finished.')
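# The scripts above load a checkpoint, deciding with map_location whether the
# deserialized tensors should live on CPU or GPU. A shared helper (hypothetical
# refactor, not code from the repo) would avoid repeating the two branches:
import torch

def load_weight(path, enable_cuda):
    map_location = (lambda storage, loc: storage.cuda()) if enable_cuda \
        else (lambda storage, loc: storage)
    return torch.load(path, map_location=map_location)

# e.g. model.load_state_dict(load_weight(weight_path, enable_cuda), strict=False)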
def train(config_path):
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path, global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    # load existing model weight if it exists
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']
    best_avg = 0.

    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model on best average score
        if valid_avg > best_avg:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg

    logger.info('pretraining finished.')

    if global_config['global']['finetune']:
        batch_train_data = dataset.get_dataloader_train2(train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev2(valid_batch_size, num_workers)
        for epoch in range(global_config['train']['finetune_epoch']):
            # train
            model.train()  # set training = True, make sure right dropout
            sum_loss = train_on_model(model=model,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      batch_data=batch_train_data,
                                      epoch=epoch,
                                      clip_grad_max=clip_grad_max,
                                      device=device)
            logger.info('finetune epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                           criterion=criterion,
                                                                           batch_data=batch_dev_data,
                                                                           epoch=epoch,
                                                                           device=device)
            valid_avg = (valid_score_em + valid_score_f1) / 2
            logger.info("finetune epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                        (epoch, valid_score_em, valid_score_f1, valid_loss))

            # save model on best average score
            if valid_avg > best_avg:
                save_model(model,
                           epoch=epoch,
                           model_weight_path=global_config['data']['model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
                best_avg = valid_avg

    if global_config['global']['finetune2']:
        batch_train_data = dataset.get_dataloader_train3(train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev3(valid_batch_size, num_workers)
        for epoch in range(global_config['train']['finetune_epoch2']):
            # train
            model.train()  # set training = True, make sure right dropout
            sum_loss = train_on_model(model=model,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      batch_data=batch_train_data,
                                      epoch=epoch,
                                      clip_grad_max=clip_grad_max,
                                      device=device)
            logger.info('finetune2 epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                           criterion=criterion,
                                                                           batch_data=batch_dev_data,
                                                                           epoch=epoch,
                                                                           device=device)
            valid_avg = (valid_score_em + valid_score_f1) / 2
            logger.info("finetune2 epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                        (epoch, valid_score_em, valid_score_f1, valid_loss))

            # save model on best average score
            if valid_avg > best_avg:
                save_model(model,
                           epoch=epoch,
                           model_weight_path=global_config['data']['model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
                best_avg = valid_avg

    logger.info('finished.')
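# train_on_model is defined elsewhere in the repo; judging from the arguments
# it receives, each batch update plausibly follows the standard clipped-SGD
# pattern below (a sketch under that assumption, not the repo's actual code):
import torch

def train_step(model, criterion, optimizer, batch_input, batch_target, clip_grad_max):
    optimizer.zero_grad()
    loss = criterion(model(*batch_input), batch_target)
    loss.backward()
    # clip the global gradient norm to clip_grad_max to avoid exploding gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_max)
    optimizer.step()
    return loss.item()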
def main(config_path, out_path):
    logger.info('------------Match-LSTM Evaluate--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # note: a bare `torch.no_grad()` statement has no effect; disable autograd globally
    torch.set_grad_enabled(False)  # make sure all tensors below have requires_grad=False

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    model.eval()  # let training = False, make sure right dropout

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), \
        "not found model weight file on '%s'" % model_weight_path

    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')
    enable_char = global_config['model']['encoder']['enable_char']
    batch_size = global_config['test']['batch_size']
    # batch_dev_data = dataset.get_dataloader_dev(batch_size)
    batch_dev_data = list(dataset.get_batch_dev(batch_size))

    # either just evaluate the score or write answers to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device,
                                                     enable_char=enable_char,
                                                     batch_char_func=dataset.gen_batch_with_char)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (score_em, score_f1, sum_loss))
    else:
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       enable_char=enable_char,
                                       batch_char_func=dataset.gen_batch_with_char,
                                       id_to_word_func=dataset.sentence_id2word)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
def test(config_path, out_path):
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    torch.set_grad_enabled(False)  # make sure all tensors below have requires_grad=False

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    model.eval()  # let training = False, make sure right dropout

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), \
        "not found model weight file on '%s'" % model_weight_path

    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')
    batch_size = global_config['test']['batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_dev_data = dataset.get_dataloader_dev(batch_size, num_workers)

    # either just evaluate the score or write answers to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (score_em, score_f1, sum_loss))
    else:
        context_right_space = dataset.get_all_ct_right_space_dev()
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       id_to_word_func=dataset.sentence_id2word,
                                       right_space=context_right_space)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
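# test() above calls torch.set_grad_enabled(False), which disables autograd
# process-wide for everything that follows. A more scoped alternative is the
# torch.no_grad() context manager, which restores the previous state on exit:
import torch

x = torch.ones(2, requires_grad=True)
with torch.no_grad():
    y = x * 2           # no autograd graph is recorded inside the block
print(y.requires_grad)  # False; outside the block, grad mode is back on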
def train(config_path, experiment_info, thread_queue):
    logger.info('------------MedQA v1.0 Train--------------')
    logger.info('============================ loading config file... print config file =========================')
    global_config = read_config(config_path)
    logger.info(open(config_path).read())
    logger.info('^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^')

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    global gpu_nums, init_embedding_weight, batch_test_data, tensorboard_writer, test_epoch, embedding_layer_name
    test_epoch = 0

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # load dataset
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('using dataset path: %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    print_network(model)

    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    # weights_init(model)

    # find the embedding layer's key in the state dict and remember its initial weight
    embedding_layer_name = 'module.embedding.embedding_layer.weight'
    for name in model.state_dict().keys():
        if 'embedding_layer.weight' in name:
            embedding_layer_name = name
            break
    init_embedding_weight = model.state_dict()[embedding_layer_name].clone()

    task_criterion = CrossEntropyLoss(weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss(c=0.01).to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param, lr=optimizer_lr, eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=5, verbose=True)

    # load existing model weight if it exists and training should continue
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        # TODO: later versions may no longer need this
        if not global_config['train']['keep_embedding']:
            # drop the embedding layer weights to avoid size-mismatch problems
            del weight['module.embedding.embedding_layer.weight']
        # # drop the decision (fully-connected) layer weights
        # decision_layer_names = []
        # for name, w in weight.items():
        #     if 'decision_layer' in name:
        #         decision_layer_names.append(name)
        # for name in decision_layer_names:
        #     del weight[name]
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training............................................')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    test_batch_size = global_config['train']['test_batch_size']

    batch_train_data = dataset.get_dataloader_train(train_batch_size, shuffle=False)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size, shuffle=False)

    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = False

    # tensorboardX writer
    save_cur_experiment_code_path = "savedcodes/" + experiment_info
    save_current_codes(save_cur_experiment_code_path, global_config)
    tensorboard_writer = SummaryWriter(log_dir=os.path.join('tensorboard_logdir', experiment_info))

    best_valid_acc = None
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        train_avg_loss, train_avg_binary_acc = train_on_model(model=model,
                                                              criterion=all_criterion,
                                                              optimizer=optimizer,
                                                              batch_data=batch_train_data,
                                                              epoch=epoch,
                                                              clip_grad_max=clip_grad_max,
                                                              device=device,
                                                              thread_queue=thread_queue)

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            val_avg_loss, val_avg_binary_acc, val_avg_problem_acc = eval_on_model(
                model=model,
                criterion=all_criterion,
                batch_data=batch_dev_data,
                epoch=epoch,
                device=device,
                init_embedding_weight=init_embedding_weight,
                eval_dataset='dev')
            # test_avg_loss, test_avg_binary_acc, test_avg_problem_acc = eval_on_model(model=model,
            #                                                                          criterion=all_criterion,
            #                                                                          batch_data=batch_test_data,
            #                                                                          epoch=epoch,
            #                                                                          device=device,
            #                                                                          enable_char=enable_char,
            #                                                                          batch_char_func=dataset.gen_batch_with_char,
            #                                                                          init_embedding_weight=init_embedding_weight)

        # save model on best problem accuracy
        if best_valid_acc is None or val_avg_problem_acc > best_valid_acc:
            epoch_info = 'epoch=%d, val_binary_acc=%.4f, val_problem_acc=%.4f' % (
                epoch, val_avg_binary_acc, val_avg_problem_acc)
            save_model(model,
                       epoch_info=epoch_info,
                       model_weight_path=global_config['data']['model_weight_dir'] + experiment_info + "_model_weight.pt",
                       checkpoint_path=global_config['data']['checkpoint_path'] + experiment_info + "_save.log")
            logger.info("========= saving model weight on epoch=%d =======" % epoch)
            best_valid_acc = val_avg_problem_acc

        tensorboard_writer.add_scalar("train/lr", optimizer.param_groups[0]['lr'], epoch)
        tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss, epoch)
        tensorboard_writer.add_scalar("train/binary_acc", train_avg_binary_acc, epoch)
        tensorboard_writer.add_scalar("val/avg_loss", val_avg_loss, epoch)
        tensorboard_writer.add_scalar("val/binary_acc", val_avg_binary_acc, epoch)
        tensorboard_writer.add_scalar("val/problem_acc", val_avg_problem_acc, epoch)

        # adjust learning rate
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
    tensorboard_writer.close()
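# The scheduler above (ReduceLROnPlateau with mode='min', factor=0.2,
# patience=5) multiplies the learning rate by 0.2 once the monitored metric,
# here train_avg_loss, has failed to improve for more than 5 consecutive
# epochs. Minimal self-contained illustration with a throwaway optimizer:
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.1)
sched = ReduceLROnPlateau(opt, mode='min', factor=0.2, patience=5)
for _ in range(12):
    sched.step(1.0)  # a constant metric never improves, so a plateau builds up
print(opt.param_groups[0]['lr'])  # ~0.02: the lr was cut once, from 0.1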
def train_on_model(model, criterion, optimizer, batch_data, epoch, clip_grad_max,
                   device, thread_queue):
    """
    Train on every batch.
    :param model: model to train
    :param criterion: [task_criterion, gate_criterion, embedding_criterion]
    :param optimizer: optimizer used to update the parameters
    :param batch_data: train data loader
    :param epoch: current epoch number
    :param clip_grad_max: max norm for gradient clipping
    :param device: torch device to run on
    :param thread_queue: queue used to report progress timestamps to other threads
    :return: average loss and average binary accuracy over the epoch
    """
    global test_epoch
    epoch_loss = AverageMeter()
    epoch_binary_acc = AverageMeter()
    batch_cnt = len(batch_data)
    for i, batch in enumerate(batch_data, 0):
        optimizer.zero_grad()

        # batch data
        # bat_context, bat_question, bat_context_char, bat_question_char, bat_answer_range = \
        #     batch_char_func(batch, enable_char=enable_char, device=device)
        contents, question_ans, sample_labels, sample_ids, sample_categorys, sample_logics = batch
        contents = contents.to(device)
        question_ans = question_ans.to(device)
        sample_labels = sample_labels.to(device)
        sample_logics = sample_logics.to(device)

        # contents: batch_size*10*200, question_ans: batch_size*100, sample_labels: batch_size
        # forward
        pred_labels = model.forward(contents, question_ans, sample_logics)  # pred_labels size=(batch, 2)
        # pred_labels = model_output[0:model_output.size()[0]-1]
        # mean_gate_val = model_output[-1][0][0]

        # task loss
        task_loss = criterion[0].forward(pred_labels, sample_labels)

        # gate loss
        # gate_loss = criterion[1].forward(mean_gate_val)
        gate_loss = 0

        # embedding regularization loss
        embedding_loss = criterion[2].forward(model.embedding.embedding_layer.weight,
                                              init_embedding_weight)
        # embedding_loss = 0

        loss = task_loss + gate_loss + embedding_loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_max)  # avoid gradient explosion
        optimizer.step()  # update parameters

        # logging
        batch_loss = loss.item()
        epoch_loss.update(batch_loss, len(sample_ids))
        binary_acc = compute_binary_accuracy(pred_labels.data, sample_labels.data)
        # problem_acc = compute_problems_accuracy(pred_labels.data, sample_labels.data, sample_ids)
        epoch_binary_acc.update(binary_acc.item(), len(sample_ids))
        # epoch_problem_acc.update(problem_acc.item(), int(len(sample_ids) / 5))

        logger.info('epoch=%d, batch=%d/%d, embedding_loss=%.5f loss=%.5f binary_acc=%.4f' %
                    (epoch, i, batch_cnt, embedding_loss, batch_loss, binary_acc))

        # inter-thread communication: keep only the latest batch timestamp in the queue
        if thread_queue.qsize() != 0:
            thread_queue.queue.clear()
        thread_queue.put(time.time())

        # manually release memory; TODO: does this really help?
        del contents, question_ans, sample_labels, sample_ids
        del pred_labels, loss

        if i % 200 == 0:
            model.eval()
            with torch.no_grad():
                test_avg_loss, test_binary_acc, test_avg_problem_acc = eval_on_model(
                    model=model,
                    criterion=criterion,
                    batch_data=batch_test_data,
                    epoch=test_epoch,
                    device=device,
                    init_embedding_weight=init_embedding_weight,
                    eval_dataset='test')
                tensorboard_writer.add_scalar("test/avg_loss", test_avg_loss, test_epoch)
                tensorboard_writer.add_scalar("test/binary_acc", test_binary_acc, test_epoch)
                tensorboard_writer.add_scalar("test/problem_acc", test_avg_problem_acc, test_epoch)
                test_epoch += 1
            model.train()

    logger.info('===== epoch=%d, batch_count=%d, epoch_average_loss=%.5f, avg_binary_acc=%.4f ====' %
                (epoch, batch_cnt, epoch_loss.avg, epoch_binary_acc.avg))
    return epoch_loss.avg, epoch_binary_acc.avg
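# AverageMeter, used throughout train_on_model, is the running-average helper
# popularized by the official PyTorch ImageNet example; a minimal sketch that
# matches the update(val, n) / .avg interface relied on above:
class AverageMeter(object):
    """Tracks a weighted running average of a scalar metric."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # val is a per-batch average; n is how many samples it covers
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count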