def train_no_content(config_path, experiment_info, thread_queue):
    """Train a no-content MedQA model end to end.

    Reads the experiment config at ``config_path``, builds the dataset,
    model, losses, optimizer and LR scheduler, optionally restores an
    existing checkpoint, then runs the training loop while logging metrics
    to TensorBoard under ``experiment_info``.

    Args:
        config_path: path to the experiment config file.
        experiment_info: run identifier; names the TensorBoard log dir.
        thread_queue: queue passed through to ``train_on_model`` for
            inter-thread communication (semantics defined by the trainer).

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model/optimizer name is unknown.
    """
    # BUGFIX: the ``global`` statement must precede any assignment to these
    # names in this scope.  It previously appeared after
    # ``global_config = read_config(...)``, which is a SyntaxError
    # ("name 'global_config' is assigned to before global declaration").
    global gpu_nums, init_embedding_weight, batch_test_data, batch_dev_data, \
        tensorboard_writer, test_epoch, embedding_layer_name, val_epoch, \
        global_config, best_valid_acc

    logger.info('------------MedQA v1.0 Train--------------')
    logger.info('============================loading config file... print config file =========================')
    global_config = read_config(config_path)
    # Echo the raw config into the log; context manager closes the handle
    # (the previous bare ``open(...).read()`` leaked it).
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info('^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^')

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    test_epoch = 0
    val_epoch = 0

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # ----------------------------- dataset -----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'SeaReader_5c':
        model = SeaReader_5c(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v2':
        model = SeaReader_v2(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v3':
        model = SeaReader_v3(dataset_h5_path, device)
    elif model_choose == 'No_content_model':
        # No_content_model takes no device argument, unlike the others.
        model = No_content_model(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    print_network(model)

    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    # losses: task margin loss plus the two auxiliary regularizers
    task_criterion = SVM_loss().to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = delta_embedding_Loss(c=1).to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer — only trainable parameters are handed over.
    # NOTE(review): adamax/adadelta ignore the configured learning rate and
    # fall back to the library defaults — confirm this is intentional.
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param, lr=optimizer_lr, eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=2, verbose=True)

    # restore an existing checkpoint when present and continuation is enabled
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        if not global_config['train']['keep_embedding']:
            # drop the embedding weights so a vocabulary-size mismatch with
            # the checkpoint cannot break load_state_dict
            del weight['module.embedding.embedding_layer.weight']
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training............................................')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    test_batch_size = global_config['train']['test_batch_size']
    # NOTE(review): the train loader is built with shuffle=False — confirm
    # ordered training batches are intentional here.
    batch_train_data = dataset.get_dataloader_train(train_batch_size, shuffle=False)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size, shuffle=False)
    clip_grad_max = global_config['train']['clip_grad_norm']

    # tensorboardX writer
    tensorboard_writer = SummaryWriter(
        log_dir=os.path.join('tensorboard_logdir', experiment_info))

    best_valid_acc = None
    for epoch in range(global_config['train']['epoch']):
        model.train()  # training mode: enables dropout etc.
        train_avg_loss, train_avg_problem_acc = train_on_model(
            model=model,
            criterion=all_criterion,
            optimizer=optimizer,
            batch_data=batch_train_data,
            epoch=epoch,
            clip_grad_max=clip_grad_max,
            device=device,
            thread_queue=thread_queue,
            experiment_info=experiment_info)

        tensorboard_writer.add_scalar("train/lr", optimizer.param_groups[0]['lr'], epoch)
        tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss, epoch)
        tensorboard_writer.add_scalar("train/problem_acc", train_avg_problem_acc, epoch)

        # reduce the learning rate when the training loss plateaus
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
    tensorboard_writer.close()
def debug(config_path, experiment_info):
    """Smoke-test the training pipeline with a tiny, fixed setup.

    Builds the configured model, restores weights when a checkpoint exists,
    drops into an interactive ``embed()`` shell for inspection, then runs a
    single epoch over the dev loader with batch size 10.
    """
    logger.info('------------MedQA v1.0 Train--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # seed everything for reproducible debugging runs
    torch.manual_seed(global_config['global']['random_seed'])

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    cuda_ok = torch.cuda.is_available()
    if cuda_ok and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif enable_cuda and not cuda_ok:
        raise ValueError(
            "CUDA is not available, please unable CUDA in config file")

    ############################### load dataset ############################
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)

    # dispatch table keeps construction lazy: only the selected model is built
    builders = {
        'SeaReader': lambda: SeaReader(dataset_h5_path, device),
        'SimpleSeaReader': lambda: SimpleSeaReader(dataset_h5_path, device),
        'TestModel': lambda: TestModel(dataset_h5_path, device),
        'cnn_model': lambda: cnn_model(dataset_h5_path, device),
        'SeaReader_v2': lambda: SeaReader_v2(dataset_h5_path, device),
        'SeaReader_v3': lambda: SeaReader_v3(dataset_h5_path, device),
        'SeaReader_v4': lambda: SeaReader_v4(dataset_h5_path, device),
        'SeaReader_v4_5': lambda: SeaReader_v4_5(dataset_h5_path, device),
        'SeaReader_v5': lambda: SeaReader_v5(dataset_h5_path, device),
        'SeaReader_v6': lambda: SeaReader_v6(dataset_h5_path, device),
        # No_content_model takes no device argument
        'No_content_model': lambda: No_content_model(dataset_h5_path),
        'SeaReader_attention': lambda: SeaReader_attention(dataset_h5_path, device),
    }
    if model_choose not in builders:
        raise ValueError('model "%s" in config file not recognized' % model_choose)
    model = builders[model_choose]()

    print_network(model)

    logger.info('dataParallel using %d GPU.....' % torch.cuda.device_count())
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    # task loss is weighted cross-entropy plus two auxiliary regularizers
    task_criterion = CrossEntropyLoss(
        weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss().to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer over trainable parameters only
    opt_name = global_config['train']['optimizer']
    lr = global_config['train']['learning_rate']
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    if opt_name == 'adamax':
        optimizer = optim.Adamax(trainable_params)
    elif opt_name == 'adadelta':
        optimizer = optim.Adadelta(trainable_params)
    elif opt_name == 'adam':
        optimizer = optim.Adam(trainable_params, lr=lr)
    elif opt_name == 'sgd':
        optimizer = optim.SGD(trainable_params, lr=lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % opt_name)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2,
                                  patience=5, verbose=True)

    # restore an existing checkpoint when continuation is requested
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        map_loc = ((lambda storage, loc: storage.cuda()) if enable_cuda
                   else (lambda storage, loc: storage))
        weight = torch.load(weight_path, map_location=map_loc)
        model.load_state_dict(weight, strict=False)

    embed()  # drop into an interactive shell for hands-on inspection

    # training arguments — fixed tiny batch sizes for debugging
    logger.info('start training............................................')
    train_batch_size = valid_batch_size = test_batch_size = 10
    batch_train_data = dataset.get_dataloader_train(train_batch_size, shuffle=True)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, shuffle=False)
    global batch_test_data
    batch_test_data = dataset.get_dataloader_test(test_batch_size, shuffle=False)
    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = False
    best_valid_acc = None

    # single debug epoch; deliberately iterates the (small) dev loader
    for epoch in range(1):
        model.train()  # training mode: enables dropout etc.
        train_avg_loss, train_avg_binary_acc = train_on_model(
            model=model,
            criterion=all_criterion,
            optimizer=optimizer,
            batch_data=batch_dev_data,
            epoch=epoch,
            clip_grad_max=clip_grad_max,
            device=device,
            enable_char=enable_char,
            batch_char_func=dataset.gen_batch_with_char)
        # reduce the learning rate when the loss plateaus
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
def test(config_path, experiment_info):
    """Evaluate a trained MedQA model on the test set.

    Loads the config and a saved checkpoint, runs the model over the test
    dataloader with gradients disabled, and writes predictions to
    ``<output_file_path><experiment_info>_result.csv``.

    Args:
        config_path: path to the experiment config file.
        experiment_info: run identifier used in the output file name.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model name is unknown.
        FileNotFoundError: if the model weight file does not exist.
    """
    logger.info('------------MedQA v1.0 Evaluate--------------')
    logger.info('============================loading config file... print config file =========================')
    global_config = read_config(config_path)
    # Echo the raw config into the log; context manager closes the handle
    # (the previous bare ``open(...).read()`` leaked it).
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info('^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^')

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # evaluation only: make sure every tensor below has requires_grad=False
    torch.set_grad_enabled(False)

    # ----------------------------- dataset -----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['test']['model']
    logger.info("model choose is: " + model_choose)
    dataset_h5_path = global_config['test']['dataset_h5']
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    print_network(model)

    # always wrapped in DataParallel so checkpoint keys keep the
    # 'module.' prefix regardless of GPU count
    logger.info('dataParallel using %d GPU.....' % torch.cuda.device_count())
    model = torch.nn.DataParallel(model)
    model = model.to(device)
    model.eval()  # evaluation mode: disables dropout etc.

    global init_embedding_weight
    init_embedding_weight = model.state_dict()['module.embedding.embedding_layer.weight']

    # criterion
    task_criterion = CrossEntropyLoss(weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss().to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # testing arguments
    logger.info('get test data loader ...')
    test_batch_size = global_config['test']['test_batch_size']
    batch_test_data = dataset.get_dataloader_test(test_batch_size, shuffle=False)

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    # explicit raise instead of assert: asserts vanish under ``python -O``
    if not os.path.exists(model_weight_path):
        raise FileNotFoundError(
            "not found model weight file on '%s'" % model_weight_path)
    # BUGFIX: load the checkpoint ONCE with the right map_location; the old
    # code always loaded to CPU first and then, when CUDA was enabled,
    # discarded that result and loaded the whole file a second time.
    if enable_cuda:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage)
    if not global_config['test']['keep_embedding']:
        # drop the embedding weights so a vocabulary-size mismatch with the
        # checkpoint cannot break load_state_dict
        del weight['module.embedding.embedding_layer.weight']
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('evaluate forwarding...')
    out_path = global_config['test']['output_file_path'] + experiment_info + "_result.csv"
    # to just evaluate score or write answer to file
    if out_path is not None:
        predict_on_model(model=model, batch_data=batch_test_data, device=device,
                         out_path=out_path)
    # use the configured module logger (was ``logging.info``, which bypassed it)
    logger.info('finished.')