def __init__(self, cfg):
    cfg = deepcopy(cfg)
    cfg["model"]["arch"] = "joint_segmentation_depth"
    cfg["model"]["segmentation_name"] = None
    cfg["model"]["disable_monodepth"] = False
    cfg["model"]["disable_pose"] = True
    cfg['data']['augmentations'] = {}
    cfg['data'].pop('crop_h', None)
    cfg['data'].pop('crop_w', None)
    assert not (cfg["data"].get("depth_teacher") is not None
                and cfg['model'].get("depth_estimator_weights") is not None)
    if cfg["data"].get("depth_teacher") is not None:
        cfg['model']['backbone_name'] = "resnet101"
        cfg, load_backbone = decoder_variant(cfg, 6, (512, 512))
        cfg['model']['depth_pretraining'] = cfg["data"]["depth_teacher"]
        cfg['model']['backbone_pretraining'] = cfg["data"]["depth_teacher"]
    if cfg['model'].get("depth_estimator_weights") is not None:
        cfg['model']['backbone_pretraining'] = cfg['model']['depth_estimator_weights']
        cfg['model']['depth_pretraining'] = cfg['model']['depth_estimator_weights']
    self.cfg = cfg
    assert cfg['model']['depth_pretraining'] == cfg['model']['backbone_pretraining']
    self.depth_dir = cfg["data"]["generated_depth_dir"]
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.monodepth_loss_calculator = get_monodepth_loss(self.cfg, is_train=False)

    unrestricted_cfg = deepcopy(self.cfg["data"])
    unrestricted_cfg.update({
        "restrict_to_subset": None,
        "generated_depth_dir": None
    })
    self.train_loader = build_loader(unrestricted_cfg, "train",
                                     load_labels=False, load_sequence=False)
    self.val_loader = build_loader(unrestricted_cfg, "val",
                                   load_labels=False, load_sequence=False)
    self.loader = data.ConcatDataset([self.train_loader, self.val_loader])
    self.n_classes = self.train_loader.n_classes
    batch_size = 4
    self.data_loader = data.DataLoader(
        self.loader,
        batch_size=batch_size,
        num_workers=self.cfg["training"]["n_workers"],
        pin_memory=True,
    )
    self.model = get_model(cfg["model"], self.n_classes).to(self.device)
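# Usage sketch (not part of the original class): the trainer setups further down in
# this file construct this estimator, precompute depth estimates for the whole dataset,
# and then free the GPU memory again. The prepare_depth_estimates() call is taken from
# those setups; anything beyond that is an assumption.
#
#     depth_estimator = DepthEstimator(cfg)
#     depth_estimator.prepare_depth_estimates()
#     del depth_estimator
#     torch.cuda.empty_cache()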
def test(gen_result, config):
    time0 = time.time()

    # prepare
    if config.is_true_test:
        preprocess_data.gen_pre_file_for_test()

    # load w2v
    embedding_np_train = loader.load_w2v(config.train_embedding + '.npy')
    if config.is_true_test:
        embedding_np_test = loader.load_w2v(config.test_embedding + '.npy')

    # prepare: test_df
    if config.is_true_test and (os.path.isfile(config.test_df) is False):
        preprocess_data.gen_test_datafile(config.test_data, config.test_df)
    if (config.is_true_test is False) and (os.path.isfile(config.test_val_df) is False):
        print('run gen_train_val_datafile() again')
        assert 1 == -1

    # load data
    if config.is_true_test is False:
        if os.path.isfile(config.test_val_pkl):
            with open(config.test_val_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = loader.load_data(config.test_val_df, config.train_vocab_path,
                                         config.tag_path)
            with open(config.test_val_pkl, 'wb') as file:
                pickle.dump(test_data, file)
    else:
        if os.path.isfile(config.test_pkl):
            with open(config.test_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = loader.load_data(config.test_df, config.test_vocab_path,
                                         config.tag_path)
            with open(config.test_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    # build test dataloader
    test_loader = loader.build_loader(dataset=test_data[:6],
                                      batch_size=config.test_batch_size,
                                      shuffle=False,
                                      drop_last=False)

    # model initial
    param = {
        'embedding': embedding_np_train,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn
    }
    model = eval(config.model_name).Model(param)

    # load model param, and training state
    model_path = os.path.join('model', config.model_test)
    print('load model, ', model_path)
    state = torch.load(model_path)
    model.load_state_dict(state['best_model_state'])

    # swap the frozen embedding_fix for the test-vocabulary embedding
    if config.is_true_test:
        model.embedding.sd_embedding.embedding_fix = nn.Embedding(
            num_embeddings=embedding_np_test.shape[0],
            embedding_dim=embedding_np_test.shape[1],
            padding_idx=0,
            _weight=torch.Tensor(embedding_np_test))
        model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
        model.embedding.sd_embedding.vocab_size = embedding_np_test.shape[0]
    model = model.cuda()

    best_loss = state['best_loss']
    best_epoch = state['best_epoch']
    best_step = state['best_step']
    best_time = state['best_time']
    use_time = state['time']
    print('best_epoch:%2d, best_step:%5d, best_loss:%.4f, best_time:%d, use_time:%d' %
          (best_epoch, best_step, best_loss, best_time, use_time))

    # gen result
    result_start = []
    result_end = []
    result_start_p = []
    result_end_p = []
    model.eval()
    with torch.no_grad():
        cc = 0
        cc_total = len(test_loader)
        print('total iter_num:%d' % cc_total)
        for batch in test_loader:
            # cuda, cut
            batch = utils.deal_batch(batch)
            outputs = model(batch)
            start, end = utils.answer_search(outputs)
            start = start.reshape(-1).cpu().numpy().tolist()
            end = end.reshape(-1).cpu().numpy().tolist()
            result_start = result_start + start
            result_end = result_end + end

            start_p = outputs[0].cpu().numpy().tolist()
            end_p = outputs[1].cpu().numpy().tolist()
            result_start_p += start_p
            result_end_p += end_p

            cc += 1
            if cc % 100 == 0:
                print('processing: %d/%d' % (cc, cc_total))

    # generate string answers if requested
    if gen_result:
        if config.is_true_test:
            df = pd.read_csv(config.test_df)
        else:
            df = pd.read_csv(config.test_val_df)

        # build the answer strings from the predicted spans
        titles = df['title']
        shorten_content = df['shorten_content']
        question = df['question']
        assert len(titles) == len(shorten_content) == len(result_start) == len(result_end)
        result = utils.gen_str(titles, shorten_content, question,
                               result_start, result_end,
                               add_liangci=config.is_true_test)

        # gen a submission
        if config.is_true_test:
            articled_ids = df['article_id'].astype(str).values.tolist()
            question_ids = df['question_id'].values
            submission = []
            temp_a_id = articled_ids[0]
            temp_qa = []
            for a_id, q_id, a in zip(articled_ids, question_ids, result):
                if a_id == temp_a_id:
                    sub = {'questions_id': q_id, 'answer': a}
                    temp_qa.append(sub)
                else:
                    submission.append({
                        'article_id': temp_a_id,
                        'questions': temp_qa
                    })
                    temp_a_id = a_id
                    temp_qa = [{'questions_id': q_id, 'answer': a}]
            submission.append({'article_id': temp_a_id, 'questions': temp_qa})

            submission_article = [s['article_id'] for s in submission]
            submission_questions = [s['questions'] for s in submission]
            submission_dict = dict(zip(submission_article, submission_questions))

            with open(config.test_data, 'r') as file:
                all_data = json.load(file)
            all_article = [d['article_id'] for d in all_data]
            submission = []
            for a_id in all_article:
                if a_id in submission_dict:
                    submission.append({
                        'article_id': a_id,
                        'questions': submission_dict[a_id]
                    })
                else:
                    submission.append({'article_id': a_id, 'questions': []})
            with open(config.submission, mode='w', encoding='utf-8') as f:
                json.dump(submission, f, ensure_ascii=False)

        # my_metrics
        if config.is_true_test is False:
            answer_true = df['answer'].values
            assert len(result) == len(answer_true)
            blue_score = blue.Bleu()
            rouge_score = rouge_test.RougeL()
            for a, r in zip(answer_true, result):
                if a == a:  # skip NaN ground-truth answers
                    blue_score.add_inst(r, a)
                    rouge_score.add_inst(r, a)
            print('rouge_L score: %.4f, blue score:%.4f' %
                  (rouge_score.get_score(), blue_score.get_score()))

        # to .csv
        if config.is_true_test is False:
            df['answer_pred'] = result
            df['answer_start_pred'] = result_start
            df['answer_end_pred'] = result_end
            csv_path = os.path.join('result', config.model_test + '_val.csv')
            df.to_csv(csv_path, index=False)

    # save result_ans_range
    if config.is_true_test:
        save_path = os.path.join('result/ans_range', config.model_test + '_submission.pkl')
    else:
        save_path = os.path.join('result/ans_range', config.model_test + '_val.pkl')
    result_ans_range = {'start_p': result_start_p, 'end_p': result_end_p}
    torch.save(result_ans_range, save_path)

    print('time:%d' % (time.time() - time0))
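# Example invocation (assumed entry point; the original driver script is not shown):
# evaluate on the held-out validation split first, then rerun with config.is_true_test
# enabled to produce the submission file.
#
#     if __name__ == '__main__':
#         test(gen_result=True, config=config)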
def train():
    time_start = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_train()

    # load w2v
    embedding_np = loader.load_w2v(config.train_embedding + '.npy')

    # prepare: train_df
    preprocess_data.gen_train_val_datafile()

    # load data
    print('load data...')
    time0 = time.time()

    # load train data
    if os.path.isfile(config.train_pkl):
        with open(config.train_pkl, 'rb') as file:
            train_data = pickle.load(file)
    else:
        train_data = build_dataset.CustomDataset(
            df_file=config.train_df,
            vocab_path=config.train_vocab_path,
            tag_path=config.tag_path)
        with open(config.train_pkl, 'wb') as file:
            pickle.dump(train_data, file)

    # load val data
    if os.path.isfile(config.val_pkl):
        with open(config.val_pkl, 'rb') as file:
            val_data = pickle.load(file)
    else:
        val_data = build_dataset.CustomDataset(
            df_file=config.val_df,
            vocab_path=config.train_vocab_path,
            tag_path=config.tag_path)
        with open(config.val_pkl, 'wb') as file:
            pickle.dump(val_data, file)

    print('train data size:%d, val data size:%d, time:%d' %
          (train_data.__len__(), val_data.__len__(), time.time() - time0))

    # build train, val dataloader
    train_loader = loader.build_loader(dataset=train_data,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       drop_last=True)
    val_loader = loader.build_loader(dataset=val_data,
                                     batch_size=config.batch_size,
                                     shuffle=False,
                                     drop_last=True)

    # model
    param = {
        'embedding': embedding_np,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn,
        'k': config.k,
        'num_align_hops': config.num_align_hops
    }
    model = eval(config.model_name).Model(param)

    # replace embedding_fix with the frozen pre-trained word vectors
    model.embedding.sd_embedding.embedding_fix = nn.Embedding(
        num_embeddings=embedding_np.shape[0],
        embedding_dim=embedding_np.shape[1],
        padding_idx=0,
        _weight=torch.Tensor(embedding_np))
    model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
    model = model.cuda()

    # loss
    criterion = loss.LossJoin()

    # optimizer
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(optimizer_param, lr=config.lr,
                           weight_decay=config.weight_decay)

    # load model param, optimizer param, train param
    model_path = os.path.join('model', config.model_save)
    if os.path.isfile(model_path):
        print('load training param, ', model_path)
        state = torch.load(model_path)
        model.load_state_dict(state['cur_model_state'])
        optimizer.load_state_dict(state['cur_opt_state'])
        epoch_list = range(state['cur_epoch'] + 1, state['cur_epoch'] + 1 + config.epoch)
        train_loss_list = state['train_loss']
        val_loss_list = state['val_loss']
        val_accuracy = state['val_accuracy']
        steps = state['steps']
        time_use = state['time']
    else:
        state = None
        epoch_list = range(config.epoch)
        train_loss_list = []
        val_loss_list = []
        val_accuracy = []
        steps = []
        time_use = 0

    # train
    model_param_num = 0
    for param in model.parameters():
        if param.requires_grad is True:
            model_param_num += param.nelement()
    print('starting training: %s' % config.model_name)
    if state is None:
        print('start_epoch:0, end_epoch:%d, num_params:%d' %
              (config.epoch - 1, model_param_num))
    else:
        print('start_epoch:%d, end_epoch:%d, num_params:%d' %
              (state['cur_epoch'] + 1, state['cur_epoch'] + config.epoch, model_param_num))

    plt.ion()
    train_loss = 0
    train_c = 0
    flag = False
    cc = 0
    grade_1 = False
    grade_2 = False
    grade_num1 = train_data.__len__() // config.batch_size
    grade_num2 = grade_num1 // 20
    for e in epoch_list:
        for i, batch in enumerate(train_loader):
            # cuda
            batch = utils.deal_batch(batch)
            model.train()
            optimizer.zero_grad()
            outputs = model(batch)
            loss_value = criterion(outputs, batch[-1].view(-1))
            loss_value.backward()
            nn.utils.clip_grad_norm_(model.parameters(), config.max_grad)
            optimizer.step()

            train_loss += loss_value.item()
            train_c += 1

            if config.val_mean:
                flag = (train_c % config.val_every == 0)
            else:
                if (train_c % (config.val_every // 2) == 0) and (cc <= 0):
                    cc += 1
                    flag = True
                elif grade_1 and (train_c % grade_num1 == 0):
                    flag = True
                elif grade_2 and (train_c % grade_num2 == 0):
                    flag = True

            if flag:
                flag = False
                val_loss = 0
                val_c = 0
                correct_num = 0
                sum_num = 0
                with torch.no_grad():
                    model.eval()
                    for val_batch in val_loader:
                        # cut, cuda
                        val_batch = utils.deal_batch(val_batch)
                        outputs = model(val_batch)
                        loss_value = criterion(outputs, val_batch[-1].view(-1))
                        _, k = torch.max(outputs, dim=1)
                        k = k.view(-1)
                        correct_num += torch.sum(k == val_batch[-1].view(-1)).item()
                        sum_num += val_batch[-1].size(0)
                        val_loss += loss_value.item()
                        val_c += 1

                train_loss_list.append(train_loss / train_c)
                val_loss_list.append(val_loss / val_c)
                steps.append(train_c)
                val_accuracy.append(correct_num * 1.0 / sum_num)

                print('training, epochs:%2d, steps:%5d, train_loss:%.4f, '
                      'val_loss:%.4f, val_accuracy:%.4f, time:%4ds' %
                      (e, sum(steps), train_loss / train_c, val_loss / val_c,
                       correct_num * 1.0 / sum_num, time.time() - time_start + time_use))

                if val_loss / val_c > 0.65:
                    grade_1 = True
                    grade_2 = False
                else:
                    grade_1 = False
                    grade_2 = True

                train_loss = 0
                train_c = 0

                # draw
                plt.cla()
                x = np.cumsum(steps)
                plt.plot(x, train_loss_list, color='r', label='train')
                plt.plot(x, val_loss_list, color='b', label='val')
                # plt.plot(x, val_accuracy, color='black', label='accuracy')
                plt.xlabel('steps')
                plt.ylabel('loss/accuracy')
                plt.legend()
                plt.pause(0.0000001)

                fig_path = os.path.join('model', config.model_save + '.png')
                plt.savefig(fig_path)
                plt.show()

                # save model
                if os.path.isfile(model_path):
                    state = torch.load(model_path)
                else:
                    state = {}
                if state == {} or (val_loss / val_c <= 0.61 and
                                   state['best_val_accuracy'] <= correct_num * 1.0 / sum_num):
                    state['best_model_state'] = model.state_dict()
                    state['best_opt_state'] = optimizer.state_dict()
                    state['best_loss'] = val_loss / val_c
                    state['best_val_accuracy'] = correct_num * 1.0 / sum_num
                    state['best_epoch'] = e
                    state['best_step'] = sum(steps)
                    state['best_time'] = time_use + time.time() - time_start
                state['cur_model_state'] = model.state_dict()
                state['cur_opt_state'] = optimizer.state_dict()
                state['cur_epoch'] = e
                state['train_loss'] = train_loss_list
                state['val_loss'] = val_loss_list
                state['val_accuracy'] = val_accuracy
                state['steps'] = steps
                state['time'] = time_use + time.time() - time_start
                torch.save(state, model_path)
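# A minimal sketch (assumption, not part of the original script) for inspecting a
# checkpoint written by train() above; the dictionary keys match the ones saved there.
#
#     state = torch.load(os.path.join('model', config.model_save), map_location='cpu')
#     print('best epoch:', state['best_epoch'])
#     print('best val accuracy:', state['best_val_accuracy'])
#     print('best val loss:', state['best_loss'])
#     print('steps so far:', sum(state['steps']))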
import numpy as np
import torch

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

from loader import build_loader
from model import LSTM
from utils import snapshot, plot_prediction

# parameters
test_ratio = 0.13  # to get exactly one test sample based on how we built test samples
batch_size = 10
learning_rate = 0.001
look_back = 168
look_ahead = 574

train_loader, test_loader, scaler = build_loader(test_ratio, look_back, look_ahead, batch_size)

model = LSTM(batch_size, learning_rate)

resume_training = True
if resume_training:
    # load previous model
    checkpoint = torch.load('saved_models/lstm_adam_b10_lb168_model')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
else:
    epoch = 0
    loss = np.inf

train = False
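# A minimal sketch of the loop that the `train` flag above presumably guards.
# Assumptions: model(x) returns predictions shaped like y, the optimizer is exposed as
# model.optimizer (as used when loading the checkpoint), and MSE is the criterion.
# None of this is confirmed by the snippet above; the real loss and forward signature
# may differ.
if train:
    criterion = torch.nn.MSELoss()
    n_epochs = 100  # hypothetical
    for epoch in range(epoch, epoch + n_epochs):
        model.train()
        for x, y in train_loader:
            model.optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            model.optimizer.step()
        # persist the same checkpoint layout that the resume branch above expects
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': model.optimizer.state_dict(),
            'loss': loss.item(),
        }, 'saved_models/lstm_adam_b10_lb168_model')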
def __init__(self, cfg, logdir, run_id):
    # Copy shared config fields
    if "monodepth_options" in cfg:
        cfg["data"].update(cfg["monodepth_options"])
        cfg["model"].update(cfg["monodepth_options"])
        cfg["training"]["monodepth_loss"].update(cfg["monodepth_options"])
        cfg['model']['depth_args']['max_scale_size'] = (
            cfg["monodepth_options"]["crop_h"],
            cfg["monodepth_options"]["crop_w"])

    # Setup seeds
    setup_seeds(cfg.get("seed", 1337))
    if cfg["data"]["dataset_seed"] == "same":
        cfg["data"]["dataset_seed"] = cfg["seed"]

    # Setup device
    torch.backends.cudnn.benchmark = cfg["training"].get("benchmark", True)
    self.cfg = cfg
    self.logdir = logdir
    self.run_id = run_id
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare depth estimates
    do_precalculate_depth = False
    if do_precalculate_depth:
        print("Prepare depth estimates")
        depth_estimator = DepthEstimator(cfg)
        depth_estimator.prepare_depth_estimates()
        del depth_estimator
        torch.cuda.empty_cache()
    else:
        self.cfg["data"]["generated_depth_dir"] = None

    # Setup Dataloader
    self.val_loader = build_loader(self.cfg["data"], "val",
                                   load_labels=False, load_sequence=False)
    self.n_classes = self.val_loader.n_classes
    self.val_batch_size = self.cfg["training"]["val_batch_size"]
    self.val_data_loader = data.DataLoader(
        self.val_loader,
        batch_size=self.val_batch_size,
        num_workers=self.cfg["training"]["n_workers"],
        pin_memory=True,
        # If using a dataset with odd number of samples (CamVid), the memory consumption
        # suddenly increases for the last batch. This can be circumvented by dropping the
        # last batch. Only do that if it is necessary for your system as it will result in
        # an incomplete validation set.
        # drop_last=True,
    )

    # Setup Model
    self.model = get_model(cfg["model"], self.n_classes).to(self.device)
    # print(self.model)
    self.monodepth_loss_calculator_val = get_monodepth_loss(
        self.cfg, is_train=False, batch_size=self.val_batch_size)

    if self.cfg["training"]["resume"] is not None:
        self.load_resume(strict=False)
def train():
    time_start = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_train()

    # load w2v
    embedding_np = loader.load_w2v(config.train_embedding + '.npy')

    # prepare: train_df, val_df
    preprocess_data.gen_train_datafile()

    # load data
    print('load data...')
    time0 = time.time()

    # load train data
    if os.path.isfile(config.train_pkl):
        with open(config.train_pkl, 'rb') as file:
            train_data = pickle.load(file)
    else:
        train_data = loader.load_data(config.train_df, config.train_vocab_path, config.tag_path)
        with open(config.train_pkl, 'wb') as file:
            pickle.dump(train_data, file)

    # load val data
    if os.path.isfile(config.val_pkl):
        with open(config.val_pkl, 'rb') as file:
            val_data = pickle.load(file)
    else:
        val_data = loader.load_data(config.val_df, config.train_vocab_path, config.tag_path)
        with open(config.val_pkl, 'wb') as file:
            pickle.dump(val_data, file)

    print('load data finished, time:%d' % (time.time() - time0))

    # build train, val dataloader
    train_loader = loader.build_loader(dataset=train_data,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       drop_last=True)
    val_loader = loader.build_loader(dataset=val_data,
                                     batch_size=config.batch_size,
                                     shuffle=False,
                                     drop_last=True)

    # model
    param = {
        'embedding': embedding_np,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn
    }
    model = eval(config.model_name).Model(param)

    # freeze the fixed embedding
    model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
    model = model.cuda()

    # loss
    if config.criterion == 'RougeLoss':
        criterion = eval(config.criterion)(lam=config.lamda)
    else:
        criterion = eval(config.criterion)()

    # optimizer
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(optimizer_param, lr=config.lr)

    # load model param, optimizer param, train param
    if config.is_for_rouge:
        model_path = os.path.join('model', config.model_save + '_mrt')
        if os.path.isfile(model_path):
            print('load training param, ', model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['cur_model_state'])
            optimizer.load_state_dict(state['cur_opt_state'])
            train_loss_list = state['train_loss']
            val_loss_list = state['val_loss']
            steps = state['steps']
            time_use = state['time']
        else:
            model_path = os.path.join('model', config.model_save)
            assert os.path.isfile(model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['best_model_state'])
            optimizer.load_state_dict(state['best_opt_state'])
            train_loss_list = []
            val_loss_list = []
            steps = []
            time_use = 0
        epoch_list = range(3)
    else:
        model_path = os.path.join('model', config.model_save)
        if os.path.isfile(model_path):
            print('load training param, ', model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['cur_model_state'])
            optimizer.load_state_dict(state['cur_opt_state'])
            epoch_list = range(state['cur_epoch'] + 1, state['cur_epoch'] + 1 + config.epoch)
            train_loss_list = state['train_loss']
            val_loss_list = state['val_loss']
            steps = state['steps']
            time_use = state['time']
        else:
            state = None
            epoch_list = range(config.epoch)
            train_loss_list = []
            val_loss_list = []
            steps = []
            time_use = 0

    # train
    model_param_num = 0
    for param in model.parameters():
        model_param_num += param.nelement()
    print('starting training: %s' % config.model_name)
    if state is None:
        print('start_epoch:0, end_epoch:%d, num_params:%d, num_params_except_embedding:%d' %
              (config.epoch - 1, model_param_num,
               model_param_num - embedding_np.shape[0] * embedding_np.shape[1]))
    else:
        print('start_epoch:%d, end_epoch:%d, num_params:%d, num_params_except_embedding:%d' %
              (state['cur_epoch'] + 1, state['cur_epoch'] + config.epoch, model_param_num,
               model_param_num - embedding_np.shape[0] * embedding_np.shape[1]))

    plt.ion()
    train_loss = 0
    train_c = 0
    flag = False
    cc = 0
    grade_1 = False
    grade_2 = False
    for e in epoch_list:
        for i, batch in enumerate(train_loader):
            # cut, cuda
            batch = utils.deal_batch(batch)
            model.train()
            optimizer.zero_grad()
            outputs = model(batch)
            loss_value = criterion(outputs, batch)
            loss_value.backward()
            nn.utils.clip_grad_norm_(model.parameters(), config.max_grad)
            optimizer.step()

            train_loss += loss_value.item()
            train_c += 1

            if config.val_mean:
                flag = (train_c % config.val_every == 0)
            else:
                if (train_c % (config.val_every // 2) == 0) and (cc <= 1):
                    cc += 1
                    flag = True
                elif grade_1 and (train_c % (config.val_every * 5) == 0):
                    flag = True
                elif grade_2 and (train_c % config.val_every == 0):
                    flag = True

            if flag:
                flag = False
                val_loss = 0
                val_c = 0
                with torch.no_grad():
                    model.eval()
                    for val_batch in val_loader:
                        # cut, cuda
                        val_batch = utils.deal_batch(val_batch)
                        outputs = model(val_batch)
                        loss_value = criterion(outputs, val_batch)
                        val_loss += loss_value.item()
                        val_c += 1

                train_loss_list.append(train_loss / train_c)
                val_loss_list.append(val_loss / val_c)
                steps.append(train_c)

                print('training, epochs:%2d, steps:%5d, train_loss:%.4f, val_loss:%.4f, time:%4ds' %
                      (e, sum(steps), train_loss / train_c, val_loss / val_c,
                       time.time() - time_start + time_use))

                if val_loss / val_c >= 0.97:
                    grade_1 = True
                    grade_2 = False
                elif val_loss / val_c < 0.97:
                    grade_1 = False
                    grade_2 = True

                train_loss = 0
                train_c = 0

                # draw
                plt.cla()
                x = np.cumsum(steps)
                plt.plot(x, train_loss_list, color='r', label='train')
                plt.plot(x, val_loss_list, color='b', label='val')
                plt.xlabel('steps')
                plt.ylabel('loss')
                plt.legend()
                plt.pause(0.0000001)

                if config.is_for_rouge:
                    fig_path = os.path.join('model', config.model_save + '_mrt.png')
                else:
                    fig_path = os.path.join('model', config.model_save + '.png')
                plt.savefig(fig_path)
                plt.show()

                # save model
                if config.is_for_rouge:
                    model_path = os.path.join('model', config.model_save + '_mrt')
                if os.path.isfile(model_path):
                    state = torch.load(model_path)
                else:
                    state = {}
                if state == {} or state['best_loss'] > (val_loss / val_c):
                    state['best_model_state'] = model.state_dict()
                    state['best_opt_state'] = optimizer.state_dict()
                    state['best_loss'] = val_loss / val_c
                    state['best_epoch'] = e
                    state['best_step'] = sum(steps)
                    state['best_time'] = time_use + time.time() - time_start
                state['cur_model_state'] = model.state_dict()
                state['cur_opt_state'] = optimizer.state_dict()
                state['cur_epoch'] = e
                state['train_loss'] = train_loss_list
                state['val_loss'] = val_loss_list
                state['steps'] = steps
                state['time'] = time_use + time.time() - time_start
                torch.save(state, model_path)
def __init__(self, cfg, writer, img_writer, logger, run_id):
    # Copy shared config fields
    if "monodepth_options" in cfg:
        cfg["data"].update(cfg["monodepth_options"])
        cfg["model"].update(cfg["monodepth_options"])
        cfg["training"]["monodepth_loss"].update(cfg["monodepth_options"])
    if "generated_depth_dir" in cfg["data"]:
        dataset_name = f"{cfg['data']['dataset']}_" \
                       f"{cfg['data']['width']}x{cfg['data']['height']}"
        depth_teacher = cfg["data"].get("depth_teacher", None)
        assert not (depth_teacher and cfg['model'].get('depth_estimator_weights') is not None)
        if depth_teacher is not None:
            cfg["data"]["generated_depth_dir"] += dataset_name + "/" + depth_teacher + "/"
        else:
            cfg["data"]["generated_depth_dir"] += \
                dataset_name + "/" + cfg['model']['depth_estimator_weights'] + "/"

    # Setup seeds
    setup_seeds(cfg.get("seed", 1337))
    if cfg["data"]["dataset_seed"] == "same":
        cfg["data"]["dataset_seed"] = cfg["seed"]

    # Setup device
    torch.backends.cudnn.benchmark = cfg["training"].get("benchmark", True)
    self.cfg = cfg
    self.writer = writer
    self.img_writer = img_writer
    self.logger = logger
    self.run_id = run_id
    self.mIoU = 0
    self.fwAcc = 0
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    self.setup_segmentation_unlabeled()

    self.unlabeled_require_depth = (
        self.cfg["training"]["unlabeled_segmentation"] is not None and
        (self.cfg["training"]["unlabeled_segmentation"]["mix_mask"] == "depth" or
         self.cfg["training"]["unlabeled_segmentation"]["mix_mask"] == "depthcomp" or
         self.cfg["training"]["unlabeled_segmentation"]["mix_mask"] == "depthhist"))

    # Prepare depth estimates
    do_precalculate_depth = self.cfg["training"]["segmentation_lambda"] != 0 and \
        self.unlabeled_require_depth and \
        self.cfg['model']['segmentation_name'] != 'mtl_pad'
    use_depth_teacher = cfg["data"].get("depth_teacher", None) is not None
    if do_precalculate_depth or use_depth_teacher:
        assert not (do_precalculate_depth and use_depth_teacher)
        if not self.cfg["training"].get("disable_depth_estimator", False):
            print("Prepare depth estimates")
            depth_estimator = DepthEstimator(cfg)
            depth_estimator.prepare_depth_estimates()
            del depth_estimator
            torch.cuda.empty_cache()
    else:
        self.cfg["data"]["generated_depth_dir"] = None

    # Setup Dataloader
    load_labels, load_sequence = True, True
    if self.cfg["training"]["monodepth_lambda"] == 0:
        load_sequence = False
    if self.cfg["training"]["segmentation_lambda"] == 0:
        load_labels = False
    train_data_cfg = deepcopy(self.cfg["data"])
    if not do_precalculate_depth and not use_depth_teacher:
        train_data_cfg["generated_depth_dir"] = None
    self.train_loader = build_loader(train_data_cfg, "train",
                                     load_labels=load_labels, load_sequence=load_sequence)
    if self.cfg["training"].get("minimize_entropy_unlabeled", False) or \
            self.enable_unlabled_segmentation:
        unlabeled_segmentation_cfg = deepcopy(self.cfg["data"])
        if not self.only_unlabeled and self.mix_use_gt:
            unlabeled_segmentation_cfg["load_onehot"] = True
        if self.only_unlabeled:
            unlabeled_segmentation_cfg.update({"load_unlabeled": True, "load_labeled": False})
        elif self.only_labeled:
            unlabeled_segmentation_cfg.update({"load_unlabeled": False, "load_labeled": True})
        else:
            unlabeled_segmentation_cfg.update({"load_unlabeled": True, "load_labeled": True})
        if self.mix_video:
            assert not self.mix_use_gt and not self.only_labeled and not self.only_unlabeled, \
                "Video sample indices are not compatible with non-video indices."
        unlabeled_segmentation_cfg.update({"only_sequences_with_segmentation": not self.mix_video,
                                           "restrict_to_subset": None})
        self.unlabeled_loader = build_loader(
            unlabeled_segmentation_cfg, "train",
            load_labels=load_labels if not self.mix_video else False,
            load_sequence=load_sequence)
    else:
        self.unlabeled_loader = None
    self.val_loader = build_loader(self.cfg["data"], "val",
                                   load_labels=load_labels, load_sequence=load_sequence)
    self.n_classes = self.train_loader.n_classes

    # The monodepth dataloader settings use drop_last=True and shuffle=True even for val
    self.train_data_loader = data.DataLoader(
        self.train_loader,
        batch_size=self.cfg["training"]["batch_size"],
        num_workers=self.cfg["training"]["n_workers"],
        shuffle=self.cfg["data"]["shuffle_trainset"],
        pin_memory=True,
        # Setting to false will cause a crash at the end of the epoch
        drop_last=True,
    )
    if self.unlabeled_loader is not None:
        self.unlabeled_data_loader = infinite_iterator(data.DataLoader(
            self.unlabeled_loader,
            batch_size=self.cfg["training"]["batch_size"],
            num_workers=self.cfg["training"]["n_workers"],
            shuffle=self.cfg["data"]["shuffle_trainset"],
            pin_memory=True,
            # Setting to false will cause a crash at the end of the epoch
            drop_last=True,
        ))
    self.val_batch_size = self.cfg["training"]["val_batch_size"]
    self.val_data_loader = data.DataLoader(
        self.val_loader,
        batch_size=self.val_batch_size,
        num_workers=self.cfg["training"]["n_workers"],
        pin_memory=True,
        # If using a dataset with an odd number of samples (CamVid), the memory consumption
        # suddenly increases for the last batch. This can be circumvented by dropping the
        # last batch. Only do that if it is necessary for your system, as it will result in
        # an incomplete validation set.
        # drop_last=True,
    )

    # Setup Model
    self.model = get_model(cfg["model"], self.n_classes).to(self.device)
    # print(self.model)
    assert not (self.enable_unlabled_segmentation and self.cfg["training"]["save_monodepth_ema"])
    if self.enable_unlabled_segmentation and not self.only_labeled:
        print("Create segmentation ema model.")
        self.ema_model = self.create_ema_model(self.model).to(self.device)
    elif self.cfg["training"]["save_monodepth_ema"]:
        print("Create depth ema model.")
        # TODO: Try to remove unnecessary components and fit into gpu for better performance
        self.ema_model = self.create_ema_model(self.model)  # .to(self.device)
    else:
        self.ema_model = None

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k: v for k, v in cfg["training"]["optimizer"].items()
                        if k not in ["name", "backbone_lr", "pose_lr", "depth_lr", "segmentation_lr"]}
    train_params = get_train_params(self.model, self.cfg)
    self.optimizer = optimizer_cls(train_params, **optimizer_params)
    self.scheduler = get_scheduler(self.optimizer, self.cfg["training"]["lr_schedule"])
    # Creates a GradScaler once at the beginning of training.
    self.scaler = GradScaler(enabled=self.cfg["training"]["amp"])

    self.loss_fn = get_segmentation_loss_function(self.cfg)
    self.monodepth_loss_calculator_train = get_monodepth_loss(self.cfg, is_train=True)
    self.monodepth_loss_calculator_val = get_monodepth_loss(
        self.cfg, is_train=False, batch_size=self.val_batch_size)

    if cfg["training"]["early_stopping"] is None:
        logger.info("Using No Early Stopping")
        self.earlyStopping = None
    else:
        self.earlyStopping = EarlyStopping(
            patience=round(cfg["training"]["early_stopping"]["patience"] /
                           cfg["training"]["val_interval"]),
            min_delta=cfg["training"]["early_stopping"]["min_delta"],
            cumulative_delta=cfg["training"]["early_stopping"]["cum_delta"],
            logger=logger
        )
def test(config):
    time0 = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_test()

    # load w2v
    embedding_np_train = loader.load_w2v(config.train_embedding + '.npy')
    embedding_np_test = loader.load_w2v(config.test_embedding + '.npy')

    # prepare: test_df
    if config.is_true_test:
        preprocess_data.gen_test_datafile()

    # load data
    if config.is_true_test is False:
        if os.path.isfile(config.val_true_pkl):
            with open(config.val_true_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = build_dataset.CustomDataset(
                df_file=config.val_df,
                vocab_path=config.train_vocab_path,
                tag_path=config.tag_path,
                is_test=True)
            with open(config.val_true_pkl, 'wb') as file:
                pickle.dump(test_data, file)
    else:
        if os.path.isfile(config.test_pkl):
            with open(config.test_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = build_dataset.CustomDataset(
                df_file=config.test_df,
                vocab_path=config.test_vocab_path,
                tag_path=config.tag_path,
                is_test=True)
            with open(config.test_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    # build test dataloader
    test_loader = loader.build_loader(dataset=test_data,
                                      batch_size=config.test_batch_size,
                                      shuffle=False,
                                      drop_last=False)

    # model initial
    param = {
        'embedding': embedding_np_train,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn,
        'k': config.k,
        'num_align_hops': config.num_align_hops
    }
    model = eval(config.model_name).Model(param)

    # load model param, and training state
    model_path = os.path.join('model', config.model_test)
    print('load model, ', model_path)
    state = torch.load(model_path)
    model.load_state_dict(state['best_model_state'])

    # swap the frozen embedding_fix for the test-vocabulary embedding
    if config.is_true_test:
        model.embedding.sd_embedding.embedding_fix = nn.Embedding(
            num_embeddings=embedding_np_test.shape[0],
            embedding_dim=embedding_np_test.shape[1],
            padding_idx=0,
            _weight=torch.Tensor(embedding_np_test))
        model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
        model.embedding.sd_embedding.vocab_size = embedding_np_test.shape[0]
    model = model.cuda()

    best_loss = state['best_loss']
    best_val_accuracy = state['best_val_accuracy']
    best_epoch = state['best_epoch']
    best_step = state['best_step']
    best_time = state['best_time']
    use_time = state['time']
    print('best_epoch:%2d, best_step:%5d, best_loss:%.4f, val_accuracy:%.4f, best_time:%d, use_time:%d' %
          (best_epoch, best_step, best_loss, best_val_accuracy, best_time, use_time))

    # gen result
    result = []
    result_range = []
    model.eval()
    with torch.no_grad():
        cc = 0
        cc_total = len(test_loader)
        print('total iter_num:%d' % cc_total)
        for batch in test_loader:
            # cuda, cut
            batch = utils.deal_batch(batch)
            outputs = model(batch)  # (batch_size, 3)
            _, k = torch.max(outputs, dim=1)
            k = k.cpu().numpy().tolist()
            result = result + k

            outputs = outputs.cpu().numpy().tolist()
            result_range = result_range + outputs

            cc += 1
            if cc % 100 == 0:
                print('processing: %d/%d' % (cc, cc_total))

    if config.is_true_test:
        df = pd.read_csv(config.test_df, encoding='utf-8')
    else:
        df = pd.read_csv(config.val_df, encoding='utf-8')

    # map each predicted class index back to the corresponding alternative string
    a_items = df['a_item'].values
    b_items = df['b_item'].values
    c_items = df['c_item'].values
    alts = df['alternatives'].values
    tmp = []
    for r, a, b, c, alt in zip(result, a_items, b_items, c_items, alts):
        alt_list = alt.split('|')
        if r == 0:
            if a == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif a == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif a == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==0, meet wrong data')
        elif r == 1:
            if b == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif b == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif b == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==1, meet wrong data')
        else:
            if c == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif c == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif c == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==2, meet wrong data')

    # gen a submission
    if config.is_true_test:
        query_ids = df['query_id']
        with open(config.submission, 'w') as file:
            for i, r in zip(query_ids, tmp):
                file.writelines(str(i) + '\t' + r + '\n')

    # my_metrics
    if config.is_true_test is False:
        answers = df['answer']
        flag = []
        for a, r in zip(answers, tmp):
            if a == r:
                flag.append(True)
            else:
                flag.append(False)
        print('accuracy:%.4f' % (sum(flag) / len(answers)))

    # to .csv
    if config.is_true_test is False:
        df['answer_pred'] = tmp
        df = df[['query_id', 'query', 'passage', 'alternatives', 'answer', 'answer_pred']]
        csv_path = os.path.join('result', config.model_test + '_val.csv')
        df.to_csv(csv_path, index=False)

    # save result_ans_range
    if config.is_true_test:
        save_path = os.path.join('result/ans_range', config.model_test + '_submission.pkl')
    else:
        save_path = os.path.join('result/ans_range', config.model_test + '_val.pkl')
    torch.save(result_range, save_path)

    print('time:%d' % (time.time() - time0))
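# A small sketch (assumption, not part of the original file) showing how the class
# scores saved above can be reloaded, e.g. to combine the predictions of several models:
#
#     result_range = torch.load(os.path.join('result/ans_range',
#                                            config.model_test + '_val.pkl'))
#     # result_range holds one row of 3 class scores per test example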