def main(params):
    train_set, valid_set, test_set = df.datasets.mnist.data()
    train_set_x, train_set_y = train_set
    test_set_x, test_set_y = test_set
    # NOTE: valid_set is unpacked but unused; validation below runs on the test set.

    model = lenet()
    criterion = df.ClassNLLCriterion()
    optimiser = df.SGD(lr=params['lr'])

    for epoch in range(100):
        model.training()
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'], 'train')
        train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'], 'stats')

        model.evaluate()
        validate(test_set_x, test_set_y, model, epoch, params['batch_size'])
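# A hedged invocation example for the main() above; the hyperparameter values
# here are illustrative, not taken from the source:
if __name__ == "__main__":
    main({'lr': 0.1, 'batch_size': 64})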
# (This snippet begins partway through nnet(); the earlier layers are not in the source.)
    model.add(df.Linear(512, 9))
    model.add(df.SoftMax())
    return model


if __name__ == "__main__":
    if __package__ is None:  # PEP 366
        __package__ = "DeepFried2.examples.KaggleOtto"

    train_data_x, train_data_y = load_train_data()
    train_data_x, valid_data_x, train_data_y, valid_data_y = train_test_split(
        train_data_x, train_data_y, train_size=0.85)

    model = nnet()
    criterion = df.ClassNLLCriterion()
    optimiser = df.Momentum(lr=0.01, momentum=0.9)

    for epoch in range(1, 1001):
        model.training()
        # Decay the learning rate by a factor of 10 every 100 epochs
        if epoch % 100 == 0:
            optimiser.hyperparams['lr'] /= 10

        train(train_data_x, train_data_y, model, optimiser, criterion, epoch, 100, 'train')
        train(train_data_x, train_data_y, model, optimiser, criterion, epoch, 100, 'stats')

        model.evaluate()
        validate(valid_data_x, valid_data_y, model, epoch, 100)
# (This snippet also begins partway through nnet(); it differs from the variant
# above by adding batch normalization, ReLU, and dropout before the output layer.)
    model.add(df.BatchNormalization(512))
    model.add(df.ReLU())
    model.add(df.Dropout(0.5))
    model.add(df.Linear(512, 9))
    model.add(df.SoftMax())
    return model


if __name__ == "__main__":
    if __package__ is None:  # PEP 366
        __package__ = "DeepFried2.examples.KaggleOtto"

    train_data_x, train_data_y = load_train_data()
    train_data_x, valid_data_x, train_data_y, valid_data_y = train_test_split(
        train_data_x, train_data_y, train_size=0.85)

    model = nnet()
    criterion = df.ClassNLLCriterion()
    optimiser = df.Momentum(lr=0.01, momentum=0.9)

    for epoch in range(1, 1001):
        model.training()
        # Decay the learning rate by a factor of 10 every 100 epochs
        if epoch % 100 == 0:
            optimiser.hyperparams['lr'] /= 10

        train(train_data_x, train_data_y, model, optimiser, criterion, epoch, 100, 'train')
        train(train_data_x, train_data_y, model, optimiser, criterion, epoch, 100, 'stats')

        model.evaluate()
        validate(valid_data_x, valid_data_y, model, epoch, 100)
loader_train_target, loader_val_target = fun.load_loaders('usps', BATCH_SIZE)

if trained:
    encoder_s.load_state_dict(torch.load('encoder_s.pkl'))
    # Initialize the target encoder from the saved source weights
    encoder_t.load_state_dict(torch.load('encoder_s.pkl'))
else:
    # Pre-train the source encoder, copy its weights to the target encoder, then save
    for e in range(50):
        fun.train_src(encoder_s, loader_train_source, opt_es)
    encoder_t.load_state_dict(encoder_s.state_dict())
    torch.save(encoder_s.state_dict(), 'encoder_s.pkl')

acc = fun.validate(encoder_s, encoder_s, loader_train_source, loader_val_source)

goal = 0.9
for i in range(EPOCH):
    # fun.train_disc(encoder_s, encoder_t, disc, loader_train_source, loader_train_target, opt_et, opt_dis, EPOCH=3)
    fun.fit_disc(encoder_s, encoder_t, disc, loader_train_source, loader_train_target,
                 opt_et, opt_dis, 20)
    acc = fun.validate(encoder_s, encoder_t, loader_train_target, loader_val_target)
    # acc = test.validate(encoder_s, encoder_t, loader_train_source, loader_val_target)
    print("Source >>> Target without centering acc: {:.4f}".format(acc))
    # fun.train_center(encoder_s, encoder_t, loader_train_source, loader_train_target, opt_et)
    acc = test.validate(encoder_s, encoder_t, loader_train_source, loader_val_target)
    print("Source >>> Target after centering acc: {:.4f}".format(acc))
    # Save the target encoder once it beats the accuracy goal
    if acc > goal:
        torch.save(encoder_t.state_dict(), 'encoder_t.pkl')
def main(args):
    os.environ['KMP_WARNINGS'] = '0'
    torch.cuda.manual_seed_all(1)
    np.random.seed(0)

    # filter array
    num_features = [args.features * i
                    for i in range(1, args.levels + 2 + args.levels_without_sample)]
    # Determine the output size
    target_outputs = int(args.output_size * args.sr)

    # Model settings are saved only in training mode (see the config.json dump below).
    # Set up hyperparameters for the teacher, student, and student_for_backward models.
    student_KD = Waveunet(args.channels, num_features, args.channels,
                          levels=args.levels,
                          encoder_kernel_size=args.encoder_kernel_size,
                          decoder_kernel_size=args.decoder_kernel_size,
                          target_output_size=target_outputs,
                          depth=args.depth, strides=args.strides,
                          conv_type=args.conv_type, res=args.res)
    KD_optimizer = Adam(params=student_KD.parameters(), lr=args.lr)

    print(25 * '=' + 'model setting' + 25 * '=')
    print('student_KD: ', student_KD.shapes)
    if args.cuda:
        student_KD = utils.DataParallel(student_KD)
        print("move student_KD to gpu\n")
        student_KD.cuda()

    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_pesq": -np.Inf}
    if args.load_model is not None:
        print("Continuing full model from checkpoint " + str(args.load_model))
        state = utils.load_model(student_KD, KD_optimizer, args.load_model, args.cuda)

    dataset = get_folds(args.dataset_dir, args.outside_test)
    log_dir, checkpoint_dir, result_dir = utils.mkdir_and_get_path(args)

    if args.test is False:
        writer = SummaryWriter(log_dir)
        # Save the hyperparameters alongside the logs
        with open(os.path.join(log_dir, 'config.json'), 'w') as f:
            json.dump(args.__dict__, f, indent=5)
        print('saving commandline_args')

        if args.teacher_model is not None:
            print(25 * '=' + 'printing hyperparameters info' + 25 * '=')
            print(f'KD_method = {args.KD_method}')
            teacher_num_features = [24 * i
                                    for i in range(1, args.levels + 2 + args.levels_without_sample)]
            teacher_model = Waveunet(args.channels, teacher_num_features, args.channels,
                                     levels=args.levels,
                                     encoder_kernel_size=args.encoder_kernel_size,
                                     decoder_kernel_size=args.decoder_kernel_size,
                                     target_output_size=target_outputs,
                                     depth=args.depth, strides=args.strides,
                                     conv_type=args.conv_type, res=args.res)

            student_copy = Waveunet(args.channels, num_features, args.channels,
                                    levels=args.levels,
                                    encoder_kernel_size=args.encoder_kernel_size,
                                    decoder_kernel_size=args.decoder_kernel_size,
                                    target_output_size=target_outputs,
                                    depth=args.depth, strides=args.strides,
                                    conv_type=args.conv_type, res=args.res)
            copy_optimizer = Adam(params=student_copy.parameters(), lr=args.lr)

            student_copy2 = Waveunet(args.channels, num_features, args.channels,
                                     levels=args.levels,
                                     encoder_kernel_size=args.encoder_kernel_size,
                                     decoder_kernel_size=args.decoder_kernel_size,
                                     target_output_size=target_outputs,
                                     depth=args.depth, strides=args.strides,
                                     conv_type=args.conv_type, res=args.res)
            copy2_optimizer = Adam(params=student_copy2.parameters(), lr=args.lr)

            policy_network = RL(n_inputs=2, kernel_size=6, stride=1,
                                conv_type=args.conv_type, pool_size=4)
            PG_optimizer = Adam(params=policy_network.parameters(), lr=args.RL_lr)

            if args.cuda:
                teacher_model = utils.DataParallel(teacher_model)
                policy_network = utils.DataParallel(policy_network)
                student_copy = utils.DataParallel(student_copy)
                student_copy2 = utils.DataParallel(student_copy2)
                teacher_model.cuda()
                student_copy.cuda()
                student_copy2.cuda()
                policy_network.cuda()

            student_size = sum(p.numel() for p in student_KD.parameters())
            teacher_size = sum(p.numel() for p in teacher_model.parameters())
            print('student_parameter count: ', str(student_size))
            print('teacher_model_parameter count: ', str(teacher_size))
            print('RL_parameter count: ', str(sum(p.numel() for p in policy_network.parameters())))
            print(f'compression ratio: {100 * (student_size / teacher_size)}%')

        if args.teacher_model is not None:
            print("load teacher model" + str(args.teacher_model))
            _ = utils.load_model(teacher_model, None, args.teacher_model, args.cuda)
            teacher_model.eval()
        if args.load_RL_model is not None:
            print("Continuing full RL_model from checkpoint " + str(args.load_RL_model))
            _ = utils.load_model(policy_network, PG_optimizer, args.load_RL_model, args.cuda)

        # If not data augmentation, at least crop targets to fit model output shape
        crop_func = partial(crop, shapes=student_KD.shapes)

        ### DATASET
        train_data = SeparationDataset(dataset, "train", args.sr, args.channels,
                                       student_KD.shapes, False, args.hdf_dir,
                                       audio_transform=crop_func)
        val_data = SeparationDataset(dataset, "test", args.sr, args.channels,
                                     student_KD.shapes, False, args.hdf_dir,
                                     audio_transform=crop_func)
        dataloader = torch.utils.data.DataLoader(train_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.num_workers,
                                                 worker_init_fn=utils.worker_init_fn,
                                                 pin_memory=True)

        # Set up the loss function
        if args.loss == "L1":
            criterion = nn.L1Loss()
        elif args.loss == "L2":
            criterion = nn.MSELoss()
        else:
            raise NotImplementedError("Couldn't find this loss!")
        My_criterion = customLoss()

        ### TRAINING START
        print('TRAINING START')
        if state["epochs"] > 0:
            state["epochs"] = state["epochs"] + 1
        batch_num = (len(train_data) // args.batch_size)

        if args.teacher_model is not None:
            counting = 0
            PG_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer=PG_optimizer, gamma=args.decayRate)
            # Fast-forward the RL learning-rate schedule to the resumed epoch
            while counting < state["epochs"]:
                PG_optimizer.zero_grad()
                PG_optimizer.step()
                counting += 1
                PG_lr_scheduler.step()

        while state["epochs"] < 100:
            memory_alpha = []
            print("epoch:" + str(state["epochs"]))

            # monitored values
            total_avg_reward = 0
            total_avg_scalar_reward = 0
            avg_origin_loss = 0
            all_avg_KD_rate = 0
            same = 0

            with tqdm(total=len(dataloader)) as pbar:
                for example_num, (x, targets) in enumerate(dataloader):
                    student_KD.train()
                    if args.cuda:
                        x = x.cuda()
                        targets = targets.cuda()

                    if args.teacher_model is not None:
                        student_copy.train()
                        student_copy2.train()

                        # Start all three students from identical weights and optimizer state
                        temp = {'state_dict': None, 'optim_dict': None}
                        temp['state_dict'] = copy.deepcopy(student_KD.state_dict())
                        temp['optim_dict'] = copy.deepcopy(KD_optimizer.state_dict())
                        student_KD.load_state_dict(temp['state_dict'])
                        KD_optimizer.load_state_dict(temp['optim_dict'])
                        student_copy.load_state_dict(temp['state_dict'])
                        copy_optimizer.load_state_dict(temp['optim_dict'])
                        student_copy2.load_state_dict(temp['state_dict'])
                        copy2_optimizer.load_state_dict(temp['optim_dict'])

                        # Set LR for this iteration
                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy2_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)

                        # Forward the student and teacher to get their outputs
                        student_KD_output, avg_student_KD_loss = utils.compute_loss(
                            student_KD, x, targets, criterion, compute_grad=False)
                        teacher_output, _ = utils.compute_loss(
                            teacher_model, x, targets, criterion, compute_grad=False)

                        # PG_state
                        diff_from_target = targets.detach() - student_KD_output.detach()
                        diff_from_teacher = teacher_output.detach() - student_KD_output.detach()
                        PG_state = torch.cat((diff_from_target, diff_from_teacher), 1)

                        # Forward the RL policy to get alpha
                        alpha = policy_network(PG_state)
                        nograd_alpha = alpha.detach()
                        avg_KD_rate = torch.mean(nograd_alpha).item()
                        all_avg_KD_rate += avg_KD_rate / batch_num

                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss, KD_loss, KD_soft_loss = utils.KD_compute_loss(
                            student_KD, teacher_model, x, targets, My_criterion,
                            alpha=nograd_alpha, compute_grad=True, KD_method=args.KD_method)
                        KD_optimizer.step()

                        copy_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy, teacher_model, x, targets, My_criterion,
                            alpha=1, compute_grad=True, KD_method=args.KD_method)
                        copy_optimizer.step()

                        copy2_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy2, teacher_model, x, targets, My_criterion,
                            alpha=0, compute_grad=True, KD_method=args.KD_method)
                        copy2_optimizer.step()

                        # Calculate the per-sample MSE of each updated model
                        backward_KD_loss = utils.loss_for_sample(student_KD, x, targets)
                        backward_copy_loss = utils.loss_for_sample(student_copy, x, targets)
                        backward_copy2_loss = utils.loss_for_sample(student_copy2, x, targets)

                        # Calculate rewards
                        rewards, same_num, before_decay = utils.get_rewards(
                            backward_KD_loss.detach(), backward_copy_loss.detach(),
                            backward_copy2_loss.detach(), backward_KD_loss.detach(),
                            len(train_data), state["epochs"] + 1)
                        same += same_num
                        rewards = rewards.detach()
                        avg_origin_loss += avg_student_KD_loss / batch_num

                        # avg_reward
                        avg_reward = torch.mean(rewards)
                        avg_scalar_reward = torch.mean(torch.abs(rewards))
                        total_avg_reward += avg_reward.item() / batch_num
                        total_avg_scalar_reward += avg_scalar_reward.item() / batch_num

                        # Append to memory_alpha
                        nograd_alpha = nograd_alpha.detach().cpu()
                        memory_alpha.append(nograd_alpha.numpy())

                        PG_optimizer.zero_grad()
                        _ = utils.RL_compute_loss(alpha, rewards, nn.MSELoss())
                        PG_optimizer.step()

                        # Add to TensorBoard
                        writer.add_scalar("student_KD_loss", avg_student_KD_loss, state["step"])
                        writer.add_scalar("backward_student_KD_loss",
                                          np.mean(backward_KD_loss.detach().cpu().numpy()),
                                          state["step"])
                        writer.add_scalar("KD_loss", KD_loss, state["step"])
                        writer.add_scalar("KD_hard_loss", KD_hard_loss, state["step"])
                        writer.add_scalar("KD_soft_loss", KD_soft_loss, state["step"])
                        writer.add_scalar("avg_KD_rate", avg_KD_rate, state["step"])
                        writer.add_scalar("rewards", avg_reward, state["step"])
                        writer.add_scalar("scalar_rewards", avg_scalar_reward, state["step"])
                        writer.add_scalar("before_decay", before_decay, state["step"])
                    else:
                        # no KD training
                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss = utils.compute_loss(
                            student_KD, x, targets, nn.MSELoss(), compute_grad=True)
                        KD_optimizer.step()
                        avg_origin_loss += KD_hard_loss / batch_num
                        writer.add_scalar("student_KD_loss", KD_hard_loss, state["step"])

                    ### save wav ###
                    if example_num % args.example_freq == 0:
                        input_centre = torch.mean(
                            x[0, :, student_KD.shapes["output_start_frame"]:
                                    student_KD.shapes["output_end_frame"]],
                            0)  # Stereo not supported for logs yet
                        writer.add_audio("input:", input_centre, state["step"],
                                         sample_rate=args.sr)
                        writer.add_audio("pred:", torch.mean(KD_outputs[0], 0),
                                         state["step"], sample_rate=args.sr)
                        writer.add_audio("target", torch.mean(targets[0], 0),
                                         state["step"], sample_rate=args.sr)

                    state["step"] += 1
                    pbar.update(1)

            # VALIDATE
            val_loss, val_metrics = validate(args, student_KD, criterion, val_data)
            print("ori VALIDATION FINISHED: LOSS: " + str(val_loss))
            choose_val = val_metrics

            if args.teacher_model is not None:
                for i in range(len(nograd_alpha)):
                    writer.add_scalar("KD_rate_" + str(i), nograd_alpha[i], state["epochs"])
                print(f'all_avg_KD_rate = {all_avg_KD_rate}')
                writer.add_scalar("all_avg_KD_rate", all_avg_KD_rate, state["epochs"])
                writer.add_scalar("total_avg_reward", total_avg_reward, state["epochs"])
                writer.add_scalar("total_avg_scalar_reward", total_avg_scalar_reward,
                                  state["epochs"])
                RL_checkpoint_path = os.path.join(checkpoint_dir,
                                                  "RL_checkpoint_" + str(state["epochs"]))
                utils.save_model(policy_network, PG_optimizer, state, RL_checkpoint_path)
                PG_lr_scheduler.step()

            writer.add_scalar("same", same, state["epochs"])
            writer.add_scalar("avg_origin_loss", avg_origin_loss, state["epochs"])
            writer.add_scalar("val_enhance_pesq", choose_val[0], state["epochs"])
            writer.add_scalar("val_improve_pesq", choose_val[1], state["epochs"])
            writer.add_scalar("val_enhance_stoi", choose_val[2], state["epochs"])
            writer.add_scalar("val_improve_stoi", choose_val[3], state["epochs"])
            writer.add_scalar("val_enhance_SISDR", choose_val[4], state["epochs"])
            writer.add_scalar("val_improve_SISDR", choose_val[5], state["epochs"])
            writer.add_scalar("val_loss", val_loss, state["epochs"])

            # Set up training state dict that will also be saved into checkpoints
            checkpoint_path = os.path.join(checkpoint_dir, "checkpoint_" + str(state["epochs"]))
            if choose_val[0] < state["best_pesq"]:
                state["worse_epochs"] += 1
            else:
                print("MODEL IMPROVED ON VALIDATION SET!")
                state["worse_epochs"] = 0
                state["best_pesq"] = choose_val[0]
                state["best_checkpoint"] = checkpoint_path

            # CHECKPOINT
            print("Saving model...")
            utils.save_model(student_KD, KD_optimizer, state, checkpoint_path)
            print('dump alpha_memory')
            with open(os.path.join(log_dir, 'alpha_' + str(state["epochs"])), "wb") as fp:  # pickling
                pickle.dump(memory_alpha, fp)

            state["epochs"] += 1

        writer.close()
        info = args.model_name
        path = os.path.join(result_dir, info)
    else:
        PATH = args.load_model.split("/")
        info = PATH[-3] + "_" + PATH[-1]
        if args.outside_test:
            info += "_outside_test"
        print(info)
        path = os.path.join(result_dir, info)

    #### TESTING ####
    print("TESTING")
    # Eval metrics; commented-out alternatives kept from the source:
    # test_data = SeparationDataset(dataset, "test", args.sr, args.channels, student_KD.shapes,
    #                               False, args.hdf_dir, audio_transform=crop_func)
    # ling_data = get_ling_data_list('/media/hd03/sutsaiwei_data/data/mydata/ling_data')
    # validate(args, student_KD, criterion, test_data)
    # test_metrics = ling_evaluate(args, ling_data['noisy'], student_KD)
    # test_metrics = evaluate_without_noisy(args, dataset["test"], student_KD)
    test_metrics = evaluate(args, dataset["test"], student_KD)
    test_pesq = test_metrics['pesq']
    test_stoi = test_metrics['stoi']
    test_SISDR = test_metrics['SISDR']
    test_noise = test_metrics['noise']

    if not os.path.exists(path):
        os.makedirs(path)
    utils.save_result(test_pesq, path, "pesq")
    utils.save_result(test_stoi, path, "stoi")
    utils.save_result(test_SISDR, path, "SISDR")
    utils.save_result(test_noise, path, "noise")
def train(epochs, batch_size, lr, log_interval=200):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # cifar_norm_mean = (0.49139968, 0.48215827, 0.44653124)
    # cifar_norm_std = (0.24703233, 0.24348505, 0.26158768)
    transform_train = transforms.Compose([
        # transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # transforms.Normalize(cifar_norm_mean, cifar_norm_std),
    ])
    transform_test = transforms.Compose([
        # transforms.Resize(224),
        transforms.ToTensor(),
        # transforms.Normalize(cifar_norm_mean, cifar_norm_std),
    ])

    # Build the training set
    cifar10 = MyCIFAR10.MyCIFAR10('./data/cifar-10-batches-py', device, train=True,
                                  transform=transform_train)
    train_loader = torch.utils.data.DataLoader(dataset=cifar10, batch_size=batch_size,
                                               shuffle=True)
    # Build the test set
    cifar10 = MyCIFAR10.MyCIFAR10('./data/cifar-10-batches-py', device, train=False,
                                  transform=transform_test)
    test_loader = torch.utils.data.DataLoader(dataset=cifar10, batch_size=batch_size,
                                              shuffle=False)

    # Build the model and switch to training mode (enables dropout)
    model = VGG.VGG().to(device)
    model.train()

    # Cross-entropy loss; SGD as the optimizer (Adam left commented out)
    criterion = nn.CrossEntropyLoss()
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, 0.5, last_epoch=-1)

    trainlog = []
    testlog = []
    accuracylog = []
    try:
        total_step = len(train_loader)
        for epoch in range(epochs):
            trainloss = 0.0
            for i, (images, labels) in enumerate(train_loader):
                # Copy the data to the GPU if one is available
                images = images.to(device)
                labels = labels.to(device)

                # Zero the gradient buffers
                optimizer.zero_grad()
                # Forward pass
                output = model(images)
                # Compute the loss (accumulate a float, not a graph-holding tensor)
                loss = criterion(output, labels)
                trainloss += loss.item()
                # Backward pass
                loss.backward()
                # Update the weights
                optimizer.step()

                # Print progress
                if (i + 1) % log_interval == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                          .format(epoch + 1, epochs, i + 1, total_step, loss.item()))

            # Report the average loss once per epoch
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, epochs,
                                                       trainloss / len(train_loader)))
            trainlog.append(trainloss / len(train_loader))

            testcost, accuracy = test.validate(model, test_loader, device, showloss=True)
            testlog.append(testcost.item())
            accuracylog.append(accuracy)
            model.train()
            # scheduler.step()
    # Ctrl+C stops training and saves a checkpoint plus the logs
    except KeyboardInterrupt:
        print("Save.....")
        torch.save(model.state_dict(), os.path.join('./checkpoints', 'Interrupt.ckpt'))
        text_save('trainlogs.txt', trainlog)
        text_save('testlogs.txt', testlog)
        text_save('accuracylogs.txt', accuracylog)
        exit(0)

    text_save('trainlogs.txt', trainlog)
    text_save('testlogs.txt', testlog)
    text_save('accuracylogs.txt', accuracylog)
    return model
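# A hedged usage example for the train() above; the epoch/batch/lr values are
# illustrative only:
if __name__ == '__main__':
    model = train(epochs=30, batch_size=128, lr=0.01)
    # Persist the final weights next to the interrupt checkpoint used above
    torch.save(model.state_dict(), os.path.join('./checkpoints', 'final.ckpt'))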
def train(exp_dict):
    history = ms.load_history(exp_dict)

    # Source
    src_trainloader, src_valloader = ms.load_src_loaders(exp_dict)

    #####################
    ## Train source model
    #####################
    src_model, src_opt = ms.load_model_src(exp_dict)

    # Train Source
    for e in range(history["src_train"][-1]["epoch"], exp_dict["src_epochs"]):
        train_dict = ts.fit_src(src_model, src_trainloader, src_opt)
        loss = train_dict["loss"]
        print("Source ({}) - Epoch [{}/{}] - loss={:.2f}".format(
            type(src_trainloader).__name__, e, exp_dict["src_epochs"], loss))
        history["src_train"] += [{"loss": loss, "epoch": e}]
        if e % 50 == 0:
            ms.save_model_src(exp_dict, history, src_model, src_opt)

    # Test Source
    src_acc = test.validate(src_model, src_model, src_trainloader, src_valloader)
    print("{} TEST Accuracy = {:2%}\n".format(exp_dict["src_dataset"], src_acc))

    history["src_acc"] = src_acc
    ms.save_model_src(exp_dict, history, src_model, src_opt)

    #####################
    ## Train Target model
    #####################
    tgt_trainloader, tgt_valloader = ms.load_tgt_loaders(exp_dict)

    # Load models and initialize the target model with the source weights
    tgt_model, tgt_opt, disc_model, disc_opt = ms.load_model_tgt(exp_dict)
    tgt_model.load_state_dict(src_model.state_dict())

    for e in range(history["tgt_train"][-1]["epoch"], exp_dict["tgt_epochs"] + 1):
        # 1. Train disc
        if exp_dict["options"]["disc"]:
            tg.fit_disc(src_model, tgt_model, disc_model,
                        src_trainloader, tgt_trainloader,
                        opt_tgt=tgt_opt, opt_disc=disc_opt,
                        epochs=3, verbose=0)

        acc_tgt = test.validate(src_model, tgt_model, src_trainloader, tgt_valloader)
        history["tgt_train"] += [{
            "epoch": e,
            "acc_src": src_acc,
            "acc_tgt": acc_tgt,
            "n_train - " + exp_dict["src_dataset"]: len(src_trainloader.dataset),
            "n_train - " + exp_dict["tgt_dataset"]: len(tgt_trainloader.dataset),
            "n_test - " + exp_dict["tgt_dataset"]: len(tgt_valloader.dataset)
        }]

        print("\n>>> Methods: {} - Source: {} -> Target: {}".format(
            None, exp_dict["src_dataset"], exp_dict["tgt_dataset"]))
        print(pd.DataFrame([history["tgt_train"][-1]]))

        if (e % 5) == 0:
            ms.save_model_tgt(exp_dict, history, tgt_model, tgt_opt, disc_model, disc_opt)
            # ms.test_latest_model(exp_dict)

        # 2. Train center-magnet
        if exp_dict["options"]["center"]:
            tg.fit_center(src_model, tgt_model, src_trainloader, tgt_trainloader,
                          tgt_opt, epochs=1)

    ms.save_model_tgt(exp_dict, history, tgt_model, tgt_opt, disc_model, disc_opt)

    exp_dict["reset_src"] = 0
    exp_dict["reset_tgt"] = 0
    ms.test_latest_model(exp_dict)
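# A hedged example of the exp_dict this train() expects, built only from the
# keys referenced above; the dataset names and epoch counts are placeholders:
exp_dict = {
    "src_dataset": "mnist",
    "tgt_dataset": "usps",
    "src_epochs": 100,
    "tgt_epochs": 20,
    "options": {"disc": True, "center": True},
}
train(exp_dict)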
def main(args):
    # torch.backends.cudnn.benchmark = True  # This makes dilated conv much faster for CuDNN 7.5

    # MODEL
    num_features = [args.features * i for i in range(1, args.levels + 1)] if args.feature_growth == "add" else \
                   [args.features * 2 ** i for i in range(0, args.levels)]
    target_outputs = int(args.output_size * args.sr)
    model = Waveunet(args.channels, num_features, args.channels, args.instruments,
                     kernel_size=args.kernel_size, target_output_size=target_outputs,
                     depth=args.depth, strides=args.strides, conv_type=args.conv_type,
                     res=args.res, separate=args.separate,
                     difference_output=args.difference_output)

    if args.cuda:
        model = utils.DataParallel(model)
        print("move model to gpu")
        model.cuda()

    print('model: ', model)
    print('parameter count: ', str(sum(p.numel() for p in model.parameters())))

    writer = SummaryWriter(args.log_dir)

    ### DATASET
    musdb = get_musdb_folds(args.dataset_dir)
    # If not data augmentation, at least crop targets to fit model output shape
    crop_func = partial(crop, shapes=model.shapes)
    # Data augmentation function for training
    augment_func = partial(random_amplify, shapes=model.shapes, min=0.7, max=1.0)
    train_data = SeparationDataset(musdb, "train", args.instruments, args.sr, args.channels,
                                   model.shapes, True, args.hdf_dir, audio_transform=augment_func)
    val_data = SeparationDataset(musdb, "val", args.instruments, args.sr, args.channels,
                                 model.shapes, False, args.hdf_dir, audio_transform=crop_func)
    test_data = SeparationDataset(musdb, "test", args.instruments, args.sr, args.channels,
                                  model.shapes, False, args.hdf_dir, audio_transform=crop_func)

    dataloader = torch.utils.data.DataLoader(train_data,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.num_workers,
                                             worker_init_fn=utils.worker_init_fn)

    ##### TRAINING ####

    # Set up the loss function
    if args.loss == "L1":
        criterion = nn.L1Loss()
    elif args.loss == "L2":
        criterion = nn.MSELoss()
    elif args.loss == "SI-SDR":
        criterion = compute_si_sdr
    elif args.loss == "entropy":
        criterion = compute_L1_entropy(int(args.loss_arg_1), args.loss_arg_2)
    elif args.loss == "L1_time":
        criterion = compute_L1_time(args.loss_arg_1)
    elif args.loss == "multi_spec":
        criterion = compute_multi_scale_spectral_loss()
    else:
        raise NotImplementedError("Couldn't find this loss!")

    # Set up optimiser
    optimizer = Adam(params=model.parameters(), lr=args.lr)

    # Set up training state dict that will also be saved into checkpoints
    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_loss": np.Inf}

    # LOAD MODEL CHECKPOINT IF DESIRED
    if args.load_model is not None:
        print("Continuing training full model from checkpoint " + str(args.load_model))
        if args.load_state_only:
            # Load only the weights; start the optimiser and training state from scratch
            state = utils.load_model(model, None, args.load_model, args.cuda)
            optimizer = Adam(params=model.parameters(), lr=args.lr)
            state = {'step': 0, "worse_epochs": 0, "epochs": 0, "best_loss": np.Inf}
        else:
            state = utils.load_model(model, optimizer, args.load_model, args.cuda)

    print('TRAINING START')
    while state["worse_epochs"] < args.patience:
        print("Training one epoch from iteration " + str(state["step"]))
        avg_time = 0.
        model.train()
        with tqdm(total=len(train_data) // args.batch_size) as pbar:
            np.random.seed()
            for example_num, (x, targets) in enumerate(dataloader):
                if args.cuda:
                    x = x.cuda()
                    for k in list(targets.keys()):
                        targets[k] = targets[k].cuda()

                t = time.time()

                # Set LR for this iteration
                utils.set_cyclic_lr(optimizer, example_num,
                                    len(train_data) // args.batch_size,
                                    args.cycles, args.min_lr, args.lr)
                writer.add_scalar("lr", utils.get_lr(optimizer), state["step"])

                # Compute loss for each instrument/model
                optimizer.zero_grad()
                outputs, avg_loss = utils.compute_loss(model, x, targets, criterion,
                                                       compute_grad=True)
                optimizer.step()

                state["step"] += 1

                t = time.time() - t
                avg_time += (1. / float(example_num + 1)) * (t - avg_time)

                writer.add_scalar("train_loss", avg_loss, state["step"])

                if example_num % args.example_freq == 0:
                    input_centre = torch.mean(
                        x[0, :, model.shapes["output_start_frame"]:model.shapes["output_end_frame"]],
                        0)  # Stereo not supported for logs yet
                    writer.add_audio("input", input_centre, state["step"], sample_rate=args.sr)

                    for inst in outputs.keys():
                        writer.add_audio(inst + "_pred", torch.mean(outputs[inst][0], 0),
                                         state["step"], sample_rate=args.sr)
                        writer.add_audio(inst + "_target", torch.mean(targets[inst][0], 0),
                                         state["step"], sample_rate=args.sr)

                pbar.update(1)

        # VALIDATE
        val_loss = validate(args, model, criterion, val_data)
        print("VALIDATION FINISHED: LOSS: " + str(val_loss))
        writer.add_scalar("val_loss", val_loss, state["step"])

        # EARLY STOPPING CHECK
        checkpoint_path = os.path.join(args.checkpoint_dir, "checkpoint_" + str(state["step"]))
        if val_loss >= state["best_loss"]:
            state["worse_epochs"] += 1
        else:
            print("MODEL IMPROVED ON VALIDATION SET!")
            state["worse_epochs"] = 0
            state["best_loss"] = val_loss
            state["best_checkpoint"] = checkpoint_path

        # CHECKPOINT
        print("Saving model...")
        utils.save_model(model, optimizer, state, checkpoint_path)

        state["epochs"] += 1

    #### TESTING ####
    # Test loss
    print("TESTING")

    # Load best model based on validation loss
    state = utils.load_model(model, None, state["best_checkpoint"], args.cuda)
    test_loss = validate(args, model, criterion, test_data)
    print("TEST FINISHED: LOSS: " + str(test_loss))
    writer.add_scalar("test_loss", test_loss, state["step"])

    # Mir_eval metrics
    test_metrics = evaluate(args, musdb["test"], model, args.instruments)

    # Dump all metrics results into pickle file for later analysis if needed
    with open(os.path.join(args.checkpoint_dir, "results.pkl"), "wb") as f:
        pickle.dump(test_metrics, f)

    # Write most important metrics into Tensorboard log
    avg_SDRs = {inst: np.mean([np.nanmean(song[inst]["SDR"]) for song in test_metrics])
                for inst in args.instruments}
    avg_SIRs = {inst: np.mean([np.nanmean(song[inst]["SIR"]) for song in test_metrics])
                for inst in args.instruments}
    for inst in args.instruments:
        writer.add_scalar("test_SDR_" + inst, avg_SDRs[inst], state["step"])
        writer.add_scalar("test_SIR_" + inst, avg_SIRs[inst], state["step"])
    overall_SDR = np.mean(list(avg_SDRs.values()))
    writer.add_scalar("test_SDR", overall_SDR)
    print("SDR: " + str(overall_SDR))

    writer.close()
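# utils.set_cyclic_lr is used above but not shown in the source. A minimal
# sketch of one plausible cyclic schedule (cosine annealing within each cycle);
# this is an assumption, not the repo's actual implementation:
import math

def set_cyclic_lr_sketch(optimizer, it, iters_per_epoch, cycles, min_lr, max_lr):
    # Split the epoch into `cycles` segments and anneal the LR from max_lr
    # down to min_lr within each segment.
    cycle_len = max(iters_per_epoch // cycles, 1)
    pos = (it % cycle_len) / cycle_len  # position within the current cycle, in [0, 1)
    lr = min_lr + 0.5 * (max_lr - min_lr) * (1 + math.cos(math.pi * pos))
    for group in optimizer.param_groups:
        group["lr"] = lr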
def main():
    # Parse options
    args = Options().parse()
    print('Parameters:\t' + str(args))

    if args.filter_sketch:
        assert args.dataset == 'Sketchy'
    if args.split_eccv_2018:
        assert args.dataset == 'Sketchy_extended' or args.dataset == 'Sketchy'
    if args.gzs_sbir:
        args.test = True

    # Read the config file
    config = utils.read_config()
    path_dataset = config['path_dataset']
    path_aux = config['path_aux']

    # Modify the log and checkpoint paths
    ds_var = None
    if '_' in args.dataset:
        token = args.dataset.split('_')
        args.dataset = token[0]
        ds_var = token[1]

    str_aux = ''
    if args.split_eccv_2018:
        str_aux = 'split_eccv_2018'
    if args.gzs_sbir:
        str_aux = os.path.join(str_aux, 'generalized')

    args.semantic_models = sorted(args.semantic_models)
    model_name = '+'.join(args.semantic_models)
    root_path = os.path.join(path_dataset, args.dataset)
    path_sketch_model = os.path.join(path_aux, 'CheckPoints', args.dataset, 'sketch')
    path_image_model = os.path.join(path_aux, 'CheckPoints', args.dataset, 'image')
    path_cp = os.path.join(path_aux, 'CheckPoints', args.dataset, str_aux, model_name,
                           str(args.dim_out))
    path_log = os.path.join(path_aux, 'LogFiles', args.dataset, str_aux, model_name,
                            str(args.dim_out))
    path_results = os.path.join(path_aux, 'Results', args.dataset, str_aux, model_name,
                                str(args.dim_out))

    files_semantic_labels = []
    sem_dim = 0
    for f in args.semantic_models:
        fi = os.path.join(path_aux, 'Semantic', args.dataset, f + '.npy')
        files_semantic_labels.append(fi)
        sem_dim += list(np.load(fi, allow_pickle=True).item().values())[0].shape[0]

    print('Checkpoint path: {}'.format(path_cp))
    print('Logger path: {}'.format(path_log))
    print('Result path: {}'.format(path_results))

    # Parameters for transforming the images
    transform_image = transforms.Compose([transforms.Resize((args.im_sz, args.im_sz)),
                                          transforms.ToTensor()])
    transform_sketch = transforms.Compose([transforms.Resize((args.sk_sz, args.sk_sz)),
                                           transforms.ToTensor()])

    # Load the dataset
    print('Loading data...', end='')
    if args.dataset == 'Sketchy':
        if ds_var == 'extended':
            photo_dir = 'extended_photo'  # photo or extended_photo
            photo_sd = ''
        else:
            photo_dir = 'photo'
            photo_sd = 'tx_000000000000'
        sketch_dir = 'sketch'
        sketch_sd = 'tx_000000000000'
        splits = utils.load_files_sketchy_zeroshot(root_path=root_path,
                                                   split_eccv_2018=args.split_eccv_2018,
                                                   photo_dir=photo_dir,
                                                   sketch_dir=sketch_dir,
                                                   photo_sd=photo_sd,
                                                   sketch_sd=sketch_sd)
    elif args.dataset == 'TU-Berlin':
        photo_dir = 'images'
        sketch_dir = 'sketches'
        photo_sd = ''
        sketch_sd = ''
        splits = utils.load_files_tuberlin_zeroshot(root_path=root_path,
                                                    photo_dir=photo_dir,
                                                    sketch_dir=sketch_dir,
                                                    photo_sd=photo_sd,
                                                    sketch_sd=sketch_sd)
    else:
        raise Exception('Wrong dataset.')

    # Combine the valid and test set into test set
    splits['te_fls_sk'] = np.concatenate((splits['va_fls_sk'], splits['te_fls_sk']), axis=0)
    splits['te_clss_sk'] = np.concatenate((splits['va_clss_sk'], splits['te_clss_sk']), axis=0)
    splits['te_fls_im'] = np.concatenate((splits['va_fls_im'], splits['te_fls_im']), axis=0)
    splits['te_clss_im'] = np.concatenate((splits['va_clss_im'], splits['te_clss_im']), axis=0)

    # For generalized zero-shot SBIR, add a fraction of (unique) training samples to the test set
    if args.gzs_sbir:
        perc = 0.2
        _, idx_sk = np.unique(splits['tr_fls_sk'], return_index=True)
        tr_fls_sk_ = splits['tr_fls_sk'][idx_sk]
        tr_clss_sk_ = splits['tr_clss_sk'][idx_sk]
        _, idx_im = np.unique(splits['tr_fls_im'], return_index=True)
        tr_fls_im_ = splits['tr_fls_im'][idx_im]
        tr_clss_im_ = splits['tr_clss_im'][idx_im]
        if args.dataset == 'Sketchy' and args.filter_sketch:
            _, idx_sk = np.unique([f.split('-')[0] for f in tr_fls_sk_], return_index=True)
            tr_fls_sk_ = tr_fls_sk_[idx_sk]
            tr_clss_sk_ = tr_clss_sk_[idx_sk]
        idx_sk = np.sort(np.random.choice(tr_fls_sk_.shape[0],
                                          int(perc * splits['te_fls_sk'].shape[0]),
                                          replace=False))
        idx_im = np.sort(np.random.choice(tr_fls_im_.shape[0],
                                          int(perc * splits['te_fls_im'].shape[0]),
                                          replace=False))
        splits['te_fls_sk'] = np.concatenate((tr_fls_sk_[idx_sk], splits['te_fls_sk']), axis=0)
        splits['te_clss_sk'] = np.concatenate((tr_clss_sk_[idx_sk], splits['te_clss_sk']), axis=0)
        splits['te_fls_im'] = np.concatenate((tr_fls_im_[idx_im], splits['te_fls_im']), axis=0)
        splits['te_clss_im'] = np.concatenate((tr_clss_im_[idx_im], splits['te_clss_im']), axis=0)

    # Class dictionary
    dict_clss = utils.create_dict_texts(splits['tr_clss_im'])

    data_train = DataGeneratorPaired(args.dataset, root_path, photo_dir, sketch_dir,
                                     photo_sd, sketch_sd, splits['tr_fls_sk'],
                                     splits['tr_fls_im'], splits['tr_clss_im'],
                                     transforms_sketch=transform_sketch,
                                     transforms_image=transform_image)
    data_valid_sketch = DataGeneratorSketch(args.dataset, root_path, sketch_dir, sketch_sd,
                                            splits['va_fls_sk'], splits['va_clss_sk'],
                                            transforms=transform_sketch)
    data_valid_image = DataGeneratorImage(args.dataset, root_path, photo_dir, photo_sd,
                                          splits['va_fls_im'], splits['va_clss_im'],
                                          transforms=transform_image)
    data_test_sketch = DataGeneratorSketch(args.dataset, root_path, sketch_dir, sketch_sd,
                                           splits['te_fls_sk'], splits['te_clss_sk'],
                                           transforms=transform_sketch)
    data_test_image = DataGeneratorImage(args.dataset, root_path, photo_dir, photo_sd,
                                         splits['te_fls_im'], splits['te_clss_im'],
                                         transforms=transform_image)
    print('Done')

    train_sampler = WeightedRandomSampler(data_train.get_weights(),
                                          num_samples=args.epoch_size * args.batch_size,
                                          replacement=True)

    # PyTorch train loader
    train_loader = DataLoader(dataset=data_train, batch_size=args.batch_size,
                              sampler=train_sampler, num_workers=args.num_workers,
                              pin_memory=True)
    # PyTorch valid loader for sketch
    valid_loader_sketch = DataLoader(dataset=data_valid_sketch, batch_size=args.batch_size,
                                     shuffle=False, num_workers=args.num_workers,
                                     pin_memory=True)
    # PyTorch valid loader for image
    valid_loader_image = DataLoader(dataset=data_valid_image, batch_size=args.batch_size,
                                    shuffle=False, num_workers=args.num_workers,
                                    pin_memory=True)
    # PyTorch test loader for sketch
    test_loader_sketch = DataLoader(dataset=data_test_sketch, batch_size=args.batch_size,
                                    shuffle=False, num_workers=args.num_workers,
                                    pin_memory=True)
    # PyTorch test loader for image
    test_loader_image = DataLoader(dataset=data_test_image, batch_size=args.batch_size,
                                   shuffle=False, num_workers=args.num_workers,
                                   pin_memory=True)

    # Model parameters
    params_model = dict()
    # Paths to pre-trained sketch and image models
    params_model['path_sketch_model'] = path_sketch_model
    params_model['path_image_model'] = path_image_model
    # Dimensions
    params_model['dim_out'] = args.dim_out
    params_model['sem_dim'] = sem_dim
    # Number of classes
    params_model['num_clss'] = len(dict_clss)
    # Weight (on losses) parameters
    params_model['lambda_se'] = args.lambda_se
    params_model['lambda_im'] = args.lambda_im
    params_model['lambda_sk'] = args.lambda_sk
    params_model['lambda_gen_cyc'] = args.lambda_gen_cyc
    params_model['lambda_gen_adv'] = args.lambda_gen_adv
    params_model['lambda_gen_cls'] = args.lambda_gen_cls
    params_model['lambda_gen_reg'] = args.lambda_gen_reg
    params_model['lambda_disc_se'] = args.lambda_disc_se
    params_model['lambda_disc_sk'] = args.lambda_disc_sk
    params_model['lambda_disc_im'] = args.lambda_disc_im
    params_model['lambda_regular'] = args.lambda_regular
    # Optimizers' parameters
    params_model['lr'] = args.lr
    params_model['momentum'] = args.momentum
    params_model['milestones'] = args.milestones
    params_model['gamma'] = args.gamma
    # Files with semantic labels
    params_model['files_semantic_labels'] = files_semantic_labels
    # Class dictionary
    params_model['dict_clss'] = dict_clss

    # Model
    sem_pcyc_model = SEM_PCYC(params_model)

    cudnn.benchmark = True

    # Logger
    print('Setting logger...', end='')
    logger = Logger(path_log, force=True)
    print('Done')

    # Check if CUDA is enabled
    print('Checking cuda...', end='')
    if args.ngpu > 0 and torch.cuda.is_available():
        print('*Cuda exists*...', end='')
        sem_pcyc_model = sem_pcyc_model.cuda()
    print('Done')

    best_map = 0
    early_stop_counter = 0

    # Epoch loop
    if not args.test:
        print('***Train***')
        for epoch in range(args.epochs):
            sem_pcyc_model.scheduler_gen.step()
            sem_pcyc_model.scheduler_disc.step()
            sem_pcyc_model.scheduler_ae.step()

            # Train on the training set
            losses = train(train_loader, sem_pcyc_model, epoch, args)

            # Evaluate on the validation set (map_ because `map` is a builtin)
            print('***Validation***')
            valid_data = validate(valid_loader_sketch, valid_loader_image, sem_pcyc_model,
                                  epoch, args)
            map_ = np.mean(valid_data['aps@all'])
            print('mAP@all on validation set after {0} epochs: {1:.4f} (real), {2:.4f} (binary)'
                  .format(epoch + 1, map_, np.mean(valid_data['aps@all_bin'])))
            del valid_data

            if map_ > best_map:
                best_map = map_
                early_stop_counter = 0
                utils.save_checkpoint({'epoch': epoch + 1,
                                       'state_dict': sem_pcyc_model.state_dict(),
                                       'best_map': best_map},
                                      directory=path_cp)
            else:
                if args.early_stop == early_stop_counter:
                    break
                early_stop_counter += 1

            # Logger step
            logger.add_scalar('semantic autoencoder loss', losses['aut_enc'].avg)
            logger.add_scalar('generator adversarial loss', losses['gen_adv'].avg)
            logger.add_scalar('generator cycle consistency loss', losses['gen_cyc'].avg)
            logger.add_scalar('generator classification loss', losses['gen_cls'].avg)
            logger.add_scalar('generator regression loss', losses['gen_reg'].avg)
            logger.add_scalar('generator loss', losses['gen'].avg)
            logger.add_scalar('semantic discriminator loss', losses['disc_se'].avg)
            logger.add_scalar('sketch discriminator loss', losses['disc_sk'].avg)
            logger.add_scalar('image discriminator loss', losses['disc_im'].avg)
            logger.add_scalar('discriminator loss', losses['disc'].avg)
            logger.add_scalar('mean average precision', map_)
            logger.step()

    # Load the best model so far
    best_model_file = os.path.join(path_cp, 'model_best.pth')
    if os.path.isfile(best_model_file):
        print("Loading best model from '{}'".format(best_model_file))
        checkpoint = torch.load(best_model_file)
        epoch = checkpoint['epoch']
        best_map = checkpoint['best_map']
        sem_pcyc_model.load_state_dict(checkpoint['state_dict'])
        print("Loaded best model '{0}' (epoch {1}; mAP@all {2:.4f})".format(
            best_model_file, epoch, best_map))

        print('***Test***')
        valid_data = validate(test_loader_sketch, test_loader_image, sem_pcyc_model,
                              epoch, args)
        print('Results on test set: mAP@all = {1:.4f}, Prec@100 = {0:.4f}, mAP@200 = {3:.4f}, '
              'Prec@200 = {2:.4f}, Time = {4:.6f} || mAP@all (binary) = {6:.4f}, '
              'Prec@100 (binary) = {5:.4f}, mAP@200 (binary) = {8:.4f}, '
              'Prec@200 (binary) = {7:.4f}, Time (binary) = {9:.6f}'.format(
                  valid_data['prec@100'], np.mean(valid_data['aps@all']),
                  valid_data['prec@200'], np.mean(valid_data['aps@200']),
                  valid_data['time_euc'], valid_data['prec@100_bin'],
                  np.mean(valid_data['aps@all_bin']), valid_data['prec@200_bin'],
                  np.mean(valid_data['aps@200_bin']), valid_data['time_bin']))

        print('Saving qualitative results...', end='')
        path_qualitative_results = os.path.join(path_results, 'qualitative_results')
        utils.save_qualitative_results(root_path, sketch_dir, sketch_sd, photo_dir, photo_sd,
                                       splits['te_fls_sk'], splits['te_fls_im'],
                                       path_qualitative_results, valid_data['aps@all'],
                                       valid_data['sim_euc'], valid_data['str_sim'],
                                       save_image=args.save_image_results,
                                       nq=args.number_qualit_results,
                                       best=args.save_best_results)
        print('Done')
    else:
        print("No best model found at '{}'. Exiting...".format(best_model_file))
        exit()
def main(args):
    best_acc1 = 0
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device: {}'.format(device))

    # create model
    print("=> creating model DFL-CNN...")
    model = DFL_VGG16(nclass=cfg.NUM_CLASSES)
    model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume,
                                                                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = cfg.TRAIN_DATASET_DIR
    valdir = cfg.VAL_DATASET_DIR
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = dataset.RPC_SINGLE(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(448),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    val_dataset = dataset.RPC_SINGLE(
        valdir,
        transforms.Compose([
            transforms.Resize(512),
            transforms.CenterCrop(448),
            transforms.ToTensor(),
            normalize,
        ]))

    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    logger = Logger('./logs')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        val_loss, val_acc = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = val_acc > best_acc1
        best_acc1 = max(val_acc, best_acc1)

        # log
        info = {
            'train_loss': float(train_loss),
            'train_acc': float(train_acc),
            'val_loss': float(val_loss),
            'val_acc': float(val_acc)
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
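# A hedged sketch of the argparse wiring this main() expects; the flag names
# are assumptions inferred from the attributes used above (args.lr,
# args.momentum, args.weight_decay, args.resume, args.start_epoch,
# args.epochs, args.batch_size, args.workers):
import argparse

def parse_args():
    p = argparse.ArgumentParser(description="DFL-CNN training")
    p.add_argument("--lr", type=float, default=0.1)
    p.add_argument("--momentum", type=float, default=0.9)
    p.add_argument("--weight-decay", type=float, default=1e-4)
    p.add_argument("--resume", type=str, default="")
    p.add_argument("--start-epoch", type=int, default=0)
    p.add_argument("--epochs", type=int, default=90)
    p.add_argument("--batch-size", type=int, default=32)
    p.add_argument("--workers", type=int, default=4)
    return p.parse_args()

if __name__ == "__main__":
    main(parse_args())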
def main():
    print('Training Process\nInitializing...\n')
    config.init_env()
    config.print_paras()

    train_dataset = view_data(config.view_net.data_root,
                              status=STATUS_TRAIN,
                              base_model_name=config.base_model_name)
    val_dataset = view_data(config.view_net.data_root,
                            status=STATUS_TEST,
                            base_model_name=config.base_model_name)

    train_loader = DataLoader(train_dataset,
                              batch_size=config.view_net.train.batch_sz,
                              num_workers=config.num_workers,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.view_net.train.batch_sz,
                            num_workers=config.num_workers,
                            shuffle=True)

    best_map = 0
    best_prec1 = 0
    resume_epoch = 0

    # create model
    net = Net(pretrained=True)
    net = net.to(device=config.device)
    net = nn.DataParallel(net)
    optimizer = optim.SGD(net.parameters(), config.view_net.train.lr,
                          momentum=config.view_net.train.momentum,
                          weight_decay=config.view_net.train.weight_decay)
    # optimizer = optim.Adam(net.parameters(), config.view_net.train.lr,
    #                        weight_decay=config.view_net.train.weight_decay)

    if config.view_net.train.resume:
        print(f'loading pretrained model from {config.view_net.ckpt_file}')
        checkpoint = torch.load(config.view_net.ckpt_file, map_location='cpu')
        net.module.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_map = checkpoint['best_map']
        best_prec1 = checkpoint['best_prec1']
        if config.view_net.train.resume_epoch is not None:
            resume_epoch = config.view_net.train.resume_epoch
        else:
            resume_epoch = checkpoint['epoch'] + 1

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, 0.5)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [5, 9, 12], 0.3)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device=config.device)

    # for p in net.module.feature.parameters():
    #     p.requires_grad = False

    for epoch in range(resume_epoch, config.view_net.train.max_epoch):
        # Unfreeze all parameters after the warm-up epochs
        if epoch >= 5:
            for p in net.parameters():
                p.requires_grad = True

        lr_scheduler.step(epoch=epoch)
        train(train_loader, net, criterion, optimizer, epoch)

        with torch.no_grad():
            prec1, mAP = validate(val_loader, net)

        # save checkpoints
        if best_map < mAP:
            best_map = mAP
        if best_prec1 < prec1:
            best_prec1 = prec1
            save_ckpt(epoch, best_prec1, best_map, net, optimizer)
        # save_record(epoch, prec1, net.module)

        print('curr accuracy: ', prec1)
        print('best accuracy: ', best_prec1)
        print('best map: ', best_map)

    print('Train Finished!')
def train_val(cfg: DictConfig) -> None:
    # create dataloaders for training and validation
    loader_train, vocabs = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_train),
        "train",
        cfg.encoder,
        None,
        cfg.batch_size,
        cfg.num_workers,
    )
    assert vocabs is not None
    loader_val, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_val),
        "val",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )

    # create the model
    model = Parser(vocabs, cfg)
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % count_params(model))

    # create the optimizer
    optimizer = torch.optim.RMSprop(
        model.parameters(),
        lr=cfg.learning_rate,
        weight_decay=cfg.weight_decay,
    )
    start_epoch = 0
    if cfg.resume is not None:
        # resume training from a checkpoint
        checkpoint = load_model(cfg.resume)
        model.load_state_dict(checkpoint["model_state"])
        start_epoch = checkpoint["epoch"] + 1
        optimizer.load_state_dict(checkpoint["optimizer_state"])
        del checkpoint

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="max",
        factor=0.5,
        patience=cfg.learning_rate_patience,
        cooldown=cfg.learning_rate_cooldown,
        verbose=True,
    )

    # start training and validation
    best_f1_score = -1.0
    num_iters = 0
    for epoch in range(start_epoch, cfg.num_epochs):
        log.info("Epoch #%d" % epoch)

        if not cfg.skip_training:
            log.info("Training..")
            num_iters, accuracy_train, loss_train = train(
                num_iters,
                loader_train,
                model,
                optimizer,
                vocabs["label"],
                cfg,
            )
            log.info("Action accuracy: %.03f, Loss: %.03f" % (accuracy_train, loss_train))

        log.info("Validating..")
        f1_score_val = validate(loader_val, model, cfg)
        log.info(
            "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
            % (
                f1_score_val.fscore,
                f1_score_val.complete_match,
                f1_score_val.precision,
                f1_score_val.recall,
            ))

        if f1_score_val.fscore > best_f1_score:
            log.info("F1 score has improved")
            best_f1_score = f1_score_val.fscore

        scheduler.step(best_f1_score)

        save_checkpoint(
            "model_latest.pth",
            epoch,
            model,
            optimizer,
            f1_score_val.fscore,
            vocabs,
            cfg,
        )
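# A plausible Hydra entry point for train_val(); the config_path and
# config_name values are assumptions, not taken from the source:
@hydra.main(config_path="conf", config_name="train")
def main(cfg: DictConfig) -> None:
    train_val(cfg)

if __name__ == "__main__":
    main()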
#!/usr/bin/env python
from simplejson import load

from test import validate

# Load the JSON document and print the validation result
with open("/home/chris/Downloads/biens.json", "r") as f:
    o = load(f)
print(validate(o))
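# A slightly more flexible variant of the same script (a hypothetical CLI
# wrapper; validate() and the JSON layout are assumed to behave as above):
import argparse

from simplejson import load
from test import validate

def cli():
    parser = argparse.ArgumentParser(description="Validate a JSON document")
    parser.add_argument("path", help="path to the JSON file")
    args = parser.parse_args()
    with open(args.path, "r") as f:
        print(validate(load(f)))

if __name__ == "__main__":
    cli()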