def train(args):
    """Train a 3-channel-in / 1-channel-out U-Net with BCE loss and Adam.

    Args:
        args: namespace with ``batch_size`` and ``epochs`` attributes.

    Returns:
        The trained model (weights are also saved to ``model_weights.pth``).

    NOTE(review): relies on module-level ``MyDataset``, ``Unet``, ``device``,
    ``x_transforms`` and ``y_transforms`` defined elsewhere in this file.
    """
    my_dataset = MyDataset("../data/train", transform=x_transforms,
                           target_transform=y_transforms)
    dataloaders = DataLoader(my_dataset, batch_size=args.batch_size,
                             shuffle=True, num_workers=1)
    model = Unet(3, 1).to(device)
    model.train()
    criterion = torch.nn.BCELoss()
    optimizer = optim.Adam(model.parameters())

    num_epochs = args.epochs
    # Hoisted out of the epoch loop: dataset size does not change per epoch.
    data_size = len(dataloaders.dataset)
    num_batches = (data_size - 1) // dataloaders.batch_size + 1

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        epoch_loss = 0
        step = 0
        for x, y in dataloaders:
            step += 1
            inputs = x.to(device)
            labels = y.to(device)  # fixed: original misspelled this "lables"
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            print("%d/%d, train_loss:%0.3f" % (step, num_batches, loss.item()))
        print("epoch %d loss:%0.3f" % (epoch, epoch_loss))

    # Persist only the final weights once all epochs have finished.
    torch.save(model.state_dict(), 'model_weights.pth')
    return model
def main(_):
    """Entry point: build a U-Net from command-line FLAGS and train or test it."""
    pp.pprint(FLAGS.__flags)

    # A missing height defaults to a square input (height == width).
    if FLAGS.height is None:
        FLAGS.height = FLAGS.width

    # Gather every FLAG the model consumes into one config mapping.
    config = dict(
        width=FLAGS.width,
        height=FLAGS.height,
        learning_rate=FLAGS.learning_rate,
        data_set=FLAGS.data_set,
        test_set=FLAGS.test_set,
        result_name=FLAGS.result_name,
        ckpt_dir=FLAGS.ckpt_dir,
        logs_step=FLAGS.logs_step,
        restore_step=FLAGS.restore_step,
        hidden_num=FLAGS.hidden_num,
        epoch_num=FLAGS.epoch_num,
        batch_size=FLAGS.batch_size,
        num_gpu=FLAGS.num_gpu,
        is_train=FLAGS.is_train,
        w_bn=FLAGS.w_bn,
    )
    unet = Unet(**config)

    show_all_variables()

    # Dispatch on the train/test mode flag.
    if FLAGS.is_train:
        unet.train()
    else:
        unet.test()
def train():
    """Train a U-Net QA model on SQuAD and checkpoint the best-F1 weights.

    Relies on module-level helpers defined elsewhere in this file:
    ``prepare_train``, ``get_batch_data``, ``Evaluate``, ``Unet``, ``device``,
    ``pkl`` and ``nn``.
    """
    save_dir = "/home/FuDawei/NLP/SQUAD/unet/data/"
    train_examples, dev_examples, opt = prepare_train(save_dir)
    epoch = 30        # number of training epochs
    batch_size = 32
    model = Unet(opt=opt).to(device)
    # Only optimize parameters that require gradients (frozen embeddings excluded).
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adamax(parameters, lr=opt["lr"])
    best_score, exact_scores, f1_scores = 0, [], []
    count = 0         # global step counter across epochs
    total_loss = 0    # running loss, reported every 100 steps
    for ep in range(epoch):
        model.train()
        for batch_data in get_batch_data(train_examples, batch_size):
            data = model.get_data(batch_data)
            loss = model(data)
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            # Clip gradient norm to 10 to stabilize training.
            nn.utils.clip_grad_norm_(parameters, 10)
            optimizer.step()
            # NOTE(review): reset_parameters() is called after EVERY optimizer
            # step — presumably restoring fixed word embeddings; confirm its
            # semantics in the Unet class before touching this ordering.
            model.reset_parameters()
            count += 1
            # print(loss.item())
            # Evaluate(dev_examples, model)
            total_loss += loss.item()
            if count % 100 == 0:
                # Report the mean loss over the last 100 steps.
                print(total_loss / 100)
                total_loss = 0
        # model.eval()
        # Evaluate(dev_examples, model, opt)
        if not opt["fix_word_embedding"]:
            model.reset_parameters()
        print(ep)
        model.eval()
        # Per-epoch evaluation on the dev set.
        exact, f1 = Evaluate(dev_examples, model, opt)
        exact_scores.append(exact)
        f1_scores.append(f1)
        if f1 > best_score:
            best_score = f1
            torch.save(model.state_dict(), save_dir + "best_model")
        # Persist the score history each epoch (overwritten every time).
        with open(save_dir + '_f1_scores.pkl', 'wb') as f:
            pkl.dump(f1_scores, f)
        with open(save_dir + '_exact_scores.pkl', 'wb') as f:
            pkl.dump(exact_scores, f)
# --- GAN training setup (fragment: generator/discriminator, optimizer_g and
# optimizer_d are created above this chunk; the epoch loop continues below) ---
optimizer_s = torch.optim.Adam(unet.parameters(), lr=0.0002)
generator.cuda()
discriminator.cuda()
unet.cuda()
EPOCH = 100
num_iter = len(train_loader)
D_LOSS = []  # per-iteration discriminator losses
G_LOSS = []  # per-iteration generator losses
# S_LOSS=[]
# Append loss logs to a text file, stamped with the wall-clock start time.
f = open("./loss_gan.txt", 'a')
print(time.strftime('|---------%Y-%m-%d %H:%M:%S---------|', time.localtime(time.time())), file=f)
discriminator.train()
unet.train()
for epoch in range(EPOCH):
    # Step-wise learning-rate schedule: decay all three optimizers in lockstep
    # at epochs 30, 60 and 90.
    if epoch == 30:
        update_lr(optimizer_g, 0.0001)
        update_lr(optimizer_d, 0.0001)
        update_lr(optimizer_s, 0.0001)
        print('change lr to :', optimizer_g.param_groups[0]['lr'])
    elif epoch == 60:
        update_lr(optimizer_g, 0.00005)
        update_lr(optimizer_d, 0.00005)
        update_lr(optimizer_s, 0.00005)
        print('change lr to :', optimizer_g.param_groups[0]['lr'])
    elif epoch == 90:
        update_lr(optimizer_g, 0.00001)
        update_lr(optimizer_d, 0.00001)
        update_lr(optimizer_s, 0.00001)
# train data check # train_df, valid_df = split(csv_path=label_path, train_dir=train_image_dir) balanced_df = under_sample(csv_path=label_path, train_dir=train_image_dir) # balanced_df['ships'].hist(bins=np.arange(10)) # plt.show() train_gen = make_image_gen(balanced_df) train_x, train_y = next(train_gen) print('x', train_x.shape, train_x.min(), train_x.max()) print('y', train_y.shape, train_y.min(), train_y.max()) # valid data check valid_x, valid_y = next(make_image_gen(balanced_df, batch_size=VALID_IMG_COUNT)) print(valid_x.shape, valid_y.shape) # augment data check cur_gen = create_aug_gen(train_gen) t_x, t_y = next(cur_gen) print('x', t_x.shape, t_x.dtype, t_x.min(), t_x.max()) print('y', t_y.shape, t_y.dtype, t_y.min(), t_y.max()) model = Unet(t_x.shape[1:]) loss_history = model.train(balanced_train_df=balanced_df, valid_x=valid_x, valid_y=valid_y, make_image_gen=make_image_gen, create_aug_gen=create_aug_gen) gc.collect() # show_loss(loss_history)
    # --- tail of a checkpoint-loading helper (its `def load(...)` line is
    # above this chunk): restore weights/optimizer state and recover the
    # epoch number encoded in the checkpoint filename, e.g. "...epoch12.pth".
    net.load_state_dict(dict_model['net'])
    optim.load_state_dict(dict_model['optim'])
    epoch = int(ckpt_lst[-1].split('epoch')[1].split('.pth')[0])

    return net,optim, epoch


# network training
st_epoch = 0
if mode == 'train':
    # Optionally resume from the latest checkpoint.
    if train_continue == 'on':
        net, optim, st_epoch = load(ckpt_dir=ckpt_dir, net=net, optim=optim)
    for epoch in range(st_epoch+1, num_epoch+1):
        net.train()
        loss_arr=[]  # per-batch losses for this epoch
        for batch, data in enumerate(loader_train, 1):
            label = data['label'].to(device)
            input = data['input'].to(device)
            output = net(input)
            # Standard backprop step: zero grads, compute loss, update.
            optim.zero_grad()
            loss = fn_loss(output, label)
            loss.backward()
            optim.step()
            loss_arr += [loss.item()]
# --- CycleGAN epoch body (fragment: the enclosing epoch loop and the
# generator-loss computation continue outside this chunk) ---
# Apply the scheduled learning rate to every optimizer for this epoch.
lr = get_learning_rate(epoch)
for optimizer in optimizers:
    optimizer.param_groups[0]['lr'] = lr
print("learning rate = {}".format(lr))
# dataiter = iter(train_dataloader)
# data = dataiter.next()
for batch_idx, data in enumerate(train_dataloader):
    real_A = data['A'].squeeze(0).cuda(cuda)  # 40962*1
    real_B = data['B'].squeeze(0).cuda(cuda)  # 40962*1
    # Constant targets for the adversarial loss (real=1, fake=0).
    target_real = torch.tensor(1.0).cuda(cuda)
    target_false = torch.tensor(0.0).cuda(cuda)
    netG_A.train()
    netG_B.train()
    netD_A.train()
    netD_B.train()
    """Run forward pass; called by both functions"""
    # Cycle-consistency forward passes: A -> B -> A and B -> A -> B.
    fake_B = netG_A(real_A)  # G_A(A)       # 40962*1
    rec_A = netG_B(fake_B)   # G_B(G_A(A))  # 40962*1
    fake_A = netG_B(real_B)  # G_B(B)       # 40962*1
    rec_B = netG_A(fake_A)   # G_A(G_B(B))  # 40962*1
    """ train G_A and G_B"""
    set_requires_grad([netD_A, netD_B], False)  # Ds require no gradients when optimizing Gs
    optimizer_G_A.zero_grad()  # set G_A and G_B's gradients to zero
    optimizer_G_B.zero_grad()  # set G_A and G_B's gradients to zero
    """Calculate the loss for generators G_A and G_B"""
    # Identity loss
def main(args):
    """Train a U-Net segmenter with cosine LR, validation mIOU and early stopping.

    Args:
        args: namespace with ``epochs``, ``batch_size``, ``num_workers``,
            ``valid_ratio``, ``threshold``, ``separable``, ``down_method``,
            ``up_method``, ``lr``, ``weight_decay`` and ``patience``.

    Side effects: saves the best checkpoint under ``./checkpoint/``.
    Relies on module-level ``DataSetWrapper``, ``Unet``, ``get_IOU``, ``tqdm``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    ### Hyperparameters Setting ###
    epochs = args.epochs
    batch_size = args.batch_size
    num_workers = args.num_workers
    valid_ratio = args.valid_ratio
    threshold = args.threshold
    separable = args.separable
    down_method = args.down_method
    up_method = args.up_method

    ### DataLoader ###
    dataset = DataSetWrapper(batch_size, num_workers, valid_ratio)
    train_dl, valid_dl = dataset.get_data_loaders(train=True)

    ### Model: U-Net ###
    model = Unet(input_dim=1, separable=separable,
                 down_method=down_method, up_method=up_method)
    model.summary()
    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    # NOTE(review): T_max=len(train_dl) while the scheduler is stepped once per
    # EPOCH means one cosine cycle spans len(train_dl) epochs — confirm intended.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_dl), eta_min=0, last_epoch=-1)
    criterion = nn.BCEWithLogitsLoss()

    train_losses = []
    val_losses = []

    ### Train & Validation start ###
    mIOU_list = []
    best_mIOU = 0.
    step = 0  # epochs since the last mIOU improvement (early-stop counter)
    for epoch in range(epochs):
        ### train ###
        pbar = tqdm(train_dl)
        model.train()
        losses = []
        for (img, label) in pbar:
            optimizer.zero_grad()
            img, label = img.to(device), label.to(device)
            pred = model(img)
            # pred = Padding()(pred, label.size(3))
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            pbar.set_description(
                f'E: {epoch + 1} | L: {loss.item():.4f} | lr: {scheduler.get_lr()[0]:.7f}'
            )
        scheduler.step()

        # BUG FIX: original tested `if (epoch + 1) % 10:` which is truthy on
        # every epoch EXCEPT multiples of 10 — the intent (log every 10th
        # epoch) requires `% 10 == 0`.
        if (epoch + 1) % 10 == 0:
            train_losses.append(sum(losses) / len(losses))

        ### validation ###
        with torch.no_grad():
            model.eval()
            mIOU = []
            losses = []
            pbar = tqdm(valid_dl)
            for (img, label) in pbar:
                img, label = img.to(device), label.to(device)
                pred = model(img)
                loss = criterion(pred, label)
                mIOU.append(get_IOU(pred, label, threshold=threshold))
                losses.append(loss.item())
            mIOU = sum(mIOU) / len(mIOU)
            mIOU_list.append(mIOU)
            if (epoch + 1) % 10 == 0:  # same bug fix as above
                val_losses.append(sum(losses) / len(losses))
            # Repaired: this f-string was broken across a line in the original.
            print(
                f'VL: {loss.item():.4f} | mIOU: {100 * mIOU:.1f}% | '
                f'best mIOU: {100 * best_mIOU:.1f}'
            )

        ### Early Stopping ###
        if mIOU > best_mIOU:
            best_mIOU = mIOU
            save_state = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'train_losses': train_losses,
                'val_losses': val_losses,
                'best_mIOU': best_mIOU
            }
            torch.save(
                save_state,
                f'./checkpoint/{down_method}_{up_method}_{separable}.ckpt')
            step = 0
        else:
            step += 1
            if step > args.patience:
                print('Early stopped...')
                return
# --- segmentation training loop (fragment: model/optimizer/criterion are
# created above this chunk; the epoch loop continues below it) ---
train_loss,val_loss = [],[]    # per-epoch loss histories
train_iou,val_iou = [],[]      # per-epoch IoU histories
valid_loss_min = 10000         # best (lowest) validation loss so far
i = 1                          # 1-based epoch counter used for logging/resume
if initial_checkpoint is not None:
    # Resume: restore model/optimizer state and shift the epoch counter.
    model, optimizer, epochnum, valid_loss_min = load_ckp(checkpoint_path+initial_checkpoint, model, optimizer)
    print(f'initial ckp: {epochnum}')
    i = i + epochnum
for epoch in range(epochs):
    running_train_loss = []
    running_train_score = []
    model.train()
    for step,(image, mask) in enumerate(train_loader):
        image = image.cuda()
        mask = mask.cuda()
        pred_mask = model.forward(image)  # forward propagation
        loss = criterion(pred_mask, mask)
        score = iou_batch(pred_mask, mask)
        optimizer.zero_grad()  # setting gradient to zero
        loss.backward()
        optimizer.step()
        running_train_loss.append(loss.item())
        running_train_score.append(score)
        print(f'batch DiceBCELoss: {loss.item()}')
        print(f'batch iou: {score}')