import os
import time

import numpy as np
import torch
import torch.nn as nn

# `utils` (ensure_exists, evalPsnr) and `save_model` are project-local helpers.


def trainNet(net, opt, cri, sch, cp_dir, epoch_range, training_set, val_set, batch_size_big, rep=1):
    """Train `net` with criterion `cri`, validating and checkpointing into `cp_dir` every epoch."""
    tb_dir = os.path.join(cp_dir, 'tb')
    utils.ensure_exists(tb_dir)
    fout = open(os.path.join(cp_dir, 'train.log'), 'a')

    for epoch in epoch_range:
        net.train()
        running_loss = 0
        start_time = time.time()
        tl = len(training_set)

        # Each epoch iterates over the training set `rep` times.
        for e in range(rep):
            for i, data in enumerate(training_set, 0):
                com = data['com'].float().cuda()   # compressed input
                org = data['org'].float().cuda()   # original (ground-truth) target
                opt.zero_grad()

                if com.shape[0] > batch_size_big:
                    # Oversized batch: the first `batch_size_big` samples are trained at
                    # full resolution; the remainder as a random 32x32 crop aligned to
                    # the 8x8 block grid (assumes h, w > 32).
                    com_big = com[:batch_size_big, :, :, :]
                    org_big = org[:batch_size_big, :, :, :]
                    ret_big = net(com_big)
                    loss_big = cri(ret_big, org_big)

                    _, _, h, w = com.shape
                    new_h = 32
                    new_w = 32
                    top = 8 * np.random.randint(0, (h - new_h) // 8)
                    left = 8 * np.random.randint(0, (w - new_w) // 8)
                    com_small = com[batch_size_big:, :, top:top + new_h, left:left + new_w]
                    org_small = org[batch_size_big:, :, top:top + new_h, left:left + new_w]
                    ret_small = net(com_small)
                    loss_small = cri(ret_small, org_small)

                    loss = loss_big + loss_small
                    loss.backward()
                    nn.utils.clip_grad_norm_(net.parameters(), 5)
                    opt.step()
                else:
                    ret = net(com)
                    loss = cri(ret, org)
                    loss.backward()
                    nn.utils.clip_grad_norm_(net.parameters(), 5)
                    opt.step()

                running_loss += loss.item()
                # Print a persistent line every 100 batches, otherwise overwrite in place.
                end = '\n' if i % 100 == 0 else '\r'
                print('[Running epoch %2d, batch %4d] loss: %.3f'
                      % (epoch + 1, i + 1, 10000 * running_loss / (e * tl + i + 1)), end=end)

        # Per-epoch validation and checkpointing.
        if not (epoch + 1) % 1:
            timestamp = time.time()
            print('[timestamp %d, epoch %2d] loss: %.3f, time: %6ds '
                  % (timestamp, epoch + 1, 10000 * running_loss / ((i + 1) * rep),
                     timestamp - start_time), end='\n')
            with torch.no_grad():
                p_psnr = utils.evalPsnr(net, val_set, fout=fout)
            save_model(net, opt, os.path.join(cp_dir, str(epoch + 1) + '_withopt'))
            torch.save(net.state_dict(), os.path.join(cp_dir, str(epoch + 1)))

        sch.step()
        print('cur_lr: %.5f' % sch.get_lr()[0])
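# --- Hedged usage sketch (illustration only; not part of the original file) ---
# Shows one plausible way to drive trainNet above: a toy Conv2d model, an MSE
# criterion, Adam + MultiStepLR, and a synthetic dataset that yields the
# {'com', 'org'} dicts the loop expects. The model, dataset, and every
# hyperparameter below are assumptions, not values taken from this repository.
def _example_run():
    from torch.utils.data import Dataset, DataLoader

    class RandomPairSet(Dataset):
        """Synthetic stand-in: random 'compressed'/'original' image pairs."""
        def __init__(self, n=64, size=64):
            self.n, self.size = n, size

        def __len__(self):
            return self.n

        def __getitem__(self, idx):
            org = torch.rand(1, self.size, self.size)
            com = (org + 0.05 * torch.randn_like(org)).clamp(0, 1)
            return {'com': com, 'org': org}

    net = nn.Sequential(nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
                        nn.Conv2d(16, 1, 3, padding=1)).cuda()
    opt = torch.optim.Adam(net.parameters(), lr=1e-4)
    cri = nn.MSELoss()
    sch = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[30, 60], gamma=0.5)

    training_set = DataLoader(RandomPairSet(), batch_size=8, shuffle=True)
    val_set = DataLoader(RandomPairSet(n=8), batch_size=1)

    trainNet(net, opt, cri, sch, cp_dir='./checkpoints', epoch_range=range(2),
             training_set=training_set, val_set=val_set, batch_size_big=8)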
def trainNet(net, opt, cri, sch, cp_dir, epoch_range, training_set, val_set, batch_size_big, rep=1):
    """Multi-scale variant: feeds (1/4, 1/2, full) resolution pairs through `net` and `cri`.

    `batch_size_big` is kept for signature compatibility but is unused in this variant.
    """
    tb_dir = os.path.join(cp_dir, 'tb')
    utils.ensure_exists(tb_dir)
    fout = open(os.path.join(cp_dir, 'train.log'), 'a')

    for epoch in epoch_range:
        net.train()
        running_loss = 0
        start_time = time.time()
        tl = len(training_set)

        # Each epoch iterates over the training set `rep` times.
        for e in range(rep):
            for i, data in enumerate(training_set, 0):
                # Compressed inputs and ground-truth targets at full, 1/2, and 1/4 resolution.
                com = data['com'].float().cuda()
                c_2 = data['com_2'].float().cuda()
                c_4 = data['com_4'].float().cuda()
                org = data['org'].float().cuda()
                o_2 = data['org_2'].float().cuda()
                o_4 = data['org_4'].float().cuda()
                com_pair = (c_4, c_2, com)
                org_pair = (o_4, o_2, org)

                opt.zero_grad()
                ret = net(com_pair)
                # The criterion returns the total loss plus its per-scale MSE terms.
                loss, MSE4, MSE2, MSEp, MSEd = cri(ret, org_pair)
                loss.backward()
                nn.utils.clip_grad_norm_(net.parameters(), 10)
                opt.step()

                running_loss += loss.item()
                # Print a persistent line every 100 batches, otherwise overwrite in place.
                end = '\n' if i % 100 == 0 else '\r'
                print('[Running epoch %2d, batch %4d] loss: %.3f'
                      % (epoch + 1, i + 1, 10000 * running_loss / (e * tl + i + 1)), end=end)

        # Per-epoch validation and checkpointing.
        if not (epoch + 1) % 1:
            timestamp = time.time()
            print('[timestamp %d, epoch %2d] loss: %.3f, time: %6ds '
                  % (timestamp, epoch + 1, 10000 * running_loss / ((i + 1) * rep),
                     timestamp - start_time), end='\n')
            with torch.no_grad():
                p_psnr = utils.evalPsnr(net, val_set, fout=fout)
            save_model(net, opt, os.path.join(cp_dir, str(epoch + 1) + '_withopt'))
            torch.save(net.state_dict(), os.path.join(cp_dir, str(epoch + 1)))

        sch.step()
        print('cur_lr: %.5f' % sch.get_lr()[0])
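# --- Hedged interface sketch (illustration only; not part of the original file) ---
# The loop above expects `cri(ret, org_pair)` to return (loss, MSE4, MSE2, MSEp, MSEd).
# The repository's actual loss is not shown here; this is only a compatible-interface
# sketch. It assumes `ret` is a tuple (pred_4, pred_2, pred_full, pred_detail) and that
# the total loss is an unweighted sum of the four MSE terms; both the structure of
# `ret` and the equal weighting (and the guessed meanings of "p" and "d") are assumptions.
class MultiScaleMSE(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, ret, org_pair):
        pred_4, pred_2, pred_full, pred_detail = ret
        o_4, o_2, org = org_pair
        MSE4 = self.mse(pred_4, o_4)      # 1/4-resolution reconstruction error
        MSE2 = self.mse(pred_2, o_2)      # 1/2-resolution reconstruction error
        MSEp = self.mse(pred_full, org)   # full-resolution error (name guessed)
        MSEd = self.mse(pred_detail, org) # refined/detail-output error (name guessed)
        loss = MSE4 + MSE2 + MSEp + MSEd
        return loss, MSE4, MSE2, MSEp, MSEd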