Example 1: a training loop that splits oversized batches into a full-resolution part and a random, 8-aligned 32x32 crop.
# NOTE: `utils` (ensure_exists, evalPsnr) and `save_model` are project-local
# helpers that are assumed to be importable in this module.
import os
import time

import numpy as np
import torch
import torch.nn as nn

import utils


def trainNet(net,
             opt,
             cri,
             sch,
             cp_dir,
             epoch_range,
             training_set,
             val_set,
             batch_size_big,
             rep=1):
    tb_dir = os.path.join(cp_dir, 'tb')
    utils.ensure_exists(tb_dir)  # create the log directory if it does not exist
    fout = open(os.path.join(cp_dir, 'train.log'), 'a')  # validation log, appended

    for epoch in epoch_range:
        net.train()
        running_loss = 0
        start_time = time.time()
        tl = len(training_set)

        for e in range(rep):
            for i, data in enumerate(training_set, 0):

                # Compressed input and pristine target, moved to the GPU.
                com = data['com'].float().cuda()
                org = data['org'].float().cuda()

                opt.zero_grad()
                if com.shape[0] > batch_size_big:
                    # Oversized batch: train the first `batch_size_big` samples
                    # at full resolution...
                    com_big = com[:batch_size_big, :, :, :]
                    org_big = org[:batch_size_big, :, :, :]
                    ret_big = net(com_big)
                    loss_big = cri(ret_big, org_big)

                    # ...and train the remaining samples on a random 32x32 crop
                    # whose origin is 8-aligned (presumably to respect 8x8
                    # codec blocks).
                    _, _, h, w = com.shape
                    new_h = 32
                    new_w = 32
                    # '+ 1' admits the last valid offset and avoids
                    # randint(0, 0) when h == new_h or w == new_w.
                    top = 8 * np.random.randint(0, (h - new_h) // 8 + 1)
                    left = 8 * np.random.randint(0, (w - new_w) // 8 + 1)
                    com_small = com[batch_size_big:, :, top:top + new_h,
                                    left:left + new_w]
                    org_small = org[batch_size_big:, :, top:top + new_h,
                                    left:left + new_w]

                    ret_small = net(com_small)
                    loss_small = cri(ret_small, org_small)
                    loss = loss_big + loss_small
                    loss.backward()

                    # Clip the global gradient norm to 5 before stepping.
                    nn.utils.clip_grad_norm_(net.parameters(), 5)
                    opt.step()
                else:
                    # The batch fits: one full-resolution pass.
                    ret = net(com)
                    loss = cri(ret, org)
                    loss.backward()
                    nn.utils.clip_grad_norm_(net.parameters(), 5)
                    opt.step()

                running_loss += loss.item()
                # Status line: keep every 100th batch ('\n'); otherwise
                # overwrite the line in place ('\r').
                print('[Running epoch %2d, batch %4d] loss: %.3f' %
                      (epoch + 1, i + 1,
                       10000 * running_loss / (e * tl + i + 1)),
                      end='\n' if i % 100 == 0 else '\r')

        if (epoch + 1) % 1 == 0:  # always true; raise the modulus to checkpoint less often
            timestamp = time.time()
            print('[timestamp %d, epoch %2d] loss: %.3f, time: %6ds        ' %
                  (timestamp, epoch + 1,
                   10000 * running_loss / ((i + 1) * rep),
                   timestamp - start_time))
            # Validation PSNR, computed without building autograd graphs.
            with torch.no_grad():
                p_psnr = utils.evalPsnr(net, val_set, fout=fout)

            # Two checkpoints per epoch: weights + optimizer state, and weights only.
            save_model(net, opt,
                       os.path.join(cp_dir,
                                    str(epoch + 1) + '_withopt'))
            torch.save(net.state_dict(), os.path.join(cp_dir, str(epoch + 1)))
            sch.step()
            # get_last_lr() replaces the deprecated get_lr() call pattern.
            print('cur_lr: %.5f' % sch.get_last_lr()[0])

    fout.close()
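
A minimal usage sketch for Example 1. Everything below except trainNet and its
parameters is a hypothetical stand-in (toy model, synthetic dataset, arbitrary
hyperparameters), and it still assumes the project-local utils.ensure_exists,
utils.evalPsnr, and save_model helpers exist:

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

class ToyPairs(Dataset):
    """Yields dicts with the keys trainNet reads: 'com' and 'org'."""
    def __len__(self):
        return 256

    def __getitem__(self, idx):
        org = torch.rand(3, 64, 64)
        com = org + 0.05 * torch.randn_like(org)  # stand-in "compressed" input
        return {'com': com, 'org': org}

net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                    nn.Conv2d(16, 3, 3, padding=1)).cuda()
opt = torch.optim.Adam(net.parameters(), lr=1e-4)
cri = nn.MSELoss()
sch = torch.optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.5)
loader = DataLoader(ToyPairs(), batch_size=8, shuffle=True)

# batch_size_big=4 < batch_size=8, so every batch exercises the big/small split.
trainNet(net, opt, cri, sch,
         cp_dir='./checkpoints', epoch_range=range(5),
         training_set=loader, val_set=loader, batch_size_big=4)
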
Example 2: a multi-scale variant in which the network consumes a (quarter, half, full) input pyramid and the criterion returns per-scale MSE terms.
# Same imports and helper assumptions as Example 1 (os, time, torch,
# torch.nn as nn, utils, save_model); numpy is not needed here.


def trainNet(net,
             opt,
             cri,
             sch,
             cp_dir,
             epoch_range,
             training_set,
             val_set,
             batch_size_big,  # unused in this variant; kept for interface parity
             rep=1):
    tb_dir = os.path.join(cp_dir, 'tb')
    utils.ensure_exists(tb_dir)  # create the log directory if it does not exist
    fout = open(os.path.join(cp_dir, 'train.log'), 'a')  # validation log, appended

    for epoch in epoch_range:
        net.train()
        running_loss = 0
        start_time = time.time()
        tl = len(training_set)

        for e in range(rep):
            for i, data in enumerate(training_set, 0):

                # Compressed inputs and pristine targets at three scales
                # (the _2/_4 suffixes presumably denote half/quarter resolution).
                com = data['com'].float().cuda()
                c_2 = data['com_2'].float().cuda()
                c_4 = data['com_4'].float().cuda()
                org = data['org'].float().cuda()
                o_2 = data['org_2'].float().cuda()
                o_4 = data['org_4'].float().cuda()

                # Coarse-to-fine tuples, as the network and criterion expect.
                com_pair = (c_4, c_2, com)
                org_pair = (o_4, o_2, org)

                opt.zero_grad()
                ret = net(com_pair)
                # The criterion returns the total loss plus per-scale MSE terms.
                loss, MSE4, MSE2, MSEp, MSEd = cri(ret, org_pair)

                loss.backward()
                # Clip the global gradient norm to 10 before stepping.
                nn.utils.clip_grad_norm_(net.parameters(), 10)
                opt.step()

                running_loss += loss.item()
                # Status line: keep every 100th batch ('\n'); otherwise
                # overwrite the line in place ('\r').
                print('[Running epoch %2d, batch %4d] loss: %.3f' %
                      (epoch + 1, i + 1,
                       10000 * running_loss / (e * tl + i + 1)),
                      end='\n' if i % 100 == 0 else '\r')

        if (epoch + 1) % 1 == 0:  # always true; raise the modulus to checkpoint less often
            timestamp = time.time()
            print('[timestamp %d, epoch %2d] loss: %.3f, time: %6ds        ' %
                  (timestamp, epoch + 1,
                   10000 * running_loss / ((i + 1) * rep),
                   timestamp - start_time))
            # Validation PSNR, computed without building autograd graphs.
            with torch.no_grad():
                p_psnr = utils.evalPsnr(net, val_set, fout=fout)

            # Two checkpoints per epoch: weights + optimizer state, and weights only.
            save_model(net, opt,
                       os.path.join(cp_dir,
                                    str(epoch + 1) + '_withopt'))
            torch.save(net.state_dict(), os.path.join(cp_dir, str(epoch + 1)))
            sch.step()
            # get_last_lr() replaces the deprecated get_lr() call pattern.
            print('cur_lr: %.5f' % sch.get_last_lr()[0])

    fout.close()
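
Example 2 leaves the data layout and the criterion's return signature implicit.
The sketch below shows one plausible shape for both; the class names, the
four-tensor network output, and the pairing of the detail term with the
full-resolution target are assumptions, not taken from the original:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

class MultiScalePairs(Dataset):
    """Yields the six keys Example 2 reads, with _2/_4 as half/quarter scale."""
    def __len__(self):
        return 256

    def __getitem__(self, idx):
        org = torch.rand(3, 64, 64)
        com = org + 0.05 * torch.randn_like(org)  # stand-in "compressed" input

        def down(x, s):
            return F.avg_pool2d(x.unsqueeze(0), s).squeeze(0)

        return {'com': com, 'com_2': down(com, 2), 'com_4': down(com, 4),
                'org': org, 'org_2': down(org, 2), 'org_4': down(org, 4)}

class MultiScaleLoss(nn.Module):
    """Returns (loss, MSE4, MSE2, MSEp, MSEd), matching what the loop unpacks.

    Assumes the network emits four tensors: quarter- and half-scale outputs,
    a full-resolution output, and a full-resolution detail branch.
    """
    def forward(self, ret, org_pair):
        r4, r2, rp, rd = ret
        o4, o2, op = org_pair
        MSE4 = F.mse_loss(r4, o4)
        MSE2 = F.mse_loss(r2, o2)
        MSEp = F.mse_loss(rp, op)
        MSEd = F.mse_loss(rd, op)
        return MSE4 + MSE2 + MSEp + MSEd, MSE4, MSE2, MSEp, MSEd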