Example #1
def train(epoch):
    epoch_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        LR, HR_2_target, HR_4_target = batch[0].to(device), batch[1].to(device), batch[2].to(device)

        optimizer.zero_grad()
        HR_2, HR_4 = model(LR)

        # supervise both the 2x and 4x outputs and optimize their sum
        loss1 = Loss(HR_2, HR_2_target)
        loss2 = Loss(HR_4, HR_4_target)
        loss = loss1 + loss2

        epoch_loss += loss.item()

        # a single backward pass on the summed loss is enough;
        # calling loss1.backward() first would accumulate its gradients twice
        loss.backward()

        optimizer.step()

    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader)))
    results['Avg. Loss'].append(round(epoch_loss / len(training_data_loader), 4))
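The train() function above assumes a surrounding two-scale super-resolution setup (model, Loss, optimizer, training_data_loader, results, device) that the snippet never shows. Below is a minimal, self-contained sketch of that context using toy tensors; the model architecture, loss choice, and hyperparameters are illustrative assumptions, not the original author's setup.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hypothetical two-scale model: upsamples a low-res image by 2x, then by 4x
class TwoScaleSRNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.up2 = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                                 nn.Conv2d(3, 3, 3, padding=1))
        self.up4 = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                                 nn.Conv2d(3, 3, 3, padding=1))

    def forward(self, x):
        hr2 = self.up2(x)
        hr4 = self.up4(hr2)
        return hr2, hr4

# toy dataset of (LR, HR_2, HR_4) triples, only to make the loop executable
lr = torch.rand(8, 3, 16, 16)
dataset = TensorDataset(lr, torch.rand(8, 3, 32, 32), torch.rand(8, 3, 64, 64))
training_data_loader = DataLoader(dataset, batch_size=4, shuffle=True)

model = TwoScaleSRNet().to(device)
Loss = nn.L1Loss()                  # assumed reconstruction criterion
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
results = {'Avg. Loss': []}

for epoch in range(1, 3):           # assumes the train() definition above is in scope
    train(epoch)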
def backprop(y, cache):
    """Backpropagate through the cached activations and return per-layer gradients."""
    derivative_w = []
    derivative_b = []

    # loss between the network output (last cached activation) and the targets
    loss = Loss(cache[-1], y)
    loss_value = loss.forward()

    # gradient of the loss w.r.t. the network output
    dA = loss.backward()

    # walk the layers in reverse, skipping the first layer (handled after the loop)
    for index, layer in reversed(list(enumerate(layers))[1:]):
        derivative_w.append(np.dot(cache[index].T, dA))
        derivative_b.append(np.sum(dA, axis=0, keepdims=True))

        # propagate the gradient through the layer's weights and activation
        dZ = np.dot(dA, layer.w.T)
        dA = dZ * layer.backward(cache[index])

    # gradients of the first layer, computed from the raw inputs
    derivative_w.append(np.dot(cache[0].T, dA))
    derivative_b.append(np.sum(dA, axis=0))

    # gradients were collected output-to-input; reverse them to match layer order
    derivative_w = derivative_w[::-1]
    derivative_b = derivative_b[::-1]

    return derivative_w, derivative_b, loss_value
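backprop() depends on a global Loss class exposing forward()/backward() and a global layers list whose elements expose .w plus a backward() that returns the activation derivative at the cached activation. A minimal sketch of interfaces that satisfy those assumptions; the class names, sigmoid activation, and combined sigmoid+BCE loss are illustrative choices, not the original code.

import numpy as np

# hypothetical combined sigmoid-output + binary cross-entropy loss;
# backward() returns the gradient w.r.t. the output layer's pre-activation,
# which is how backprop() above treats the initial dA
class SigmoidBCELoss:
    def __init__(self, y_pred, y_true):
        self.y_pred, self.y_true = y_pred, y_true

    def forward(self):
        p = np.clip(self.y_pred, 1e-7, 1 - 1e-7)
        return -np.mean(self.y_true * np.log(p) + (1 - self.y_true) * np.log(1 - p))

    def backward(self):
        return (self.y_pred - self.y_true) / self.y_true.size

# hypothetical dense layer with a sigmoid activation; backward(a) returns the
# activation derivative evaluated at the cached activation a
class DenseSigmoid:
    def __init__(self, n_in, n_out):
        self.w = 0.1 * np.random.randn(n_in, n_out)
        self.b = np.zeros((1, n_out))

    def forward(self, x):
        return 1.0 / (1.0 + np.exp(-(x @ self.w + self.b)))

    def backward(self, a):
        return a * (1.0 - a)

Loss = SigmoidBCELoss
layers = [DenseSigmoid(4, 8), DenseSigmoid(8, 1)]

# forward pass, caching the input and every layer's activation
X = np.random.randn(16, 4)
y = np.random.randint(0, 2, size=(16, 1)).astype(float)
cache = [X]
for layer in layers:
    cache.append(layer.forward(cache[-1]))

derivative_w, derivative_b, loss_value = backprop(y, cache)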
	

	loss = lossFunc.forward(activation2.output, y)

	# Calculate accuracy from output of activation2 and targets
	predictions = np.argmax(activation2.output, axis=1)  # predicted class index for each sample
	accuracy = np.mean(predictions==y)

	

	#-------------------------------------------------------------------------------------------------------
	#back propagation

	lossFunc.backward(activation2.output, y)
	activation2.backward(lossFunc.dvalues)
	layer2.backward(activation2.dvalues)
	activation1.backward(layer2.dvalues)
	layer1.backward(activation1.dvalues)

	if not epoch % 100:
		print(f'epoch: {epoch}, acc: {accuracy:.3f}, loss: {loss:.3f}, lr: {optimizer.currentLearningRate}')

	optimizer.preUpdateParameters()
	optimizer.updateParameters(layer1)
	optimizer.updateParameters(layer2)
	optimizer.postUpdateParameters()

#Create test data
X_test, y_test = create_data(100, 3)
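The training-loop fragment above calls an optimizer with preUpdateParameters()/updateParameters()/postUpdateParameters() methods that are never defined in the snippet. A minimal sketch of an SGD optimizer with learning-rate decay that fits that interface; the class name, the decay rule, and the .dweights/.dbiases attributes it reads from each layer are assumptions for illustration.

# hypothetical SGD optimizer matching the pre/update/post interface used above
class SGDOptimizer:
    def __init__(self, learningRate=1.0, decay=1e-3):
        self.learningRate = learningRate
        self.currentLearningRate = learningRate
        self.decay = decay
        self.iterations = 0

    def preUpdateParameters(self):
        # decay the learning rate once per optimization step
        if self.decay:
            self.currentLearningRate = self.learningRate / (1.0 + self.decay * self.iterations)

    def updateParameters(self, layer):
        # vanilla SGD step using the gradients assumed to be stored on the layer
        layer.weights -= self.currentLearningRate * layer.dweights
        layer.biases -= self.currentLearningRate * layer.dbiases

    def postUpdateParameters(self):
        self.iterations += 1

optimizer = SGDOptimizer(learningRate=1.0, decay=1e-3)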
Example #4
def train_advent(model, trainloader, targetloader, cfg):
    '''UDA training with ADVENT (adversarial entropy minimization).'''
    # Create the model and start the training.
    input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE
    input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET
    device = cfg.GPU_ID
    num_classes = cfg.NUM_CLASSES
    viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR)
    if viz_tensorboard:
        writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR)

    # SEGMENTATION NETWORK
    model.train()
    model.to(device)
    model.apply(weights_init)
    cudnn.benchmark = True
    cudnn.enabled = True

    # DISCRIMINATOR NETWORK
    # feature-level
    d_aux = get_fc_discriminator(num_classes=num_classes)
    d_aux.train()
    d_aux.to(device)

    # output level, i.e. segmentation maps
    d_main = get_fc_discriminator(num_classes=num_classes)
    d_main.train()
    d_main.to(device)

    # OPTIMIZERS
    # segnet's optimizer
    optimizer = optim.Adam(model.parameters(), lr=cfg.TRAIN.LEARNING_RATE)

    # discriminators' optimizers
    optimizer_d_aux = optim.Adam(d_aux.parameters(),
                                 lr=cfg.TRAIN.LEARNING_RATE_D,
                                 betas=(0.9, 0.99))
    optimizer_d_main = optim.Adam(d_main.parameters(),
                                  lr=cfg.TRAIN.LEARNING_RATE_D,
                                  betas=(0.9, 0.99))

    # interpolate output segmaps
    interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1
    trainloader_iter = enumerate(trainloader)
    targetloader_iter = enumerate(targetloader)
    for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)):

        # reset optimizers
        optimizer.zero_grad()
        optimizer_d_aux.zero_grad()
        optimizer_d_main.zero_grad()
        # adapt LR if needed
        adjust_learning_rate(optimizer, i_iter, cfg)
        adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg)
        adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg)

        # UDA Training
        # only train segnet; don't accumulate grads in the discriminators
        for param in d_aux.parameters():
            param.requires_grad = False
        for param in d_main.parameters():
            param.requires_grad = False
        # train on source: supervised segmentation loss
        _, batch = next(trainloader_iter)
        images_source, labels, _, _ = batch
        pred_src_aux, pred_src_main = model(images_source.cuda(device))
        if cfg.TRAIN.MULTI_LEVEL:
            pred_src_aux = interp(pred_src_aux)
            loss_seg_src_aux = loss_calc(pred_src_aux, labels, device)
        else:
            loss_seg_src_aux = 0
        pred_src_main = interp(pred_src_main)
        loss_seg_src_main = loss_calc(pred_src_main, labels, device)
        loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main
                + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux)
        loss.backward()

        # adversarial training to fool the discriminators
        _, batch = next(targetloader_iter)
        images, _, _, _ = batch
        pred_trg_aux, pred_trg_main = model(images.cuda(device))
        if cfg.TRAIN.MULTI_LEVEL:
            pred_trg_aux = interp_target(pred_trg_aux)
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux, dim=1)))
            loss_adv_trg_aux = bce_loss(d_out_aux, source_label)
        else:
            loss_adv_trg_aux = 0
        pred_trg_main = interp_target(pred_trg_main)
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main, dim=1)))
        loss_adv_trg_main = bce_loss(d_out_main, source_label)
        loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main +
                cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux)
        loss.backward()

        # Train discriminator networks
        # re-enable gradient accumulation in the discriminators
        for param in d_aux.parameters():
            param.requires_grad = True
        for param in d_main.parameters():
            param.requires_grad = True
        # train with source
        if cfg.TRAIN.MULTI_LEVEL:
            pred_src_aux = pred_src_aux.detach()
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux, dim=1)))
            loss_d_aux = bce_loss(d_out_aux, source_label)
            loss_d_aux = loss_d_aux / 2
            loss_d_aux.backward()
        pred_src_main = pred_src_main.detach()
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main, dim=1)))
        loss_d_main = bce_loss(d_out_main, source_label)
        loss_d_main = loss_d_main / 2
        loss_d_main.backward()

        # train with target
        if cfg.TRAIN.MULTI_LEVEL:
            pred_trg_aux = pred_trg_aux.detach()
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux, dim=1)))
            loss_d_aux = bce_loss(d_out_aux, target_label)
            loss_d_aux = loss_d_aux / 2
            loss_d_aux.backward()
        else:
            loss_d_aux = 0
        pred_trg_main = pred_trg_main.detach()
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main, dim=1)))
        loss_d_main = bce_loss(d_out_main, target_label)
        loss_d_main = loss_d_main / 2
        loss_d_main.backward()

        optimizer.step()
        if cfg.TRAIN.MULTI_LEVEL:
            optimizer_d_aux.step()
        optimizer_d_main.step()

        current_losses = {
            'loss_seg_src_aux': loss_seg_src_aux,
            'loss_seg_src_main': loss_seg_src_main,
            'loss_adv_trg_aux': loss_adv_trg_aux,
            'loss_adv_trg_main': loss_adv_trg_main,
            'loss_d_aux': loss_d_aux,
            'loss_d_main': loss_d_main
        }
        print_losses(current_losses, i_iter)

        if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0:
            print('taking snapshot ...')
            print('exp =', cfg.TRAIN.SNAPSHOT_DIR)
            snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR)
            torch.save(model.state_dict(),
                       snapshot_dir / f'model_{i_iter}.pth')
            torch.save(d_aux.state_dict(),
                       snapshot_dir / f'model_{i_iter}_D_aux.pth')
            torch.save(d_main.state_dict(),
                       snapshot_dir / f'model_{i_iter}_D_main.pth')
            if i_iter >= cfg.TRAIN.EARLY_STOP - 1:
                break
        sys.stdout.flush()

        # Visualize with tensorboard
        if viz_tensorboard:
            log_losses_tensorboard(writer, current_losses, i_iter)

            if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1:
                draw_in_tensorboard(writer, images, i_iter, pred_trg_main,
                                    num_classes, 'T')
                draw_in_tensorboard(writer, images_source, i_iter,
                                    pred_src_main, num_classes, 'S')
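train_advent() leans on several helpers imported from elsewhere (get_fc_discriminator, loss_calc, adjust_learning_rate*, bce_loss, prob_2_entropy). A sketch of the last two, written the way ADVENT-style code commonly defines them; treat the exact signatures and the 1e-30 stabilizer as assumptions rather than the project's actual implementation.

import numpy as np
import torch
import torch.nn as nn

def bce_loss(y_pred, y_label):
    """Binary cross-entropy against a constant domain label (0 = source, 1 = target)."""
    y_truth = torch.full_like(y_pred, y_label)
    return nn.BCEWithLogitsLoss()(y_pred, y_truth)

def prob_2_entropy(prob):
    """Turn softmax probabilities (N, C, H, W) into weighted self-information maps."""
    c = prob.size(1)
    return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c)

With helpers like these, the discriminators operate on per-pixel self-information (entropy) maps rather than raw class probabilities, which is the core idea behind ADVENT-style adaptation.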