Code Example #1
File: train.py Project: kukosmos/adain-keras-2019
def calculate_style_loss(x, epsilon=1e-5):
    y_trues, y_preds = x
    loss = [
        mse_loss(K.mean(y_true, axis=(1, 2)), K.mean(y_pred, axis=(1, 2))) +
        mse_loss(K.sqrt(K.var(y_true, axis=(1, 2)) + epsilon),
                 K.sqrt(K.var(y_pred, axis=(1, 2)) + epsilon))
        for y_true, y_pred in zip(y_trues, y_preds)
    ]
    return K.sum(loss)
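The mse_loss helper is not part of this snippet. A minimal sketch consistent with the call sites above, assuming a plain element-wise MSE over Keras backend tensors (the import path is also an assumption):

from tensorflow.keras import backend as K

# Hypothetical helper, not from the original project: mean squared error
# between two backend tensors.
def mse_loss(y_true, y_pred):
    return K.mean(K.square(y_true - y_pred))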
Code Example #2
    def forward_szn(self, data, target):
        #  get score
        target, target_embed = target

        if self.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        if self.cuda:
            target_embed = target_embed.cuda()
        target_embed = Variable(target_embed)

        fcn_score, seen_mask_score = self.model(data, mode='both')

        # get fcn loss
        if self.loss_func == "cos":
            loss = utils.cosine_loss(fcn_score, target, target_embed)
        elif self.loss_func == "mse":
            loss = utils.mse_loss(fcn_score, target, target_embed)

        lbl_pred = utils.infer_lbl_szn(fcn_score, seen_mask_score,
                                       self.seen_embeddings,
                                       self.unseen_embeddings, self.cuda)

        lbl_true = target.data.cpu()

        return fcn_score, loss, lbl_pred, lbl_true
Code Example #3
def evaluate(model, data):
    model.eval()
    with torch.no_grad():
        x, y, mask, idx = data
        output, _ = model(x)
        output = output.squeeze(0)
        loss = mse_loss(output, y, mask)
        mse = loss.item()
        rmse = np.sqrt(mse)

    return output, mse, rmse
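This example and Example #5 pass a mask as a third argument, so their mse_loss is presumably a masked variant. A minimal sketch under that assumption, averaging squared error over the valid (mask == 1) entries only:

import torch

# Hypothetical masked MSE; the three-argument signature matches the call sites.
def mse_loss(output, target, mask):
    sq_err = (output - target) ** 2            # element-wise squared error
    return (sq_err * mask).sum() / mask.sum()  # mean over valid entries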
Code Example #4
  def forward(self, x, edge_index, batch, num_graphs):

    # batch_size = data.num_graphs
    if x is None:
        x = torch.ones(batch.shape[0]).to(device)

    node_mu, node_logvar, class_mu, class_logvar = self.encoder(x, edge_index, batch)
    grouped_mu, grouped_logvar = accumulate_group_evidence(
        class_mu.data, class_logvar.data, batch, True
    )

    # kl-divergence error for style latent space
    node_kl_divergence_loss = torch.mean(
        - 0.5 * torch.sum(1 + node_logvar - node_mu.pow(2) - node_logvar.exp())
    )
    node_kl_divergence_loss = 0.0000001 * node_kl_divergence_loss * num_graphs
    node_kl_divergence_loss.backward(retain_graph=True)

    # kl-divergence error for class latent space
    class_kl_divergence_loss = torch.mean(
        - 0.5 * torch.sum(1 + grouped_logvar - grouped_mu.pow(2) - grouped_logvar.exp())
    )
    class_kl_divergence_loss = 0.0000001 * class_kl_divergence_loss * num_graphs
    class_kl_divergence_loss.backward(retain_graph=True)

    # reconstruct samples
    """
    sampling from group mu and logvar for each graph in mini-batch differently makes
    the decoder consider class latent embeddings as random noise and ignore them 
    """
    node_latent_embeddings = reparameterize(training=True, mu=node_mu, logvar=node_logvar)
    class_latent_embeddings = group_wise_reparameterize(
        training=True, mu=grouped_mu, logvar=grouped_logvar, labels_batch=batch, cuda=True
    )

    # need to reduce MI (mutual information) between node and class latents
    '''measure='JSD'
    mi_loss = local_global_loss_disen(node_latent_embeddings, class_latent_embeddings, edge_index, batch, measure)
    mi_loss.backward(retain_graph=True)'''

    reconstructed_node = self.decoder(node_latent_embeddings, class_latent_embeddings, edge_index)
    #check input feat first
    #print('recon ', x[0],reconstructed_node[0])
    reconstruction_error = 0.1 * mse_loss(reconstructed_node, x) * num_graphs
    reconstruction_error.backward()

    
    return reconstruction_error.item(), class_kl_divergence_loss.item(), node_kl_divergence_loss.item()
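reparameterize is imported from elsewhere in the project; it presumably implements the standard VAE reparameterization trick. A minimal sketch, assuming the (training, mu, logvar) signature used above:

import torch

def reparameterize(training, mu, logvar):
    # z = mu + sigma * eps with eps ~ N(0, I), differentiable w.r.t. mu and logvar;
    # at evaluation time, simply return the mean.
    if not training:
        return mu
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std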
Code Example #5
def train(model, data, optimizer):
    model.train()

    x, y, mask, idx = data
    optimizer.zero_grad()

    output, KLD = model(x)
    output = output.squeeze(0)
    loss = mse_loss(output, y, mask)
    loss = loss + KLD / torch.sum(mask)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1000.0)
    optimizer.step()
    mse = loss.item()
    rmse = np.sqrt(mse)
    return output, mse, rmse
Code Example #6
    def fit(self, input_data, output_data, epochs, batch_size=1):
        itr = 0
        while itr < epochs:
            for X, Y in zip(input_data, output_data):
                # do feed forward
                self.forward(itr, X)
                print("I : ", self.layer_list[1].neurons)
                print("J : ", self.layer_list[2].neurons)
                print("D : ", Y)

                loss = utils.mse_loss(self.layer_list[-1].neurons, Y)
                print("loss : ", loss)

                # do backprop
                self.backward(itr, Y)

                utils.init_layers(self.layer_list)  # init every neurons to -1.

            itr = itr + 1  # increase step

        return None
Code Example #7
    def forward(self, data, target):
        #  get score
        if self.pixel_embeddings:
            target, target_embed = target

        if self.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        if self.pixel_embeddings:
            if self.cuda:
                target_embed = target_embed.cuda()
            target_embed = Variable(target_embed)

        score = self.model(data, mode='fcn')

        # get loss
        if self.loss_func == "cos":
            loss = utils.cosine_loss(score, target, target_embed)
        elif self.loss_func == "mse":
            loss = utils.mse_loss(score, target, target_embed)
        elif self.loss_func == "cross_entropy":
            loss = utils.cross_entropy2d(score, target, size_average=False)

        if np.isnan(float(loss.data[0])):
            raise ValueError('loss is nan while training')

        # inference
        if self.pixel_embeddings:
            if self.forced_unseen:
                lbl_pred = utils.infer_lbl_forced_unseen(
                    score, target, self.seen_embeddings,
                    self.unseen_embeddings, self.unseen, self.cuda)
            else:
                lbl_pred = utils.infer_lbl(score, self.embeddings, self.cuda)
        else:
            lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu()

        return score, loss, lbl_pred, lbl_true
Code Example #8
    def train(self):
        loss_collector = []
        pbar_epoch = tqdm(total=self.max_epoch, desc='[Epoch]')
        max_iteration = int(len(self.dataset) / self.batch_size)
        for epoch in range(self.max_epoch):
            pbar_iteration = tqdm(total=max_iteration, desc='[Iteration]')
            iteration = 0
            for data_x_s, data_x_t, data_c_s, data_c_t in self.dataloader:
                self.batch_size = data_x_s.size(0)

                self.x_s.resize_(self.batch_size, 50, 4)
                self.x_t.resize_(self.batch_size, 50, 4)
                self.c_s.resize_(self.batch_size, 1, self.image_size,
                                 self.image_size)
                self.c_t.resize_(self.batch_size, 1, self.image_size,
                                 self.image_size)

                pbar_iteration.update(1)
                iteration += 1
                self.x_s.copy_(data_x_s)
                self.x_t.copy_(data_x_t)
                self.c_s.copy_(data_c_s)
                self.c_t.copy_(data_c_t)

                x_s_, x_t_, z_s_bag, z_t_bag, cons_bag = self.ada_vae(
                    self.x_s, self.x_t, self.c_s, self.c_t)

                # kl divergence
                kld_s = kld_loss(z_s_bag)
                kld_t = kld_loss(z_t_bag)
                kld = kld_s + kld_t

                # reconstruction loss
                recon_s = mse_loss(self.x_s, x_s_)
                recon_t = mse_loss(self.x_t, x_t_)
                recon = recon_s + recon_t

                # fusion loss
                fusion_loss_1 = mse_loss(cons_bag[0], cons_bag[1])
                fusion_loss_2 = mse_loss(cons_bag[2], cons_bag[3])
                fusion_loss_3 = mse_loss(cons_bag[4], cons_bag[5])
                fusion_loss_4 = mse_loss(cons_bag[6], cons_bag[7])
                fusion = fusion_loss_1 + fusion_loss_2 + fusion_loss_3 + fusion_loss_4

                # total loss
                total_loss = self.alpha * recon + self.beta * kld + self.gamma * fusion

                self.optimizer.zero_grad()
                total_loss.backward()
                self.optimizer.step()

                if iteration % self.print_iter == 0:
                    pbar_iteration.write(
                        '[%d/%d] kld: %.6f, recon: %.6f, fusion: %.6f, total_loss: %.6f'
                        %
                        (iteration, max_iteration, kld.detach().cpu().numpy(),
                         recon.detach().cpu().numpy(),
                         fusion.detach().cpu().numpy(),
                         total_loss.detach().cpu().numpy()))
                    loss_collector.append([
                        epoch, iteration,
                        kld.detach().cpu().numpy(),
                        recon.detach().cpu().numpy(),
                        fusion.detach().cpu().numpy(),
                        total_loss.detach().cpu().numpy()
                    ])

            # save model
            if epoch % self.save_epoch == 0:
                self.save_model()
                pbar_iteration.write('[*] Save one model')
                np.save(self.sample_path + '/loss.npy',
                        np.array(loss_collector))

            pbar_iteration.close()
            pbar_epoch.update(1)

        pbar_epoch.write("[*] Training stage finishes")
        pbar_epoch.close()
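kld_loss is not shown in this snippet. If z_s_bag and z_t_bag are (mu, logvar) pairs, a plausible definition is the closed-form KL divergence to a unit Gaussian prior; the unpacking below is an assumption:

import torch

def kld_loss(z_bag):
    mu, logvar = z_bag  # assumed structure of the bag
    # analytic KL( N(mu, sigma^2) || N(0, I) )
    return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())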
Code Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir',
                        type=str,
                        default='log',
                        help='Name of the log folder')
    parser.add_argument('--save_models',
                        type=bool,
                        default=True,
                        help='Set True if you want to save trained models')
    parser.add_argument('--pre_trained_model_path',
                        type=str,
                        default=None,
                        help='Pre-trained model path')
    parser.add_argument('--pre_trained_model_epoch',
                        type=str,
                        default=None,
                        help='Pre-trained model epoch, e.g. 200')
    parser.add_argument('--train_imgs_path',
                        type=str,
                        default='/mnt/sdb/data/COCO/train2017',
                        help='Path to training images')
    parser.add_argument(
        '--train_annotation_path',
        type=str,
        default='/mnt/sdb/data/COCO/annotations/instances_train2017.json',
        help='Path to annotation file, .json file')
    parser.add_argument('--category_names',
                        type=str,
                        default='giraffe,elephant,zebra,sheep,cow,bear',
                        help='List of categories in MS-COCO dataset')
    parser.add_argument('--num_test_img',
                        type=int,
                        default=4,
                        help='Number of images saved during training')
    parser.add_argument('--img_size',
                        type=int,
                        default=256,
                        help='Generated image size')
    parser.add_argument(
        '--local_patch_size',
        type=int,
        default=256,
        help='Image size of instance images after interpolation')
    parser.add_argument('--batch_size',
                        type=int,
                        default=4,
                        help='Mini-batch size')
    parser.add_argument('--train_epoch',
                        type=int,
                        default=400,
                        help='Maximum training epoch')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0002,
                        help='Initial learning rate')
    parser.add_argument('--optim_step_size',
                        type=int,
                        default=80,
                        help='Learning rate decay step size')
    parser.add_argument('--optim_gamma',
                        type=float,
                        default=0.5,
                        help='Learning rate decay ratio')
    parser.add_argument(
        '--critic_iter',
        type=int,
        default=5,
        help='Number of discriminator update against each generator update')
    parser.add_argument('--noise_size',
                        type=int,
                        default=256,
                        help='Noise vector size')
    parser.add_argument('--lambda_FM',
                        type=float,
                        default=1,
                        help='Trade-off param for feature matching loss')
    parser.add_argument('--lambda_branch',
                        type=float,
                        default=100,
                        help='Trade-off param for reconstruction loss')
    parser.add_argument(
        '--num_res_blocks',
        type=int,
        default=2,
        help='Number of residual block in generator shared part')
    parser.add_argument('--num_res_blocks_fg',
                        type=int,
                        default=2,
                        help='Number of residual block in non-bg branch')
    parser.add_argument('--num_res_blocks_bg',
                        type=int,
                        default=0,
                        help='Number of residual block in generator bg branch')

    opt = parser.parse_args()
    print(opt)

    #Create log folder
    root = 'result_bg/'
    model = 'coco_model_'
    result_folder_name = 'images_' + opt.log_dir
    model_folder_name = 'models_' + opt.log_dir
    if not os.path.isdir(root):
        os.mkdir(root)
    if not os.path.isdir(root + result_folder_name):
        os.mkdir(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.mkdir(root + model_folder_name)

    #Save the script
    copyfile(os.path.basename(__file__),
             root + result_folder_name + '/' + os.path.basename(__file__))

    #Define transformation for dataset images - e.g. scaling
    transform = transforms.Compose([
        transforms.Resize((opt.img_size, opt.img_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    #Load dataset
    category_names = opt.category_names.split(',')
    dataset = CocoData(root=opt.train_imgs_path,
                       annFile=opt.train_annotation_path,
                       category_names=category_names,
                       transform=transform,
                       final_img_size=opt.img_size)

    #Discard images containing very small instances
    dataset.discard_small(min_area=0.0, max_area=1)
    #dataset.discard_bad_examples('bad_examples_list.txt')

    #Define data loader
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)

    #For evaluation define fixed masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)
    y_fixed = sample_batched['seg_mask'][0:opt.num_test_img]
    y_fixed = Variable(y_fixed.cuda())
    z_fixed = torch.randn((opt.num_test_img, opt.noise_size))
    z_fixed = Variable(z_fixed.cuda())

    #Define networks
    G_bg = Generator_BG(z_dim=opt.noise_size,
                        label_channel=len(category_names),
                        num_res_blocks=opt.num_res_blocks,
                        num_res_blocks_fg=opt.num_res_blocks_fg,
                        num_res_blocks_bg=opt.num_res_blocks_bg)
    D_glob = Discriminator(channels=3 + len(category_names),
                           input_size=opt.img_size)
    G_bg.cuda()
    D_glob.cuda()

    #Load parameters from pre-trained models
    if opt.pre_trained_model_path is not None and opt.pre_trained_model_epoch is not None:
        try:
            G_bg.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'G_bg_epoch_' +
                           opt.pre_trained_model_epoch))
            D_glob.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_glob_epoch_' +
                           opt.pre_trained_model_epoch))
            print('Parameters are loaded!')
        except Exception:
            print('Error: Pre-trained parameters are not loaded!')

    #Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()

    #Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.cuda()

    #Define optimizer
    G_local_optimizer = optim.Adam(G_bg.parameters(),
                                   lr=opt.lr,
                                   betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                          D_glob.parameters()),
                                   lr=opt.lr,
                                   betas=(0.0, 0.9))

    #Define learning rate scheduler
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)

    #----------------------------TRAIN---------------------------------------
    print('training start!')
    start_time = time.time()

    for epoch in range(opt.train_epoch):
        scheduler_G.step()
        scheduler_D.step()

        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(opt.batch_size)
        y_fake_ = torch.zeros(opt.batch_size)
        y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())
        epoch_start_time = time.time()

        data_iter = iter(train_loader)
        num_iter = 0
        while num_iter < len(train_loader):
            j = 0
            while j < opt.critic_iter and num_iter < len(train_loader):
                j += 1
                sample_batched = next(data_iter)
                num_iter += 1
                x_ = sample_batched['image']
                y_ = sample_batched['seg_mask']
                y_reduced = torch.sum(y_, 1).view(y_.size(0), 1, y_.size(2),
                                                  y_.size(3))
                y_reduced = torch.clamp(y_reduced, 0, 1)
                y_reduced = Variable(y_reduced.cuda())

                #Update discriminators - D
                #Real examples
                D_glob.zero_grad()
                mini_batch = x_.size()[0]

                if mini_batch != opt.batch_size:
                    y_real_ = torch.ones(mini_batch)
                    y_fake_ = torch.zeros(mini_batch)
                    y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(
                        y_fake_.cuda())

                x_, y_ = Variable(x_.cuda()), Variable(y_.cuda())
                x_d = torch.cat([x_, y_], 1)

                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_)
                D_real_loss.backward()

                #Fake examples
                z_ = torch.randn((mini_batch, opt.noise_size))
                z_ = Variable(z_.cuda())

                #Generate fake images
                G_result, G_result_bg = G_bg(z_, y_)
                G_result_d = torch.cat([G_result, y_], 1)
                D_result = D_glob(G_result_d.detach()).squeeze()

                D_fake_loss = BCE_loss(D_result, y_fake_)
                D_fake_loss.backward()
                D_local_optimizer.step()
                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())
            #Update generator G
            G_bg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()

            G_train_loss = BCE_loss(D_result, y_real_)

            #Feature matching loss between generated image and corresponding ground truth
            FM_loss = criterionVGG(G_result, x_)

            #Branch-similar loss
            branch_sim_loss = mse_loss(torch.mul(G_result, (1 - y_reduced)),
                                       torch.mul(G_result_bg, (1 - y_reduced)))

            total_loss = G_train_loss + opt.lambda_FM * FM_loss + opt.lambda_branch * branch_sim_loss
            total_loss.backward()
            G_local_optimizer.step()
            G_local_losses.append(G_train_loss.item())

            print('loss_d: %.3f, loss_g: %.3f' %
                  (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                print('%d - %d complete!' % ((epoch + 1), num_iter))
                print(result_folder_name)
        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
              ((epoch + 1), opt.train_epoch, per_epoch_ptime,
               torch.mean(torch.FloatTensor(D_local_losses)),
               torch.mean(torch.FloatTensor(G_local_losses))))

        #Save images
        G_bg.eval()
        G_result, G_result_bg = G_bg(z_fixed, y_fixed)
        G_bg.train()

        if epoch % 10 == 0:
            for t in range(y_fixed.size()[1]):
                show_result((epoch + 1),
                            y_fixed[:, t:t + 1, :, :],
                            save=True,
                            path=root + result_folder_name + '/' + model +
                            str(epoch + 1) + '_masked.png')

        show_result((epoch + 1),
                    G_result,
                    save=True,
                    path=root + result_folder_name + '/' + model +
                    str(epoch + 1) + '.png')
        show_result((epoch + 1),
                    G_result_bg,
                    save=True,
                    path=root + result_folder_name + '/' + model +
                    str(epoch + 1) + '_bg.png')

        #Save model params
        if opt.save_models and (epoch > 21 and epoch % 10 == 0):
            torch.save(
                G_bg.state_dict(), root + model_folder_name + '/' + model +
                'G_bg_epoch_' + str(epoch) + '.pth')
            torch.save(
                D_glob.state_dict(), root + model_folder_name + '/' + model +
                'D_glob_epoch_' + str(epoch) + '.pth')

    end_time = time.time()
    total_ptime = end_time - start_time
    print("Training finish!... save training results")
    print('Training time: ' + str(total_ptime))
Code Example #10
        epoch_loss = 0
        for iteration in range(len(dataset) // BATCH_SIZE):

            # load a batch of videos
            X_in = next(loader).float().cuda()

            Y_flat = X_in.view(X_in.size()[0], -1)

            optimizer.zero_grad()

            X1, KL1, muL1, det_q1 = encoder(X_in)
            dec = decoder(X1)

            # calculate recon loss
            dec_flat = dec.view(dec.size()[0], -1)
            img_loss = mse_loss(Y_flat, dec_flat)
            img_loss.backward(retain_graph=True)

            sigma_q1 = torch.einsum('ijkl,ijlm->ijkm', KL1,
                                    torch.einsum('ijkl->ijlk', KL1))

            mul1_transpose = torch.transpose(muL1, dim0=1, dim1=2)
            if ZERO_MEAN_FEA:
                mu_p_transpose = get_prior_mean(FEA_MEAN_S, FEA_MEAN_E)
                kl_loss1 = KL_loss_L1(sigma_p_inv, sigma_q1, mul1_transpose,
                                      mu_p_transpose, det_p, det_q1)
            else:
                kl_loss1 = KL_loss_L1_without_mean(sigma_p_inv, sigma_q1,
                                                   mul1_transpose, det_p,
                                                   det_q1)
Code Example #11
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
    """
    variable definition
    """
    X = torch.FloatTensor(FLAGS.batch_size, 1, FLAGS.image_size,
                          FLAGS.image_size)
    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()

        X = X.cuda()
    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))
    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tStyle_KL_divergence_loss\tClass_KL_divergence_loss\n'
            )

    # load data set and create data loader instance
    print('Loading MNIST dataset...')
    mnist = datasets.MNIST(root='mnist',
                           download=True,
                           train=True,
                           transform=transform_config)
    loader = cycle(
        DataLoader(mnist,
                   batch_size=FLAGS.batch_size,
                   shuffle=True,
                   num_workers=0,
                   drop_last=True))

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        for iteration in range(int(len(mnist) / FLAGS.batch_size)):
            # load a mini-batch
            image_batch, labels_batch = next(loader)

            # set zero_grad for the optimizer
            auto_encoder_optimizer.zero_grad()

            X.copy_(image_batch)

            style_mu, style_logvar, class_mu, class_logvar = encoder(
                Variable(X))
            grouped_mu, grouped_logvar = accumulate_group_evidence(
                class_mu.data, class_logvar.data, labels_batch, FLAGS.cuda)

            # kl-divergence error for style latent space
            style_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar - style_mu.pow(2) -
                                 style_logvar.exp()))
            style_kl_divergence_loss /= (FLAGS.batch_size *
                                         FLAGS.num_channels *
                                         FLAGS.image_size * FLAGS.image_size)
            style_kl_divergence_loss.backward(retain_graph=True)

            # kl-divergence error for class latent space
            class_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + grouped_logvar - grouped_mu.pow(2) -
                                 grouped_logvar.exp()))
            class_kl_divergence_loss /= (FLAGS.batch_size *
                                         FLAGS.num_channels *
                                         FLAGS.image_size * FLAGS.image_size)
            class_kl_divergence_loss.backward(retain_graph=True)

            # reconstruct samples
            """
            sampling from group mu and logvar for each image in mini-batch differently makes
            the decoder consider class latent embeddings as random noise and ignore them 
            """
            style_latent_embeddings = reparameterize(training=True,
                                                     mu=style_mu,
                                                     logvar=style_logvar)
            class_latent_embeddings = group_wise_reparameterize(
                training=True,
                mu=grouped_mu,
                logvar=grouped_logvar,
                labels_batch=labels_batch,
                cuda=FLAGS.cuda)

            reconstructed_images = decoder(style_latent_embeddings,
                                           class_latent_embeddings)

            reconstruction_error = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_images, Variable(X))
            reconstruction_error.backward()

            auto_encoder_optimizer.step()

            if (iteration + 1) % 50 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('Style KL-Divergence loss: ' +
                      str(style_kl_divergence_loss.data.storage().tolist()[0]))
                print('Class KL-Divergence loss: ' +
                      str(class_kl_divergence_loss.data.storage().tolist()[0]))

            # write to log
            with open(FLAGS.log_file, 'a') as log:
                log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    epoch, iteration,
                    reconstruction_error.data.storage().tolist()[0],
                    style_kl_divergence_loss.data.storage().tolist()[0],
                    class_kl_divergence_loss.data.storage().tolist()[0]))

            # write to tensorboard
            writer.add_scalar(
                'Reconstruction loss',
                reconstruction_error.data.storage().tolist()[0],
                epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar(
                'Style KL-Divergence loss',
                style_kl_divergence_loss.data.storage().tolist()[0],
                epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar(
                'Class KL-Divergence loss',
                class_kl_divergence_loss.data.storage().tolist()[0],
                epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)

        # save checkpoints after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))
Code Example #12
    def train(self, data, all_y_trues):
        '''
        - data is a (n x 2) numpy array, n = # of samples in the dataset.
        - all_y_trues is a numpy array with n elements.
        Elements in all_y_trues correspond to those in data.
        '''
        learn_rate = 0.1
        epochs = 1000  # number of times to loop through the entire dataset

        for epoch in range(epochs):
            for x, y_true in zip(data, all_y_trues):
                # --- Do a feedforward (we'll need these values later)
                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
                h1 = sigmoid(sum_h1)

                sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2 = sigmoid(sum_h2)

                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(sum_o1)
                y_pred = o1

                # --- Calculate partial derivatives.
                # --- Naming: d_L_d_w1 represents "partial L / partial w1"
                d_L_d_ypred = -2 * (y_true - y_pred)

                # Neuron o1
                d_ypred_d_w5 = h1 * derive_sigmoid(sum_o1)
                d_ypred_d_w6 = h2 * derive_sigmoid(sum_o1)
                d_ypred_d_b3 = derive_sigmoid(sum_o1)

                d_ypred_d_h1 = self.w5 * derive_sigmoid(sum_o1)
                d_ypred_d_h2 = self.w6 * derive_sigmoid(sum_o1)

                # Neuron h1
                d_h1_d_w1 = x[0] * derive_sigmoid(sum_h1)
                d_h1_d_w2 = x[1] * derive_sigmoid(sum_h1)
                d_h1_d_b1 = derive_sigmoid(sum_h1)

                # Neuron h2
                d_h2_d_w3 = x[0] * derive_sigmoid(sum_h2)
                d_h2_d_w4 = x[1] * derive_sigmoid(sum_h2)
                d_h2_d_b2 = derive_sigmoid(sum_h2)

                # --- Update weights and biases
                # Neuron h1
                self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
                self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
                self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

                # Neuron h2
                self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
                self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
                self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

                # Neuron o1
                self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
                self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
                self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

            # --- Calculate total loss at the end of each epoch
            if epoch % 10 == 0:
                y_preds = np.apply_along_axis(self.feedforward, 1, data)
                loss = mse_loss(all_y_trues, y_preds)
                print("Epoch %d loss: %.3f" % (epoch, loss))
Code Example #13
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
    """
    variable definition
    """
    X = torch.FloatTensor(FLAGS.batch_size, 784)
    '''
    run on GPU if GPU is available
    '''
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    encoder.to(device=device)
    decoder.to(device=device)
    X = X.to(device=device)
    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))
    """
    
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tStyle_KL_divergence_loss\tClass_KL_divergence_loss\n'
            )

    # load data set and create data loader instance
    dirs = os.listdir(os.path.join(os.getcwd(), 'data'))
    print('Loading double multivariate normal time series data...')
    for dsname in dirs:
        params = dsname.split('_')
        if params[2] == 'theta=-1':
            print('Running dataset ', dsname)
            ds = DoubleMulNormal(dsname)
            # ds = experiment3(1000, 50, 3)
            loader = cycle(
                DataLoader(ds,
                           batch_size=FLAGS.batch_size,
                           shuffle=True,
                           drop_last=True))

            # initialize summary writer
            writer = SummaryWriter()

            for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
                print()
                print(
                    'Epoch #' + str(epoch) +
                    '........................................................')

                # the total loss at each epoch after running iterations of batches
                total_loss = 0

                for iteration in range(int(len(ds) / FLAGS.batch_size)):
                    # load a mini-batch
                    image_batch, labels_batch = next(loader)

                    # set zero_grad for the optimizer
                    auto_encoder_optimizer.zero_grad()

                    X.copy_(image_batch)

                    style_mu, style_logvar, class_mu, class_logvar = encoder(
                        Variable(X))
                    grouped_mu, grouped_logvar = accumulate_group_evidence(
                        class_mu.data, class_logvar.data, labels_batch,
                        FLAGS.cuda)

                    # kl-divergence error for style latent space
                    style_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                        -0.5 * torch.sum(1 + style_logvar - style_mu.pow(2) -
                                         style_logvar.exp()))
                    style_kl_divergence_loss /= (FLAGS.batch_size *
                                                 FLAGS.num_channels *
                                                 FLAGS.image_size *
                                                 FLAGS.image_size)
                    style_kl_divergence_loss.backward(retain_graph=True)

                    # kl-divergence error for class latent space
                    class_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                        -0.5 *
                        torch.sum(1 + grouped_logvar - grouped_mu.pow(2) -
                                  grouped_logvar.exp()))
                    class_kl_divergence_loss /= (FLAGS.batch_size *
                                                 FLAGS.num_channels *
                                                 FLAGS.image_size *
                                                 FLAGS.image_size)
                    class_kl_divergence_loss.backward(retain_graph=True)

                    # reconstruct samples
                    """
                    sampling from group mu and logvar for each image in mini-batch differently makes
                    the decoder consider class latent embeddings as random noise and ignore them 
                    """
                    style_latent_embeddings = reparameterize(
                        training=True, mu=style_mu, logvar=style_logvar)
                    class_latent_embeddings = group_wise_reparameterize(
                        training=True,
                        mu=grouped_mu,
                        logvar=grouped_logvar,
                        labels_batch=labels_batch,
                        cuda=FLAGS.cuda)

                    reconstructed_images = decoder(style_latent_embeddings,
                                                   class_latent_embeddings)

                    reconstruction_error = FLAGS.reconstruction_coef * mse_loss(
                        reconstructed_images, Variable(X))
                    reconstruction_error.backward()

                    total_loss += style_kl_divergence_loss + class_kl_divergence_loss + reconstruction_error

                    auto_encoder_optimizer.step()

                    if (iteration + 1) % 50 == 0:
                        print('\tIteration #' + str(iteration))
                        print('Reconstruction loss: ' + str(
                            reconstruction_error.data.storage().tolist()[0]))
                        print('Style KL loss: ' +
                              str(style_kl_divergence_loss.data.storage().
                                  tolist()[0]))
                        print('Class KL loss: ' +
                              str(class_kl_divergence_loss.data.storage().
                                  tolist()[0]))

                    # write to log
                    with open(FLAGS.log_file, 'a') as log:
                        log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                            epoch, iteration,
                            reconstruction_error.data.storage().tolist()[0],
                            style_kl_divergence_loss.data.storage().tolist()
                            [0],
                            class_kl_divergence_loss.data.storage().tolist()
                            [0]))

                    # write to tensorboard
                    writer.add_scalar(
                        'Reconstruction loss',
                        reconstruction_error.data.storage().tolist()[0],
                        epoch * (int(len(ds) / FLAGS.batch_size) + 1) +
                        iteration)
                    writer.add_scalar(
                        'Style KL-Divergence loss',
                        style_kl_divergence_loss.data.storage().tolist()[0],
                        epoch * (int(len(ds) / FLAGS.batch_size) + 1) +
                        iteration)
                    writer.add_scalar(
                        'Class KL-Divergence loss',
                        class_kl_divergence_loss.data.storage().tolist()[0],
                        epoch * (int(len(ds) / FLAGS.batch_size) + 1) +
                        iteration)

                    if epoch == 0 and (iteration + 1) % 50 == 0:
                        torch.save(
                            encoder.state_dict(),
                            os.path.join('checkpoints', 'encoder_' + dsname))
                        torch.save(
                            decoder.state_dict(),
                            os.path.join('checkpoints', 'decoder_' + dsname))

                # save checkpoints after every 10 epochs
                if (epoch + 1) % 10 == 0 or (epoch + 1) == FLAGS.end_epoch:
                    torch.save(
                        encoder.state_dict(),
                        os.path.join('checkpoints', 'encoder_' + dsname))
                    torch.save(
                        decoder.state_dict(),
                        os.path.join('checkpoints', 'decoder_' + dsname))

                print('Total loss at current epoch: ', total_loss.item())
Code Example #14
    def loss(self):
        return digits.mse_loss(self.inference, self.x)
Code Example #15
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    discriminator = Discriminator()
    discriminator.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        raise Exception('This is not implemented')
        encoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))

    """
    variable definition
    """

    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)

    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)

    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()
    adversarial_loss = nn.BCELoss()

    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()

        cross_entropy_loss.cuda()
        adversarial_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        style_latent_space = style_latent_space.cuda()

    """
    optimizer and scheduler definition
    """
    auto_encoder_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    reverse_cycle_optimizer = optim.Adam(
        list(encoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    generator_optimizer = optim.Adam(
        list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    discriminator_optimizer = optim.Adam(
        list(discriminator.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    # divide the learning rate by a factor of 10 after 80 epochs
    auto_encoder_scheduler = optim.lr_scheduler.StepLR(auto_encoder_optimizer, step_size=80, gamma=0.1)
    reverse_cycle_scheduler = optim.lr_scheduler.StepLR(reverse_cycle_optimizer, step_size=80, gamma=0.1)
    generator_scheduler = optim.lr_scheduler.StepLR(generator_optimizer, step_size=80, gamma=0.1)
    discriminator_scheduler = optim.lr_scheduler.StepLR(discriminator_optimizer, step_size=80, gamma=0.1)

    # Used later to define discriminator ground truths
    Tensor = torch.cuda.FloatTensor if FLAGS.cuda else torch.FloatTensor

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            headers = ['Epoch', 'Iteration', 'Reconstruction_loss', 'KL_divergence_loss', 'Reverse_cycle_loss']

            if FLAGS.forward_gan:
              headers.extend(['Generator_forward_loss', 'Discriminator_forward_loss'])

            if FLAGS.reverse_gan:
              headers.extend(['Generator_reverse_loss', 'Discriminator_reverse_loss'])

            log.write('\t'.join(headers) + '\n')

    # load data set and create data loader instance
    print('Loading CIFAR paired dataset...')
    paired_cifar = CIFAR_Paired(root='cifar', download=True, train=True, transform=transform_config)
    loader = cycle(DataLoader(paired_cifar, batch_size=FLAGS.batch_size, shuffle=True, num_workers=0, drop_last=True))

    # Save a batch of images to use for visualization
    image_sample_1, image_sample_2, _ = next(loader)
    image_sample_3, _, _ = next(loader)

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print('Epoch #' + str(epoch) + '..........................................................................')

        # update the learning rate scheduler
        auto_encoder_scheduler.step()
        reverse_cycle_scheduler.step()
        generator_scheduler.step()
        discriminator_scheduler.step()

        for iteration in range(int(len(paired_cifar) / FLAGS.batch_size)):
            # Adversarial ground truths
            valid = Variable(Tensor(FLAGS.batch_size, 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(FLAGS.batch_size, 1).fill_(0.0), requires_grad=False)

            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_mu_1, style_logvar_1, class_latent_space_1 = encoder(Variable(X_1))
            style_latent_space_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)

            kl_divergence_loss_1 = FLAGS.kl_divergence_coef * (
                - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
            )
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)

            style_mu_2, style_logvar_2, class_latent_space_2 = encoder(Variable(X_2))
            style_latent_space_2 = reparameterize(training=True, mu=style_mu_2, logvar=style_logvar_2)

            kl_divergence_loss_2 = FLAGS.kl_divergence_coef * (
                - 0.5 * torch.sum(1 + style_logvar_2 - style_mu_2.pow(2) - style_logvar_2.exp())
            )
            kl_divergence_loss_2 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_2.backward(retain_graph=True)

            reconstructed_X_1 = decoder(style_latent_space_1, class_latent_space_2)
            reconstructed_X_2 = decoder(style_latent_space_2, class_latent_space_1)

            reconstruction_error_1 = FLAGS.reconstruction_coef * mse_loss(reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = FLAGS.reconstruction_coef * mse_loss(reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()

            reconstruction_error = (reconstruction_error_1 + reconstruction_error_2) / FLAGS.reconstruction_coef
            kl_divergence_error = (kl_divergence_loss_1 + kl_divergence_loss_2) / FLAGS.kl_divergence_coef

            auto_encoder_optimizer.step()

            # A-1. Discriminator training during forward cycle
            if FLAGS.forward_gan:
              # Training generator
              generator_optimizer.zero_grad()

              g_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), valid)
              g_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), valid)

              gen_f_loss = (g_loss_1 + g_loss_2) / 2.0
              gen_f_loss.backward()

              generator_optimizer.step()

              # Training discriminator
              discriminator_optimizer.zero_grad()

              real_loss_1 = adversarial_loss(discriminator(Variable(X_1)), valid)
              real_loss_2 = adversarial_loss(discriminator(Variable(X_2)), valid)
              fake_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), fake)
              fake_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), fake)

              dis_f_loss = (real_loss_1 + real_loss_2 + fake_loss_1 + fake_loss_2) / 4.0
              dis_f_loss.backward()

              discriminator_optimizer.step()

            # B. reverse cycle
            image_batch_1, _, __ = next(loader)
            image_batch_2, _, __ = next(loader)

            reverse_cycle_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_latent_space.normal_(0., 1.)

            _, __, class_latent_space_1 = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))

            reconstructed_X_1 = decoder(Variable(style_latent_space), class_latent_space_1.detach())
            reconstructed_X_2 = decoder(Variable(style_latent_space), class_latent_space_2.detach())

            style_mu_1, style_logvar_1, _ = encoder(reconstructed_X_1)
            style_latent_space_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)

            style_mu_2, style_logvar_2, _ = encoder(reconstructed_X_2)
            style_latent_space_2 = reparameterize(training=False, mu=style_mu_2, logvar=style_logvar_2)

            reverse_cycle_loss = FLAGS.reverse_cycle_coef * l1_loss(style_latent_space_1, style_latent_space_2)
            reverse_cycle_loss.backward()
            reverse_cycle_loss /= FLAGS.reverse_cycle_coef

            reverse_cycle_optimizer.step()

            # B-1. Discriminator training during reverse cycle
            if FLAGS.reverse_gan:
              # Training generator
              generator_optimizer.zero_grad()

              g_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), valid)
              g_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), valid)

              gen_r_loss = (g_loss_1 + g_loss_2) / 2.0
              gen_r_loss.backward()

              generator_optimizer.step()

              # Training discriminator
              discriminator_optimizer.zero_grad()

              real_loss_1 = adversarial_loss(discriminator(Variable(X_1)), valid)
              real_loss_2 = adversarial_loss(discriminator(Variable(X_2)), valid)
              fake_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), fake)
              fake_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), fake)

              dis_r_loss = (real_loss_1 + real_loss_2 + fake_loss_1 + fake_loss_2) / 4.0
              dis_r_loss.backward()

              discriminator_optimizer.step()

            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' + str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' + str(kl_divergence_error.data.storage().tolist()[0]))
                print('Reverse cycle loss: ' + str(reverse_cycle_loss.data.storage().tolist()[0]))

                if FLAGS.forward_gan:
                  print('Generator F loss: ' + str(gen_f_loss.data.storage().tolist()[0]))
                  print('Discriminator F loss: ' + str(dis_f_loss.data.storage().tolist()[0]))

                if FLAGS.reverse_gan:
                  print('Generator R loss: ' + str(gen_r_loss.data.storage().tolist()[0]))
                  print('Discriminator R loss: ' + str(dis_r_loss.data.storage().tolist()[0]))

            # write to log
            with open(FLAGS.log_file, 'a') as log:
                row = []

                row.append(epoch)
                row.append(iteration)
                row.append(reconstruction_error.data.storage().tolist()[0])
                row.append(kl_divergence_error.data.storage().tolist()[0])
                row.append(reverse_cycle_loss.data.storage().tolist()[0])

                if FLAGS.forward_gan:
                  row.append(gen_f_loss.data.storage().tolist()[0])
                  row.append(dis_f_loss.data.storage().tolist()[0])

                if FLAGS.reverse_gan:
                  row.append(gen_r_loss.data.storage().tolist()[0])
                  row.append(dis_r_loss.data.storage().tolist()[0])

                row = [str(x) for x in row]
                log.write('\t'.join(row) + '\n')

            # write to tensorboard
            writer.add_scalar('Reconstruction loss', reconstruction_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('KL-Divergence loss', kl_divergence_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('Reverse cycle loss', reverse_cycle_loss.data.storage().tolist()[0],
                              epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)

            if FLAGS.forward_gan:
              writer.add_scalar('Generator F loss', gen_f_loss.data.storage().tolist()[0],
                                epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
              writer.add_scalar('Discriminator F loss', dis_f_loss.data.storage().tolist()[0],
                                epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)

            if FLAGS.reverse_gan:
              writer.add_scalar('Generator R loss', gen_r_loss.data.storage().tolist()[0],
                                epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
              writer.add_scalar('Discriminator R loss', dis_r_loss.data.storage().tolist()[0],
                                epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(), os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(), os.path.join('checkpoints', FLAGS.decoder_save))

            """
            save reconstructed images and style swapped image generations to check progress
            """

            X_1.copy_(image_sample_1)
            X_2.copy_(image_sample_2)
            X_3.copy_(image_sample_3)

            style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))
            style_mu_3, style_logvar_3, _ = encoder(Variable(X_3))

            style_latent_space_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)
            style_latent_space_3 = reparameterize(training=False, mu=style_mu_3, logvar=style_logvar_3)

            reconstructed_X_1_2 = decoder(style_latent_space_1, class_latent_space_2)
            reconstructed_X_3_2 = decoder(style_latent_space_3, class_latent_space_2)

            # save input image batch
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
              image_batch = np.concatenate((image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)

            # save reconstructed batch
            reconstructed_x = np.transpose(reconstructed_X_1_2.cpu().data.numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
              reconstructed_x = np.concatenate((reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x, name=str(epoch) + '_target', save=True)

            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
              style_batch = np.concatenate((style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)

            # save style swapped reconstructed batch
            reconstructed_style = np.transpose(reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
              reconstructed_style = np.concatenate((reconstructed_style, reconstructed_style, reconstructed_style), axis=3)
            imshow_grid(reconstructed_style, name=str(epoch) + '_style_target', save=True)
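
The snapshot block above draws deterministic style codes through reparameterize(training=False, ...), a helper these examples share but never define. A minimal sketch consistent with its call sites (an assumption, not the original implementation):

import torch

def reparameterize(training, mu, logvar):
    # Reparameterization trick: while training, sample
    # z = mu + sigma * eps with eps ~ N(0, I); at eval time,
    # return the mean deterministically.
    if training:
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    return mu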
Code example #16
0
File: train.py  Project: kukosmos/adain-keras-2019
def calculate_content_loss(x):
    y_true, y_pred = x
    return mse_loss(y_true, y_pred)
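
calculate_content_loss (and the style loss that accompanies it in this project) delegates to an mse_loss helper that is not shown. A plausible Keras-backend definition, offered as an assumption rather than the project's actual code:

from tensorflow.keras import backend as K

def mse_loss(y_true, y_pred):
    # Hypothetical stand-in: mean squared error over all elements.
    return K.mean(K.square(y_pred - y_true))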
Code example #17
0
            anchor = layer_values_A[
                layer_name]  # TODO: already switched anchor and pos
            pos = layer_values_Ap[layer_name]
        else:
            pos = layer_values_A[
                layer_name]  #TODO: already switched anchor and pos
            anchor = layer_values_Ap[layer_name]
        if Use_B_Bp_A:
            neg = layer_values_Bp[layer_name]
        else:
            neg = layer_values_B[layer_name]

        triplet_loss_data_A_Ap_B[layer_name] = triplet_loss_dict(
            anchor, pos, neg, triplet_loss_type, regularize_lambda,
            triplet_loss_margins[layer_name]['A_Ap_B'])
        mse_loss_A_Ap[layer_name] = mse_loss(anchor, pos)
        mse_loss_A_B[layer_name] = mse_loss(anchor, neg)
        # We only add mse here to add the ALP.
    if Use_A1_Ap_B:
        for i in range(A1_Ap_B_num):
            if switch_an_neg:
                anchor = A1_Ap_B_list[i]['layer_values_A1'][
                    layer_name]  # TODO: I've changed the a and p here
                pos = layer_values_Ap[layer_name]
            else:
                pos = A1_Ap_B_list[i]['layer_values_A1'][
                    layer_name]  #TODO: I've changed the a and p here
                anchor = layer_values_Ap[layer_name]
            neg = layer_values_B[layer_name]

            triplet_loss_data_A1_Ap_B_list[i][layer_name] = triplet_loss_dict(
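
The fragment above is truncated mid-call and leans on a triplet_loss_dict helper that is not included. For orientation, a generic margin-based triplet loss over feature tensors (a sketch of the standard formulation, not this project's helper) looks like:

import torch
import torch.nn.functional as F

def triplet_loss(anchor, pos, neg, margin=1.0):
    # Pull the anchor toward the positive and push it away from the
    # negative until they differ by at least `margin` in MSE distance.
    d_pos = F.mse_loss(anchor, pos)
    d_neg = F.mse_loss(anchor, neg)
    return torch.clamp(d_pos - d_neg + margin, min=0.0)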
Code example #18
0
def main(rank):  #Modified for TPU purposes

    #Seed - Added for TPU purposes
    torch.manual_seed(1)

    #Define Dataset - Modified for TPU purposes
    dataset = SERIAL_EXEC.run(
        lambda: CocoData(root=FLAGS['train_imgs_path'],
                         annFile=FLAGS['train_annotation_path'],
                         category_names=category_names,
                         transform=transform,
                         final_img_size=FLAGS['img_size']))

    #Discard images that contain very small instances
    dataset.discard_small(min_area=0.0, max_area=1)
    #dataset.discard_bad_examples('bad_examples_list.txt')

    #Define data sampler - Added for TPU purposes
    train_sampler = DistributedSampler(dataset,
                                       num_replicas=xm.xrt_world_size(),
                                       rank=xm.get_ordinal(),
                                       shuffle=True)

    #Define data loader
    train_loader = DataLoader(  #Modified for TPU purposes
        dataset,
        batch_size=FLAGS['batch_size'],
        sampler=train_sampler,
        num_workers=FLAGS['num_workers'],
        # shuffle=True
    )

    #Define device - Added for TPU purposes
    device = xm.xla_device(devkind='TPU')

    #For evaluation define fixed masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)
    y_fixed = sample_batched['seg_mask'][0:FLAGS['num_test_img']]
    y_fixed = Variable(y_fixed.to(device))  #Modified for TPU purposes
    z_fixed = torch.randn((FLAGS['num_test_img'], FLAGS['noise_size']))
    z_fixed = Variable(z_fixed.to(device))  #Modified for TPU purposes

    #Define networks
    G_bg = WRAPPED_GENERATOR.to(device)  #Modified for TPU purposes
    D_glob = WRAPPED_DISCRIMINATOR.to(device)  #Modified for TPU purposes

    #Load parameters from pre-trained models - Modified for TPU purposes
    if (FLAGS['pre_trained_model_path'] is not None
            and FLAGS['pre_trained_model_epoch'] is not None):
        try:
            G_bg.load_state_dict(
                xser.load(FLAGS['pre_trained_model_path'] + 'G_bg_epoch_' +
                          FLAGS['pre_trained_model_epoch']))
            D_glob.load_state_dict(
                xser.load(FLAGS['pre_trained_model_path'] + 'D_glob_epoch_' +
                          FLAGS['pre_trained_model_epoch']))

            xm.master_print('Parameters are loaded!')
        except Exception:
            xm.master_print('Error: Pre-trained parameters are not loaded!')

    #Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()

    #Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.to(device)  #Modified for TPU purposes

    #Define optimizer
    G_local_optimizer = optim.Adam(G_bg.parameters(),
                                   lr=FLAGS['lr'],
                                   betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                          D_glob.parameters()),
                                   lr=FLAGS['lr'],
                                   betas=(0.0, 0.9))

    #Define learning rate scheduler
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])

    #----------------------------TRAIN---------------------------------------
    xm.master_print('training start!')  #Modified for TPU reasons
    tracker = xm.RateTracker()  #Added for TPU reasons
    start_time = time.time()

    for epoch in range(FLAGS['train_epoch']):
        epoch_start_time = time.time()
        para_loader = pl.ParallelLoader(train_loader,
                                        [device])  #Added for TPU purposes
        loader = para_loader.per_device_loader(device)  #Added for TPU purposes

        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(FLAGS['batch_size'])
        y_fake_ = torch.zeros(FLAGS['batch_size'])
        y_real_ = Variable(y_real_.to(device))  #Modified for TPU purposes
        y_fake_ = Variable(y_fake_.to(device))  #Modified for TPU purposes

        data_iter = iter(loader)  #Modified for TPU purposes
        num_iter = 0

        while num_iter < len(loader):
            j = 0
            while j < FLAGS['critic_iter'] and num_iter < len(loader):
                j += 1
                sample_batched = next(data_iter)
                num_iter += 1

                x_ = sample_batched['image']
                x_ = Variable(x_.to(device))  #Modified for TPU purposes

                y_ = sample_batched['seg_mask']
                y_ = Variable(y_.to(device))  #Modified for TPU purposes

                y_reduced = torch.sum(y_, 1).view(y_.size(0), 1, y_.size(2),
                                                  y_.size(3))
                y_reduced = torch.clamp(y_reduced, 0, 1)
                y_reduced = Variable(
                    y_reduced.to(device))  #Modified for TPU purposes

                #Update discriminators - D
                #Real examples
                D_glob.zero_grad()

                mini_batch = x_.size()[0]
                if mini_batch != FLAGS['batch_size']:
                    y_real_ = torch.ones(mini_batch)
                    y_fake_ = torch.zeros(mini_batch)
                    y_real_ = Variable(
                        y_real_.to(device))  #Modified for TPU purposes
                    y_fake_ = Variable(
                        y_fake_.to(device))  #Modified for TPU purposes

                x_d = torch.cat([x_, y_], 1)

                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_)
                D_real_loss.backward()

                #Fake examples
                z_ = torch.randn((mini_batch, FLAGS['noise_size']))
                z_ = Variable(z_.to(device))

                #Generate fake images; the generator returns the composite
                #image and a background-only branch. Keep the branch output
                #in a separate variable so the G_bg model is not overwritten
                #by its own output.
                G_result, G_bg_branch = G_bg(z_, y_)
                G_result_d = torch.cat([G_result, y_], 1)
                D_result = D_glob(G_result_d.detach()).squeeze()

                D_fake_loss = BCE_loss(D_result, y_fake_)
                D_fake_loss.backward()

                xm.optimizer_step(D_local_optimizer)

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())

            #Update generator G
            G_bg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()

            G_train_loss = BCE_loss(D_result, y_real_)

            #Feature matching loss between generated image and corresponding ground truth
            FM_loss = criterionVGG(G_result, x_)

            #Branch-similar loss
            branch_sim_loss = mse_loss(torch.mul(G_result, (1 - y_reduced)),
                                       torch.mul(G_bg_branch, (1 - y_reduced)))

            total_loss = G_train_loss + FLAGS['lambda_FM'] * FM_loss + FLAGS[
                'lambda_branch'] * branch_sim_loss
            total_loss.backward()

            xm.optimizer_step(G_local_optimizer)

            G_local_losses.append(G_train_loss.item())

            xm.master_print('loss_d: %.3f, loss_g: %.3f' %
                            (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                xm.master_print('%d - %d complete!' % ((epoch + 1), num_iter))
                xm.master_print(result_folder_name)

        #Modified location of the scheduler step to avoid warning
        scheduler_G.step()
        scheduler_D.step()

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        xm.master_print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
                        ((epoch + 1), FLAGS['train_epoch'], per_epoch_ptime,
                         torch.mean(torch.FloatTensor(D_local_losses)),
                         torch.mean(torch.FloatTensor(G_local_losses))))

        #Save images
        G_bg.eval()
        G_result, G_bg_branch = G_bg(z_fixed, y_fixed)
        G_bg.train()

        if epoch == 0:
            for t in range(y_fixed.size()[1]):
                show_result((epoch + 1),
                            y_fixed[:, t:t + 1, :, :],
                            save=True,
                            path=root + result_folder_name + '/' + model +
                            str(epoch + 1) + '_masked.png')

        show_result((epoch + 1),
                    G_result,
                    save=True,
                    path=root + result_folder_name + '/' + model +
                    str(epoch + 1) + '.png')
        show_result((epoch + 1),
                    G_bg_branch,
                    save=True,
                    path=root + result_folder_name + '/' + model +
                    str(epoch + 1) + '_bg.png')

        #Save model params - Modified for TPU purposes
        if FLAGS['save_models'] and (epoch > 21 and epoch % 10 == 0):
            xser.save(G_bg.state_dict(),
                      root + model_folder_name + '/' + model + 'G_bg_epoch_' +
                      str(epoch) + '.pth',
                      master_only=True)

            xser.save(D_glob.state_dict(),
                      root + model_folder_name + '/' + model +
                      'D_glob_epoch_' + str(epoch) + '.pth',
                      master_only=True)

    end_time = time.time()
    total_ptime = end_time - start_time
    xm.master_print("Training finish!... save training results")
    xm.master_print('Training time: ' + str(total_ptime))
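
main(rank) above is written as a per-core TPU entry point, but the launcher falls outside the excerpt. It would typically be started with torch_xla's multiprocessing spawner; a minimal sketch (the core count and start method here are placeholders, not values from the original script):

import torch_xla.distributed.xla_multiprocessing as xmp

if __name__ == '__main__':
    # Fork one process per TPU core; each process runs main(rank).
    xmp.spawn(main, args=(), nprocs=8, start_method='fork')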
Code example #19
0
def main(rank):
    
    #Seed - Added for TPU purposes
    torch.manual_seed(1)
       
    #Create log folder
    root = 'result_fg/'
    model = 'coco_model_'
    result_folder_name = 'images_' + FLAGS['log_dir']
    model_folder_name = 'models_' + FLAGS['log_dir']
    if not os.path.isdir(root):
        os.mkdir(root)
    if not os.path.isdir(root + result_folder_name):
        os.mkdir(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.mkdir(root + model_folder_name)
    
    #Save the script
    copyfile(os.path.basename(__file__), root + result_folder_name + '/' + os.path.basename(__file__))
    
    #Define transformation for dataset images - e.g. scaling
    transform = transforms.Compose(
        [
            transforms.Resize((FLAGS['img_size'],FLAGS['img_size'])),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]
    ) 
    #Load dataset
    category_names = FLAGS['category_names'].split(',')
    
    #Serial Executor - This is needed to spread inside TPU for memory purposes
    SERIAL_EXEC = xmp.MpSerialExecutor()
    
    #Define Dataset
    dataset = SERIAL_EXEC.run(
        lambda: CocoData(
            root = FLAGS['train_imgs_path'],
            annFile = FLAGS['train_annotation_path'],
            category_names = category_names,
            transform=transform,
            final_img_size=FLAGS['img_size']
        )
    )
    
    #Discard images that contain very small instances
    dataset.discard_small(min_area=0.03, max_area=1)
    
    #Define data sampler - Added for TPU purposes
    train_sampler = DistributedSampler(
        dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True
    )

    #Define data loader
    train_loader = DataLoader( #Modified for TPU purposes
        dataset,
        batch_size=FLAGS['batch_size'],
        sampler=train_sampler,
        num_workers=FLAGS['num_workers']
        # shuffle=True
    )

    #Define device - Added for TPU purposes
    device = xm.xla_device(devkind='TPU')

    #For evaluation define fixed masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)
    x_fixed = sample_batched['image'][0:FLAGS['num_test_img']]
    x_fixed = Variable(x_fixed.to(device))
    y_fixed = sample_batched['single_fg_mask'][0:FLAGS['num_test_img']]
    y_fixed = Variable(y_fixed.to(device))
    z_fixed = torch.randn((FLAGS['num_test_img'],FLAGS['noise_size']))
    z_fixed = Variable(z_fixed.to(device))
    
    #Define networks
    generator = Generator_FG(
        z_dim=FLAGS['noise_size'],
        label_channel=len(category_names),
        num_res_blocks=FLAGS['num_res_blocks']
    )

    discriminator_glob = Discriminator(
        channels=3+len(category_names)
    )

    discriminator_instance = Discriminator(
        channels=3+len(category_names),
        input_size=FLAGS['local_patch_size']
    )

    WRAPPED_GENERATOR = xmp.MpModelWrapper(generator) #Added for TPU purposes
    WRAPPED_DISCRIMINATOR_GLOB = xmp.MpModelWrapper(discriminator_glob) #Added for TPU purposes
    WRAPPED_DISCRIMINATOR_INSTANCE = xmp.MpModelWrapper(discriminator_instance) #Added for TPU purposes

    G_fg = WRAPPED_GENERATOR.to(device) #Modified for TPU purposes
    D_glob = WRAPPED_DISCRIMINATOR_GLOB.to(device) #Modified for TPU purposes
    D_instance = WRAPPED_DISCRIMINATOR_INSTANCE.to(device) #Modified for TPU purposes
    
    #Load parameters from pre-trained models
    if FLAGS['pre_trained_model_path'] is not None and FLAGS['pre_trained_model_epoch'] is not None:
        try:
            G_fg.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'G_fg_epoch_' + FLAGS['pre_trained_model_epoch']))
            D_glob.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'D_glob_epoch_' + FLAGS['pre_trained_model_epoch']))
            D_instance.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'D_local_epoch_' + FLAGS['pre_trained_model_epoch']))
  
            xm.master_print('Parameters are loaded!')
        except Exception:
            xm.master_print('Error: Pre-trained parameters are not loaded!')
    
    #Define interpolation operation
    up_instance =  nn.Upsample(
        size=(FLAGS['local_patch_size'],FLAGS['local_patch_size']),
        mode='bilinear'
    )
    
    #Define pooling operation for the case that image size and local patch size are mismatched
    pooling_instance = nn.Sequential()
    if FLAGS['local_patch_size']!=FLAGS['img_size']:
        pooling_instance.add_module(
            '0',
            nn.AvgPool2d(int(FLAGS['img_size']/FLAGS['local_patch_size']))
        )
        
    #Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()
    
    #Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.to(device) #Modified for TPU Purposes
         
    #Define optimizer
    G_local_optimizer = optim.Adam(
        G_fg.parameters(),
        lr=FLAGS['lr'],
        betas=(0.0, 0.9)
    )
    D_local_optimizer = optim.Adam(
        list(filter(lambda p: p.requires_grad, D_glob.parameters())) + list(filter(lambda p: p.requires_grad, D_instance.parameters())),
        lr=FLAGS['lr'],
        betas=(0.0,0.9)
    )

    #Define learning rate scheduler
    scheduler_G = lr_scheduler.StepLR(
        G_local_optimizer,
        step_size=FLAGS['optim_step_size'],
        gamma=FLAGS['optim_gamma']
    )
    scheduler_D = lr_scheduler.StepLR(
        D_local_optimizer,
        step_size=FLAGS['optim_step_size'],
        gamma=FLAGS['optim_gamma']
    )
    
    #----------------------------TRAIN-----------------------------------------
    xm.master_print('training start!')
    tracker = xm.RateTracker() #Added for TPU reasons
    start_time = time.time()
    
    for epoch in range(FLAGS['train_epoch']):
        epoch_start_time = time.time()
        para_loader = pl.ParallelLoader(train_loader, [device]) #Added for TPU purposes
        loader = para_loader.per_device_loader(device) #Added for TPU purposes
         
        D_local_losses = []
        G_local_losses = []
    
        y_real_ = torch.ones(FLAGS['batch_size'])
        y_fake_ = torch.zeros(FLAGS['batch_size'])
        y_real_ = Variable(y_real_.to(device)) #Modified for TPU purposes
        y_fake_ = Variable(y_fake_.to(device)) #Modified for TPU purposes

        data_iter = iter(loader)
        num_iter = 0

        while num_iter < len(loader): #Modified for TPU purposes 
            j=0
            while j < FLAGS['critic_iter'] and num_iter < len(loader):
                j += 1
                sample_batched = next(data_iter)
                num_iter += 1

                x_ = sample_batched['image']
                x_ = Variable(x_.to(device)) #Modified for TPU purposes

                y_ = sample_batched['single_fg_mask']
                y_ = Variable(y_.to(device)) #Modified for TPU purposes

                fg_mask = sample_batched['seg_mask']
                fg_mask = Variable(fg_mask.to(device)) #Modified for TPU purposes

                y_instances = sample_batched['mask_instance']
                bbox = sample_batched['bbox']
                
                mini_batch = x_.size()[0]
                if mini_batch != FLAGS['batch_size']:
                    break
                
                #Update discriminators - D 
                #Real examples
                D_glob.zero_grad()
                D_instance.zero_grad()
                    
                y_reduced = torch.sum(y_,1).clamp(0,1).view(y_.size(0),1,FLAGS['img_size'],FLAGS['img_size'])
                
                x_d = torch.cat([x_,fg_mask],1)
                
                x_instances = torch.zeros((FLAGS['batch_size'],3,FLAGS['local_patch_size'],FLAGS['local_patch_size']))
                x_instances = Variable(x_instances.to(device))
                y_instances = Variable(y_instances.to(device))
                y_instances = pooling_instance(y_instances)
                G_instances = torch.zeros((FLAGS['batch_size'],3,FLAGS['local_patch_size'],FLAGS['local_patch_size']))
                G_instances = Variable(G_instances.to(device))
                      
                #Obtain instances
                for t in range(x_d.size()[0]):
                    x_instance = x_[t,0:3,bbox[0][t]:bbox[1][t],bbox[2][t]:bbox[3][t]] 
                    x_instance = x_instance.contiguous().view(1,x_instance.size()[0],x_instance.size()[1],x_instance.size()[2]) 
                    x_instances[t] = up_instance(x_instance)
                    
                D_result_instance = D_instance(torch.cat([x_instances,y_instances],1)).squeeze()       
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_) +  BCE_loss(D_result_instance, y_real_)
                D_real_loss.backward()
                
                #Fake examples
                z_ = torch.randn((mini_batch,FLAGS['noise_size']))
                z_ = Variable(z_.to(device))
    
                #Generate fake images
                G_fg_result = G_fg(z_,y_, torch.mul(x_,(1-y_reduced)))
                G_result_d = torch.cat([G_fg_result,fg_mask],1) 
                            
                #Obtain fake instances
                for t in range(x_d.size()[0]):
                    G_instance = G_result_d[t,0:3,bbox[0][t]:bbox[1][t],bbox[2][t]:bbox[3][t]] 
                    G_instance = G_instance.contiguous().view(1,G_instance.size()[0],G_instance.size()[1],G_instance.size()[2]) 
                    G_instances[t] = up_instance(G_instance)
                
                
                D_result_instance = D_instance(torch.cat([G_instances,y_instances],1).detach()).squeeze() 
                D_result = D_glob(G_result_d.detach()).squeeze() 
                D_fake_loss = BCE_loss(D_result, y_fake_) +  BCE_loss(D_result_instance, y_fake_)
                D_fake_loss.backward()

                xm.optimizer_step(D_local_optimizer) #Modified for TPU purposes
                
                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())
    
            if mini_batch != FLAGS['batch_size']:
                break  
            
            #Update generator G
            G_fg.zero_grad()   
            D_result = D_glob(G_result_d).squeeze() 
            D_result_instance = D_instance(torch.cat([G_instances,y_instances],1)).squeeze() 
            G_train_loss = (1-FLAGS['trade_off_G'])*BCE_loss(D_result, y_real_) + FLAGS['trade_off_G']*BCE_loss(D_result_instance, y_real_) 
            
            #Feature matching loss between generated image and corresponding ground truth
            FM_loss = criterionVGG(G_fg_result, x_)
            
            #Reconstruction loss
            Recon_loss = mse_loss(torch.mul(x_,(1-y_reduced) ), torch.mul(G_fg_result,(1-y_reduced))  )
    
            total_loss = G_train_loss + FLAGS['lambda_FM']*FM_loss + FLAGS['lambda_recon']*Recon_loss
            total_loss.backward() 

            xm.optimizer_step(G_local_optimizer)

            G_local_losses.append(G_train_loss.item())

            xm.master_print('loss_d: %.3f, loss_g: %.3f' % (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                xm.master_print('%d - %d complete!' % ((epoch+1), num_iter))
                xm.master_print(result_folder_name)

        #Modified location of the scheduler step to avoid warning
        scheduler_G.step()
        scheduler_D.step()

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        xm.master_print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' % ((epoch + 1), FLAGS['train_epoch'], per_epoch_ptime, torch.mean(torch.FloatTensor(D_local_losses)), torch.mean(torch.FloatTensor(G_local_losses))))
    
        #Save images
        G_fg.eval()
        
        if epoch == 0:
            show_result(
                (epoch+1),
                x_fixed,
                save=True,
                path=root + result_folder_name+ '/' + model + str(epoch + 1 ) + '_gt.png'
            )
            for t in range(y_fixed.size()[1]):
                show_result(
                    (epoch+1),
                    y_fixed[:,t:t+1,:,:],
                    save=True,
                    path=root + result_folder_name+ '/' + model + str(epoch + 1 ) +'_'+ str(t) +'_masked.png'
                )
            
        show_result(
            (epoch+1),
            G_fg(
                z_fixed,
                y_fixed,
                torch.mul(
                    x_fixed,
                    (1-torch.sum(y_fixed,1).view(y_fixed.size(0),1,FLAGS['img_size'],FLAGS['img_size']))
                )
            ),
            save=True,
            path=root + result_folder_name+ '/' + model + str(epoch + 1 ) + '_fg.png'
        )
        
        G_fg.train()
        
        #Save model params
        if FLAGS['save_models'] and (epoch>11 and epoch % 10 == 0 ):
            xser.save(
                G_fg.state_dict(),
                root + model_folder_name + '/' + model + 'G_fg_epoch_'+str(epoch)+'.pth',
                master_only=True
            )
            xser.save(
                D_glob.state_dict(),
                root + model_folder_name + '/' + model + 'D_glob_epoch_'+str(epoch)+'.pth',
                master_only=True
            )
            xser.save(
                D_instance.state_dict(),
                root + model_folder_name + '/' + model + 'D_local_epoch_'+str(epoch)+'.pth',
                master_only=True
            )
                         
    end_time = time.time()
    total_ptime = end_time - start_time
    xm.master_print("Training finish!... save training results")
    xm.master_print('Training time: ' + str(total_ptime))
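
The reconstruction term above penalizes the generator only outside the foreground masks, so the background is copied through unchanged. In isolation, that masked MSE reduces to something like the following sketch (mask convention assumed: 1 on foreground, 0 on background):

import torch

def background_recon_loss(x, g, mask):
    # Compare real image x and generated image g only where
    # mask == 0, i.e. the region the generator must leave intact.
    keep = 1 - mask
    return torch.mean((x * keep - g * keep) ** 2)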
Code example #20
0
File: regression-TF.py  Project: Dasona/DIGITS
    def loss(self):
        label = tf.reshape(self.y, shape=[-1, 2])
        model = self.inference
        loss = digits.mse_loss(model, label)
        return loss
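
digits.mse_loss here is a DIGITS helper whose body is not shown; assuming it behaves like a plain mean-squared-error (an assumption, not the DIGITS source), a TensorFlow equivalent would be roughly:

import tensorflow as tf

def mse_loss(model, label):
    # Hypothetical equivalent: mean squared error between the
    # network output and the reshaped 2-column labels.
    return tf.reduce_mean(tf.square(model - label))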
Code example #21
0
def train():
    method = train_parameters['method']
    print(method)
    save_dir = train_parameters['save_dir']
    print(save_dir)

    
    train_reader = paddle.batch(SH_data_loader('/home/aistudio/sh/sh/part_B_final/train_data/images/', size=[256, 512], mode='train', scale=8),
                                batch_size=train_parameters['train_batch_size'],
                                drop_last=False)
    test_reader = paddle.batch(SH_data_loader('/home/aistudio/sh/sh/part_B_final/test_data/images/', size=[256, 512],mode='val', scale=8),
                                batch_size=1,
                                drop_last=False)
    
    with fluid.dygraph.guard():
        epoch_num = train_parameters["num_epochs"] # 5
        print("epoch_num", epoch_num)
        
       
		print("CSR")
		net = CSRNet("CSR")
            
       
        
        print('train')
        optimizer = optimizer_setting(train_parameters)
        #optimizer = fluid.optimizer.SGD(1e-6,momentum=0.95)
        
        if train_parameters["continue_train"]:
            # 加载上一次训练的模型,继续训练
            
            model, _ = fluid.load_dygraph(train_parameters['continue_train_dir'])
            net.load_dict(model)
            optimizer.set_dict(_)
            print('继续训练', train_parameters['continue_train_dir'])
        
        best_mae = 1000000
        min_epoch=0
        for epoch in range(epoch_num):
          
            epoch_loss = 0
            #mae = 0
            for batch_id, data in enumerate(train_reader()):
                image = np.array([x[0] for x in data]).astype('float32')
                label = np.array([x[1] for x in data]).astype('float32')
        
                image = fluid.dygraph.to_variable(image)
                label = fluid.dygraph.to_variable(label)
                label.stop_gradient = True
                predict = net(image)
                loss = mse_loss(predict, label)
                backward_strategy = fluid.dygraph.BackwardStrategy()
                backward_strategy.sort_sum_gradient = True
                loss.backward(backward_strategy)
                epoch_loss+=loss.numpy()[0]
                #print(net._x_for_debug.gradient())
                optimizer.minimize(loss)
                net.clear_gradients()
                #mae+=abs(predict.numpy().sum()-label.numpy().sum())
            print('epoch:', epoch, 'loss:', epoch_loss)
                
            # dy_param_value = {}
            # for param in net.parameters():
            #     dy_param_value[param.name] = param.numpy()
           
            # fluid.save_dygraph(net.state_dict(), save_dir + method + str(epoch))
            # fluid.save_dygraph(optimizer.state_dict(), save_dir + method + str(epoch))
            
            net.eval()
            mae=0
            mse = 0
            val_loss = 0
            for batch_id, data in enumerate(test_reader()):
                image = np.array([x[0] for x in data]).astype('float32')
                label = np.array([x[1] for x in data]).astype('float32')
         
                image = fluid.dygraph.to_variable(image)
                label = fluid.dygraph.to_variable(label)
                label.stop_gradient = True
                predict = net(image)
                loss = mse_loss(predict, label)
                val_loss += loss.numpy()[0]
                mae += abs(predict.numpy().sum()-label.numpy().sum())
                mse += (predict.numpy().sum()-label.numpy().sum())*(predict.numpy().sum()-label.numpy().sum())
            net.train()    
            if mae/(batch_id+1)<best_mae:
                best_mae=mae/(batch_id+1)
                min_epoch=epoch
                fluid.save_dygraph(net.state_dict(), save_dir + method + str(epoch))
                fluid.save_dygraph(optimizer.state_dict(), save_dir + method + str(epoch))
            print("test epoch:", str(epoch), 'loss:',val_loss, " error:", str(mae/(batch_id+1)), " min_mae:", str(best_mae), " min_epoch:", str(min_epoch), 
                    'mse:', mse/(batch_id+1), 'real:', label.numpy()[0].sum(), 'pre:', predict.numpy()[0].sum())
            del mae, mse, image, label, predict
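
The validation loop above accumulates absolute and squared count errors per image; the quantities it reports correspond to the standard crowd-counting MAE and (R)MSE. As a standalone sketch over per-image counts:

import numpy as np

def counting_metrics(pred_counts, gt_counts):
    # MAE and RMSE over per-image people counts, the usual
    # ShanghaiTech evaluation metrics.
    diff = np.asarray(pred_counts, dtype=np.float64) - np.asarray(gt_counts, dtype=np.float64)
    mae = float(np.mean(np.abs(diff)))
    rmse = float(np.sqrt(np.mean(diff ** 2)))
    return mae, rmse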
Code example #22
0
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(nv_dim=FLAGS.nv_dim, nc_dim=FLAGS.nc_dim)
    encoder.apply(weights_init)

    decoder = Decoder(nv_dim=FLAGS.nv_dim, nc_dim=FLAGS.nc_dim)
    decoder.apply(weights_init)

    discriminator = Discriminator()
    discriminator.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
        discriminator.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.discriminator_save)))
    """
    variable definition
    """
    real_domain_labels = 1
    fake_domain_labels = 0

    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)

    domain_labels = torch.LongTensor(FLAGS.batch_size)
    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()
    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()

        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        domain_labels = domain_labels.cuda()
    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    discriminator_optimizer = optim.Adam(list(discriminator.parameters()),
                                         lr=FLAGS.initial_learning_rate,
                                         betas=(FLAGS.beta_1, FLAGS.beta_2))

    generator_optimizer = optim.Adam(list(encoder.parameters()) +
                                     list(decoder.parameters()),
                                     lr=FLAGS.initial_learning_rate,
                                     betas=(FLAGS.beta_1, FLAGS.beta_2))
    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write('Epoch\tIteration\tReconstruction_loss\t')
            log.write(
                'Generator_loss\tDiscriminator_loss\tDiscriminator_accuracy\n')

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist',
                                download=True,
                                train=True,
                                transform=transform_config)
    loader = cycle(
        DataLoader(paired_mnist,
                   batch_size=FLAGS.batch_size,
                   shuffle=True,
                   num_workers=0,
                   drop_last=True))

    # initialise variables
    discriminator_accuracy = 0.

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, labels_batch_1 = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            nv_1, nc_1 = encoder(Variable(X_1))
            nv_2, nc_2 = encoder(Variable(X_2))

            reconstructed_X_1 = decoder(nv_1, nc_2)
            reconstructed_X_2 = decoder(nv_2, nc_1)

            reconstruction_error_1 = mse_loss(reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = mse_loss(reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()

            reconstruction_error = reconstruction_error_1 + reconstruction_error_2

            if FLAGS.train_auto_encoder:
                auto_encoder_optimizer.step()

            # B. run the adversarial part of the architecture

            # B. a) run the discriminator
            for i in range(FLAGS.discriminator_times):
                discriminator_optimizer.zero_grad()

                # train discriminator on real data
                domain_labels.fill_(real_domain_labels)

                image_batch_1, image_batch_2, labels_batch_1 = next(loader)

                X_1.copy_(image_batch_1)
                X_2.copy_(image_batch_2)

                real_output = discriminator(Variable(X_1), Variable(X_2))

                discriminator_real_error = FLAGS.disc_coef * cross_entropy_loss(
                    real_output, Variable(domain_labels))
                discriminator_real_error.backward()

                # train discriminator on fake data
                domain_labels.fill_(fake_domain_labels)

                image_batch_3, _, labels_batch_3 = next(loader)
                X_3.copy_(image_batch_3)

                nv_3, nc_3 = encoder(Variable(X_3))

                # reconstruction is taking common factor from X_1 and varying factor from X_3
                reconstructed_X_3_1 = decoder(nv_3, encoder(Variable(X_1))[1])

                fake_output = discriminator(Variable(X_1), reconstructed_X_3_1)

                discriminator_fake_error = FLAGS.disc_coef * cross_entropy_loss(
                    fake_output, Variable(domain_labels))
                discriminator_fake_error.backward()

                # total discriminator error
                discriminator_error = discriminator_real_error + discriminator_fake_error

                # calculate discriminator accuracy for this step
                target_true_labels = torch.cat((torch.ones(
                    FLAGS.batch_size), torch.zeros(FLAGS.batch_size)),
                                               dim=0)
                if FLAGS.cuda:
                    target_true_labels = target_true_labels.cuda()

                discriminator_predictions = torch.cat(
                    (real_output, fake_output), dim=0)
                _, discriminator_predictions = torch.max(
                    discriminator_predictions, 1)

                discriminator_accuracy = (discriminator_predictions.data
                                          == target_true_labels.long()).sum(
                                          ).item() / (FLAGS.batch_size * 2)

                if discriminator_accuracy < FLAGS.discriminator_limiting_accuracy and FLAGS.train_discriminator:
                    discriminator_optimizer.step()

            # B. b) run the generator
            for i in range(FLAGS.generator_times):

                generator_optimizer.zero_grad()

                image_batch_1, _, labels_batch_1 = next(loader)
                image_batch_3, __, labels_batch_3 = next(loader)

                domain_labels.fill_(real_domain_labels)
                X_1.copy_(image_batch_1)
                X_3.copy_(image_batch_3)

                nv_3, nc_3 = encoder(Variable(X_3))

                # reconstruction is taking common factor from X_1 and varying factor from X_3
                reconstructed_X_3_1 = decoder(nv_3, encoder(Variable(X_1))[1])

                output = discriminator(Variable(X_1), reconstructed_X_3_1)

                generator_error = FLAGS.gen_coef * cross_entropy_loss(
                    output, Variable(domain_labels))
                generator_error.backward()

                if FLAGS.train_generator:
                    generator_optimizer.step()

            # print progress after 10 iterations
            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('Generator loss: ' +
                      str(generator_error.data.storage().tolist()[0]))

                print('')
                print('Discriminator loss: ' +
                      str(discriminator_error.data.storage().tolist()[0]))
                print('Discriminator accuracy: ' + str(discriminator_accuracy))

                print('..........')

            # write to log
            with open(FLAGS.log_file, 'a') as log:
                log.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
                    epoch, iteration,
                    reconstruction_error.data.storage().tolist()[0],
                    generator_error.data.storage().tolist()[0],
                    discriminator_error.data.storage().tolist()[0],
                    discriminator_accuracy))

            # write to tensorboard
            writer.add_scalar(
                'Reconstruction loss',
                reconstruction_error.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)
            writer.add_scalar(
                'Generator loss',
                generator_error.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)
            writer.add_scalar(
                'Discriminator loss',
                discriminator_error.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))
            torch.save(discriminator.state_dict(),
                       os.path.join('checkpoints', FLAGS.discriminator_save))
            """
            save reconstructed images and style swapped image generations to check progress
            """
            image_batch_1, image_batch_2, labels_batch_1 = next(loader)
            image_batch_3, _, __ = next(loader)

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            X_3.copy_(image_batch_3)

            nv_1, nc_1 = encoder(Variable(X_1))
            nv_2, nc_2 = encoder(Variable(X_2))
            nv_3, nc_3 = encoder(Variable(X_3))

            reconstructed_X_1 = decoder(nv_1, nc_2)
            reconstructed_X_3_2 = decoder(nv_3, nc_2)

            # save input image batch
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            image_batch = np.concatenate(
                (image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)

            # save reconstructed batch
            reconstructed_x = np.transpose(
                reconstructed_X_1.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_x = np.concatenate(
                (reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x,
                        name=str(epoch) + '_target',
                        save=True)

            # save cross reconstructed batch
            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            style_batch = np.concatenate(
                (style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)

            reconstructed_style = np.transpose(
                reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_style = np.concatenate(
                (reconstructed_style, reconstructed_style,
                 reconstructed_style),
                axis=3)
            imshow_grid(reconstructed_style,
                        name=str(epoch) + '_style_target',
                        save=True)
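
Example #22 only steps the discriminator while its accuracy stays below FLAGS.discriminator_limiting_accuracy, which keeps useful gradients flowing to the generator. The accuracy computation it uses can be distilled into a small helper (a sketch mirroring the loop above, not part of the repository):

import torch

def discriminator_accuracy(real_output, fake_output):
    # Real pairs are class 1, fake pairs class 0, matching the
    # CrossEntropyLoss targets used in the training loop above.
    preds = torch.cat((real_output, fake_output), dim=0).argmax(dim=1)
    targets = torch.cat((torch.ones(real_output.size(0)),
                         torch.zeros(fake_output.size(0)))).long().to(preds.device)
    return (preds == targets).float().mean().item()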
Code example #24
0
			# augmented_batch = augment_batch(X1)
			augmented_batch, mask = get_augmentations_and_mask(X1)

			encoder_outputs = encoder(X1)
			specified_latents, unspecified_variational_latent, mu, logvar = encoder_outputs[0], encoder_outputs[1], encoder_outputs[2], encoder_outputs[3]

			augmented_encoder_outputs = encoder(augmented_batch)
			aug_specified_latents, aug_unspecified_variational_latent, aug_mu, aug_logvar = augmented_encoder_outputs[0], augmented_encoder_outputs[1], augmented_encoder_outputs[2], augmented_encoder_outputs[3]
			
			# kl loss
			kl_loss = FLAGS.kl_divergence_coef * (-0.5 * (torch.sum(1 + logvar - mu.pow(2) - logvar.exp())))
			kl_loss /= FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size
			
			# reconstruction loss batch
			image_batch_recon = decoder(specified_latents, unspecified_variational_latent)
			recon_loss = mse_loss(image_batch_recon, X1)

			gen_loss = recon_loss + kl_loss

			# center loss
			cv, cv_full_view = cv_network(specified_latents)
			transformed_chunks = torch.zeros(FLAGS.batch_size*FLAGS.z_num_chunks, FLAGS.c_num_chunks*FLAGS.c_chunk_size)

			with torch.no_grad():

				for i in range(FLAGS.batch_size):
					
					transformed_temp_chunks = []

					for j in range(FLAGS.z_num_chunks):
						curr_tensor = specified_latents[j][i]
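
The fragment above (like example #25 below) scales the KL term by the number of pixels rather than by batch size alone, so its magnitude stays comparable to a per-pixel reconstruction loss. As a compact helper (a sketch, not taken from either repository):

import torch

def kl_per_pixel(mu, logvar, batch_size, channels, height, width):
    # Diagonal-Gaussian KL divergence to N(0, I), averaged per pixel.
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return kl / (batch_size * channels * height * width)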
Code example #25
0
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
    """
    variable definition
    """

    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)

    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)
    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()
    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()

        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        style_latent_space = style_latent_space.cuda()
    """
    optimizer and scheduler definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    reverse_cycle_optimizer = optim.Adam(list(encoder.parameters()),
                                         lr=FLAGS.initial_learning_rate,
                                         betas=(FLAGS.beta_1, FLAGS.beta_2))

    # divide the learning rate by a factor of 10 after 80 epochs
    auto_encoder_scheduler = optim.lr_scheduler.StepLR(auto_encoder_optimizer,
                                                       step_size=80,
                                                       gamma=0.1)
    reverse_cycle_scheduler = optim.lr_scheduler.StepLR(
        reverse_cycle_optimizer, step_size=80, gamma=0.1)
    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tKL_divergence_loss\tReverse_cycle_loss\n'
            )

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist',
                                download=True,
                                train=True,
                                transform=transform_config)
    loader = cycle(
        DataLoader(paired_mnist,
                   batch_size=FLAGS.batch_size,
                   shuffle=True,
                   num_workers=0,
                   drop_last=True))

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        # update the learning rate scheduler
        auto_encoder_scheduler.step()
        reverse_cycle_scheduler.step()

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_mu_1, style_logvar_1, class_latent_space_1 = encoder(
                Variable(X_1))
            style_latent_space_1 = reparameterize(training=True,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)

            kl_divergence_loss_1 = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) -
                                 style_logvar_1.exp()))
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels *
                                     FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)

            style_mu_2, style_logvar_2, class_latent_space_2 = encoder(
                Variable(X_2))
            style_latent_space_2 = reparameterize(training=True,
                                                  mu=style_mu_2,
                                                  logvar=style_logvar_2)

            kl_divergence_loss_2 = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar_2 - style_mu_2.pow(2) -
                                 style_logvar_2.exp()))
            kl_divergence_loss_2 /= (FLAGS.batch_size * FLAGS.num_channels *
                                     FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_2.backward(retain_graph=True)

            reconstructed_X_1 = decoder(style_latent_space_1,
                                        class_latent_space_2)
            reconstructed_X_2 = decoder(style_latent_space_2,
                                        class_latent_space_1)

            reconstruction_error_1 = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()

            reconstruction_error = (
                reconstruction_error_1 +
                reconstruction_error_2) / FLAGS.reconstruction_coef
            kl_divergence_error = (kl_divergence_loss_1 + kl_divergence_loss_2
                                   ) / FLAGS.kl_divergence_coef

            auto_encoder_optimizer.step()

            # B. reverse cycle
            image_batch_1, _, __ = next(loader)
            image_batch_2, _, __ = next(loader)

            reverse_cycle_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_latent_space.normal_(0., 1.)

            _, __, class_latent_space_1 = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))

            reconstructed_X_1 = decoder(Variable(style_latent_space),
                                        class_latent_space_1.detach())
            reconstructed_X_2 = decoder(Variable(style_latent_space),
                                        class_latent_space_2.detach())

            style_mu_1, style_logvar_1, _ = encoder(reconstructed_X_1)
            style_latent_space_1 = reparameterize(training=False,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)

            style_mu_2, style_logvar_2, _ = encoder(reconstructed_X_2)
            style_latent_space_2 = reparameterize(training=False,
                                                  mu=style_mu_2,
                                                  logvar=style_logvar_2)

            reverse_cycle_loss = FLAGS.reverse_cycle_coef * l1_loss(
                style_latent_space_1, style_latent_space_2)
            reverse_cycle_loss.backward()
            reverse_cycle_loss /= FLAGS.reverse_cycle_coef

            reverse_cycle_optimizer.step()

            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' +
                      str(kl_divergence_error.data.storage().tolist()[0]))
                print('Reverse cycle loss: ' +
                      str(reverse_cycle_loss.data.storage().tolist()[0]))

            # write to log
            with open(FLAGS.log_file, 'a') as log:
                log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    epoch, iteration,
                    reconstruction_error.data.storage().tolist()[0],
                    kl_divergence_error.data.storage().tolist()[0],
                    reverse_cycle_loss.data.storage().tolist()[0]))

            # write to tensorboard
            writer.add_scalar(
                'Reconstruction loss',
                reconstruction_error.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)
            writer.add_scalar(
                'KL-Divergence loss',
                kl_divergence_error.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)
            writer.add_scalar(
                'Reverse cycle loss',
                reverse_cycle_loss.data.storage().tolist()[0],
                epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))
            """
            save reconstructed images and style swapped image generations to check progress
            """
            image_batch_1, image_batch_2, _ = next(loader)
            image_batch_3, _, __ = next(loader)

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            X_3.copy_(image_batch_3)

            style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))
            style_mu_3, style_logvar_3, _ = encoder(Variable(X_3))

            style_latent_space_1 = reparameterize(training=False,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)
            style_latent_space_3 = reparameterize(training=False,
                                                  mu=style_mu_3,
                                                  logvar=style_logvar_3)

            reconstructed_X_1_2 = decoder(style_latent_space_1,
                                          class_latent_space_2)
            reconstructed_X_3_2 = decoder(style_latent_space_3,
                                          class_latent_space_2)

            # save input image batch (single grayscale channel replicated to RGB for display)
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            image_batch = np.concatenate(
                (image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)

            # save reconstructed batch
            reconstructed_x = np.transpose(
                reconstructed_X_1_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_x = np.concatenate(
                (reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x,
                        name=str(epoch) + '_target',
                        save=True)

            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            style_batch = np.concatenate(
                (style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)

            # save style swapped reconstructed batch
            reconstructed_style = np.transpose(
                reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_style = np.concatenate(
                (reconstructed_style, reconstructed_style,
                 reconstructed_style),
                axis=3)
            imshow_grid(reconstructed_style,
                        name=str(epoch) + '_style_target',
                        save=True)
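
Both of these training procedures (the reverse-cycle pass above and the adversarial variant that follows) rely on a reparameterize helper plus thin mse_loss / l1_loss wrappers imported from the repository's utilities. Below is a minimal sketch of what they are assumed to do, using the standard Gaussian reparameterization trick; the real repository may differ in details such as the reduction over the batch:

import torch.nn.functional as F
from torch.autograd import Variable

def reparameterize(training, mu, logvar):
    # during training, sample z = mu + sigma * eps with eps ~ N(0, I);
    # at evaluation time, just return the posterior mean
    if training:
        std = logvar.mul(0.5).exp_()
        eps = Variable(std.data.new(std.size()).normal_())
        return eps.mul(std).add_(mu)
    return mu

def mse_loss(input, target):
    # mean squared reconstruction error
    return F.mse_loss(input, target)

def l1_loss(input, target):
    # mean absolute error, used for the reverse cycle consistency term
    return F.l1_loss(input, target)
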
def training_procedure(FLAGS):
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    discriminator = Discriminator()
    discriminator.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
        discriminator.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.discriminator_save)))

    """
    variable definition
    """
    real_domain_labels = 1
    fake_domain_labels = 0

    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)

    domain_labels = torch.LongTensor(FLAGS.batch_size)
    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)

    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()

    '''
    move models, loss and input tensors to the GPU if requested
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()

        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        domain_labels = domain_labels.cuda()
        style_latent_space = style_latent_space.cuda()

    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    discriminator_optimizer = optim.Adam(
        list(discriminator.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    generator_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is False when training starts from scratch; write the log header
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write('Epoch\tIteration\tReconstruction_loss\tKL_divergence_loss\t')
            log.write('Generator_loss\tDiscriminator_loss\tDiscriminator_accuracy\n')

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist', download=True, train=True, transform=transform_config)
    loader = cycle(DataLoader(paired_mnist, batch_size=FLAGS.batch_size, shuffle=True, num_workers=0, drop_last=True))

    # initialise variables
    discriminator_accuracy = 0.

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print('Epoch #' + str(epoch) + '..........................................................................')

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_mu_1, style_logvar_1, class_1 = encoder(Variable(X_1))
            style_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)

            kl_divergence_loss_1 = - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)

            _, __, class_2 = encoder(Variable(X_2))

            reconstructed_X_1 = decoder(style_1, class_1)
            reconstructed_X_2 = decoder(style_1, class_2)

            reconstruction_error_1 = mse_loss(reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            # X_1 and X_2 are a pair sharing the same class, so decoding with
            # the swapped class code should still reconstruct X_1
            reconstruction_error_2 = mse_loss(reconstructed_X_2, Variable(X_1))
            reconstruction_error_2.backward()

            reconstruction_error = reconstruction_error_1 + reconstruction_error_2
            kl_divergence_error = kl_divergence_loss_1

            auto_encoder_optimizer.step()

            # B. run the generator
            for i in range(FLAGS.generator_times):

                generator_optimizer.zero_grad()

                image_batch_1, _, __ = next(loader)
                image_batch_3, _, __ = next(loader)

                domain_labels.fill_(real_domain_labels)
                X_1.copy_(image_batch_1)
                X_3.copy_(image_batch_3)

                style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
                style_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)

                kl_divergence_loss_1 = - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
                kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
                kl_divergence_loss_1.backward(retain_graph=True)

                _, __, class_3 = encoder(Variable(X_3))
                reconstructed_X_1_3 = decoder(style_1, class_3)

                output_1 = discriminator(Variable(X_3), reconstructed_X_1_3)

                generator_error_1 = cross_entropy_loss(output_1, Variable(domain_labels))
                generator_error_1.backward(retain_graph=True)

                style_latent_space.normal_(0., 1.)
                reconstructed_X_latent_3 = decoder(Variable(style_latent_space), class_3)

                output_2 = discriminator(Variable(X_3), reconstructed_X_latent_3)

                generator_error_2 = cross_entropy_loss(output_2, Variable(domain_labels))
                generator_error_2.backward()

                generator_error = generator_error_1 + generator_error_2
                kl_divergence_error += kl_divergence_loss_1

                generator_optimizer.step()

            # C. run the discriminator
            for i in range(FLAGS.discriminator_times):

                discriminator_optimizer.zero_grad()

                # train discriminator on real data
                domain_labels.fill_(real_domain_labels)

                image_batch_1, _, __ = next(loader)
                image_batch_2, image_batch_3, _ = next(loader)

                X_1.copy_(image_batch_1)
                X_2.copy_(image_batch_2)
                X_3.copy_(image_batch_3)

                real_output = discriminator(Variable(X_2), Variable(X_3))

                discriminator_real_error = cross_entropy_loss(real_output, Variable(domain_labels))
                discriminator_real_error.backward()

                # train discriminator on fake data
                domain_labels.fill_(fake_domain_labels)

                style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
                style_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)

                _, __, class_3 = encoder(Variable(X_3))
                reconstructed_X_1_3 = decoder(style_1, class_3)

                fake_output = discriminator(Variable(X_3), reconstructed_X_1_3)

                discriminator_fake_error = cross_entropy_loss(fake_output, Variable(domain_labels))
                discriminator_fake_error.backward()

                # total discriminator error
                discriminator_error = discriminator_real_error + discriminator_fake_error

                # calculate discriminator accuracy for this step
                target_true_labels = torch.cat((torch.ones(FLAGS.batch_size), torch.zeros(FLAGS.batch_size)), dim=0)
                if FLAGS.cuda:
                    target_true_labels = target_true_labels.cuda()

                discriminator_predictions = torch.cat((real_output, fake_output), dim=0)
                _, discriminator_predictions = torch.max(discriminator_predictions, 1)

                discriminator_accuracy = (discriminator_predictions.data == target_true_labels.long()
                                          ).sum().item() / (FLAGS.batch_size * 2)

                # skip the update once the discriminator is too strong, so the
                # generator keeps receiving useful gradients
                if discriminator_accuracy < FLAGS.discriminator_limiting_accuracy:
                    discriminator_optimizer.step()

            if (iteration + 1) % 50 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' + str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' + str(kl_divergence_error.data.storage().tolist()[0]))

                print('')
                print('Generator loss: ' + str(generator_error.data.storage().tolist()[0]))
                print('Discriminator loss: ' + str(discriminator_error.data.storage().tolist()[0]))
                print('Discriminator accuracy: ' + str(discriminator_accuracy))

                print('..........')

            # write to log
            with open(FLAGS.log_file, 'a') as log:
                log.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n'.format(
                    epoch,
                    iteration,
                    reconstruction_error.data.storage().tolist()[0],
                    kl_divergence_error.data.storage().tolist()[0],
                    generator_error.data.storage().tolist()[0],
                    discriminator_error.data.storage().tolist()[0],
                    discriminator_accuracy
                ))

            # write to tensorboard
            writer.add_scalar('Reconstruction loss', reconstruction_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('KL-Divergence loss', kl_divergence_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('Generator loss', generator_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('Discriminator loss', discriminator_error.data.storage().tolist()[0],
                              epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
            writer.add_scalar('Discriminator accuracy', discriminator_accuracy * 100,
                              epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(), os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(), os.path.join('checkpoints', FLAGS.decoder_save))
            torch.save(discriminator.state_dict(), os.path.join('checkpoints', FLAGS.discriminator_save))
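
The procedure above reads every hyperparameter from a FLAGS namespace. A minimal, illustrative way to construct one and launch training (the field names are taken from the code above; the default values here are placeholders, not the repository's actual settings):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--num_channels', type=int, default=1)
parser.add_argument('--image_size', type=int, default=28)
parser.add_argument('--style_dim', type=int, default=16)
parser.add_argument('--class_dim', type=int, default=16)
parser.add_argument('--initial_learning_rate', type=float, default=1e-4)
parser.add_argument('--beta_1', type=float, default=0.9)
parser.add_argument('--beta_2', type=float, default=0.999)
parser.add_argument('--start_epoch', type=int, default=0)
parser.add_argument('--end_epoch', type=int, default=100)
parser.add_argument('--generator_times', type=int, default=1)
parser.add_argument('--discriminator_times', type=int, default=1)
parser.add_argument('--discriminator_limiting_accuracy', type=float, default=0.9)
parser.add_argument('--load_saved', action='store_true')
parser.add_argument('--log_file', type=str, default='training.log')
parser.add_argument('--encoder_save', type=str, default='encoder.pt')
parser.add_argument('--decoder_save', type=str, default='decoder.pt')
parser.add_argument('--discriminator_save', type=str, default='discriminator.pt')

if __name__ == '__main__':
    FLAGS = parser.parse_args()
    training_procedure(FLAGS)
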
Code example #27
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_imgs', type=str, help='dataset path')
    parser.add_argument('--mask_imgs', type=str, help='dataset path')
    parser.add_argument('--log_dir',
                        type=str,
                        default='log',
                        help='Name of the log folder')
    parser.add_argument('--save_models',
                        type=bool,
                        default=True,
                        help='Set True if you want to save trained models')
    parser.add_argument('--pre_trained_model_path',
                        type=str,
                        default=None,
                        help='Pre-trained model path')
    parser.add_argument('--pre_trained_model_epoch',
                        type=str,
                        default=None,
                        help='Pre-trained model epoch, e.g. 200')
    parser.add_argument('--train_imgs_path',
                        type=str,
                        default='C:/Users/motur/coco/images/train2017',
                        help='Path to training images')
    parser.add_argument(
        '--train_annotation_path',
        type=str,
        default='C:/Users/motur/coco/annotations/instances_train2017.json',
        help='Path to annotation file, .json file')
    parser.add_argument('--category_names',
                        type=str,
                        default='giraffe,elephant,zebra,sheep,cow,bear',
                        help='List of categories in MS-COCO dataset')
    parser.add_argument('--num_test_img',
                        type=int,
                        default=16,
                        help='Number of images saved during training')
    parser.add_argument('--img_size',
                        type=int,
                        default=256,
                        help='Generated image size')
    parser.add_argument(
        '--local_patch_size',
        type=int,
        default=256,
        help='Image size of instance images after interpolation')
    parser.add_argument('--batch_size',
                        type=int,
                        default=16,
                        help='Mini-batch size')
    parser.add_argument('--train_epoch',
                        type=int,
                        default=20,
                        help='Maximum training epoch')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0002,
                        help='Initial learning rate')
    parser.add_argument('--optim_step_size',
                        type=int,
                        default=80,
                        help='Learning rate decay step size')
    parser.add_argument('--optim_gamma',
                        type=float,
                        default=0.5,
                        help='Learning rate decay ratio')
    parser.add_argument(
        '--critic_iter',
        type=int,
        default=5,
        help='Number of discriminator update against each generator update')
    parser.add_argument('--noise_size',
                        type=int,
                        default=128,
                        help='Noise vector size')
    parser.add_argument('--lambda_FM',
                        type=float,
                        default=1,
                        help='Trade-off param for feature matching loss')
    parser.add_argument('--lambda_recon',
                        type=float,
                        default=0.00001,
                        help='Trade-off param for reconstruction loss')
    parser.add_argument('--num_res_blocks',
                        type=int,
                        default=5,
                        help='Number of residual block in generator network')
    parser.add_argument(
        '--trade_off_G',
        type=float,
        default=0.1,
        help=
        'Trade-off parameter which controls gradient flow to generator from D_local and D_glob'
    )

    opt = parser.parse_args()
    print(opt)

    #Create log folder
    root = 'result_fg/' + opt.category_names + '/'
    model = 'coco_model_'
    result_folder_name = 'images_' + opt.log_dir
    model_folder_name = 'models_' + opt.log_dir
    if not os.path.isdir(root):
        os.makedirs(root)
    if not os.path.isdir(root + result_folder_name):
        os.makedirs(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.makedirs(root + model_folder_name)

    #Save the script
    copyfile(os.path.basename(__file__),
             root + result_folder_name + '/' + os.path.basename(__file__))

    #Define transformations for dataset images - e.g. resizing
    transform = transforms.Compose([
        transforms.Resize((opt.img_size, opt.img_size)),  # Scale was renamed to Resize in newer torchvision
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    #Load dataset
    category_names = opt.category_names.split(',')
    allmasks = sorted(
        glob.glob(os.path.join(opt.mask_imgs, '**', '*.png'), recursive=True))
    print('Number of masks: %d' % len(allmasks))
    dataset = chairs(imfile=opt.train_imgs,
                     mfiles=allmasks,
                     category_names=category_names,
                     transform=transform,
                     final_img_size=opt.img_size)

    #Discard images that contain very small instances (call is disabled here)
    # dataset.discard_small(min_area=0.03, max_area=1)

    #Define data loader
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)

    #For evaluation define fixed masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)
    x_fixed = sample_batched['image'][0:opt.num_test_img]
    x_fixed = Variable(x_fixed.cuda())
    y_fixed = sample_batched['single_fg_mask'][0:opt.num_test_img]
    y_fixed = Variable(y_fixed.cuda())
    z_fixed = torch.randn((opt.num_test_img, opt.noise_size))
    z_fixed = Variable(z_fixed.cuda())

    #Define networks
    G_fg = Generator_FG(z_dim=opt.noise_size,
                        label_channel=len(category_names),
                        num_res_blocks=opt.num_res_blocks)
    D_glob = Discriminator(channels=3 + len(category_names))
    D_instance = Discriminator(channels=3 + len(category_names),
                               input_size=opt.local_patch_size)
    G_fg.cuda()
    D_glob.cuda()
    D_instance.cuda()

    #Load parameters from pre-trained models
    if opt.pre_trained_model_path is not None and opt.pre_trained_model_epoch is not None:
        try:
            G_fg.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'G_fg_epoch_' +
                           opt.pre_trained_model_epoch))
            D_glob.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_glob_epoch_' +
                           opt.pre_trained_model_epoch))
            D_instance.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_local_epoch_' +
                           opt.pre_trained_model_epoch))
            print('Parameters are loaded!')
        except Exception:
            print('Error: Pre-trained parameters are not loaded!')

    #Define interpolation operation
    up_instance = nn.Upsample(size=(opt.local_patch_size,
                                    opt.local_patch_size),
                              mode='bilinear')

    #Define pooling operation for the case where the image size and local patch size differ
    pooling_instance = nn.Sequential()
    if opt.local_patch_size != opt.img_size:
        pooling_instance.add_module(
            '0', nn.AvgPool2d(int(opt.img_size / opt.local_patch_size)))

    #Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()

    #Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.cuda()

    #Define optimizer
    G_local_optimizer = optim.Adam(G_fg.parameters(),
                                   lr=opt.lr,
                                   betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(
        list(filter(lambda p: p.requires_grad, D_glob.parameters())) +
        list(filter(lambda p: p.requires_grad, D_instance.parameters())),
        lr=opt.lr,
        betas=(0.0, 0.9))
    #Define learning rate scheduler
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)

    #----------------------------TRAIN-----------------------------------------
    print('training start!')
    start_time = time.time()

    for epoch in range(opt.train_epoch):
        epoch_start_time = time.time()

        scheduler_G.step()
        scheduler_D.step()

        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(opt.batch_size)
        y_fake_ = torch.zeros(opt.batch_size)
        y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())

        data_iter = iter(train_loader)
        num_iter = 0
        while num_iter < len(train_loader):

            j = 0
            while j < opt.critic_iter and num_iter < len(train_loader):
                j += 1
                sample_batched = next(data_iter)
                num_iter += 1
                x_ = sample_batched['image']
                y_ = sample_batched['single_fg_mask']
                fg_mask = sample_batched['seg_mask']

                y_instances = sample_batched['mask_instance']
                bbox = sample_batched['bbox']

                mini_batch = x_.size()[0]
                if mini_batch != opt.batch_size:
                    break

                #Update discriminators - D
                #Real examples
                D_glob.zero_grad()
                D_instance.zero_grad()

                x_, y_ = Variable(x_.cuda()), Variable(y_.cuda())
                fg_mask = Variable(fg_mask.cuda())
                y_reduced = torch.sum(y_,
                                      1).clamp(0,
                                               1).view(y_.size(0), 1,
                                                       opt.img_size,
                                                       opt.img_size)

                x_d = torch.cat([x_, fg_mask], 1)

                x_instances = torch.zeros(
                    (opt.batch_size, 3, opt.local_patch_size,
                     opt.local_patch_size))
                x_instances = Variable(x_instances.cuda())
                y_instances = Variable(y_instances.cuda())
                y_instances = pooling_instance(y_instances)
                G_instances = torch.zeros(
                    (opt.batch_size, 3, opt.local_patch_size,
                     opt.local_patch_size))
                G_instances = Variable(G_instances.cuda())

                #Obtain instances
                for t in range(x_d.size()[0]):
                    x_instance = x_[t, 0:3, bbox[0][t]:bbox[1][t],
                                    bbox[2][t]:bbox[3][t]]
                    x_instance = x_instance.contiguous().view(
                        1,
                        x_instance.size()[0],
                        x_instance.size()[1],
                        x_instance.size()[2])
                    x_instances[t] = up_instance(x_instance)

                D_result_instance = D_instance(
                    torch.cat([x_instances, y_instances], 1)).squeeze()
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_) + BCE_loss(
                    D_result_instance, y_real_)
                D_real_loss.backward()

                #Fake examples
                z_ = torch.randn((mini_batch, opt.noise_size))
                z_ = Variable(z_.cuda())

                #Generate fake images
                G_fg_result = G_fg(z_, y_, torch.mul(x_, (1 - y_reduced)))
                G_result_d = torch.cat([G_fg_result, fg_mask], 1)

                #Obtain fake instances
                for t in range(x_d.size()[0]):
                    G_instance = G_result_d[t, 0:3, bbox[0][t]:bbox[1][t],
                                            bbox[2][t]:bbox[3][t]]
                    G_instance = G_instance.contiguous().view(
                        1,
                        G_instance.size()[0],
                        G_instance.size()[1],
                        G_instance.size()[2])
                    G_instances[t] = up_instance(G_instance)

                D_result_instance = D_instance(
                    torch.cat([G_instances, y_instances],
                              1).detach()).squeeze()
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_) + BCE_loss(
                    D_result_instance, y_fake_)
                D_fake_loss.backward()
                D_local_optimizer.step()

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.data)

            if mini_batch != opt.batch_size:
                break

            #Update generator G
            G_fg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            D_result_instance = D_instance(
                torch.cat([G_instances, y_instances], 1)).squeeze()
            G_train_loss = (1 - opt.trade_off_G) * BCE_loss(
                D_result, y_real_) + opt.trade_off_G * BCE_loss(
                    D_result_instance, y_real_)

            #Feature matching loss between generated image and corresponding ground truth
            FM_loss = criterionVGG(G_fg_result, x_)

            #Reconstruction loss
            Recon_loss = mse_loss(torch.mul(x_, (1 - y_reduced)),
                                  torch.mul(G_fg_result, (1 - y_reduced)))

            total_loss = G_train_loss + opt.lambda_FM * FM_loss + opt.lambda_recon * Recon_loss
            total_loss.backward()
            G_local_optimizer.step()
            G_local_losses.append(G_train_loss.data)

            print('loss_d: %.3f, loss_g: %.3f' %
                  (D_train_loss.data, G_train_loss.data))
            if (num_iter % 100) == 0:
                print('%d - %d complete!' % ((epoch + 1), num_iter))
                print(result_folder_name)

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
              ((epoch + 1), opt.train_epoch, per_epoch_ptime,
               torch.mean(torch.FloatTensor(D_local_losses)),
               torch.mean(torch.FloatTensor(G_local_losses))))

        #Save images
        G_fg.eval()

        if epoch == 0:
            show_result_rgb((epoch + 1),
                            x_fixed,
                            save=True,
                            path=root + result_folder_name + '/' + model +
                            str(epoch + 1) + '_gt.png')
            for t in range(y_fixed.size()[1]):
                show_result_rgb((epoch + 1),
                                y_fixed[:, t:t + 1, :, :],
                                save=True,
                                path=root + result_folder_name + '/' + model +
                                str(epoch + 1) + '_' + str(t) + '_masked.png')

        show_result_rgb(
            (epoch + 1),
            G_fg(
                z_fixed, y_fixed,
                torch.mul(x_fixed, (1 - torch.sum(y_fixed, 1).view(
                    y_fixed.size(0), 1, opt.img_size, opt.img_size)))),
            save=True,
            path=root + result_folder_name + '/' + model + str(epoch + 1) +
            '_fg.png')
        G_fg.train()

        #Save model params
        if opt.save_models and (epoch > 11 and epoch % 10 == 0):
            torch.save(
                G_fg.state_dict(), root + model_folder_name + '/' + model +
                'G_fg_epoch_' + str(epoch) + '.pth')
            torch.save(
                D_glob.state_dict(), root + model_folder_name + '/' + model +
                'D_glob_epoch_' + str(epoch) + '.pth')
            torch.save(
                D_instance.state_dict(), root + model_folder_name + '/' +
                model + 'D_local_epoch_' + str(epoch) + '.pth')

    torch.save(
        G_fg.state_dict(), root + model_folder_name + '/' + model +
        'G_fg_epoch_' + str(epoch) + '.pth')
    torch.save(
        D_glob.state_dict(), root + model_folder_name + '/' + model +
        'D_glob_epoch_' + str(epoch) + '.pth')
    torch.save(
        D_instance.state_dict(), root + model_folder_name + '/' + model +
        'D_local_epoch_' + str(epoch) + '.pth')
    end_time = time.time()
    total_ptime = end_time - start_time
    print("Training finish!... save training results")
    print('Training time: ' + str(total_ptime))
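
Note that the reconstruction term in this example only constrains background pixels: the combined instance mask zeroes out the foreground on both the real image and the generated one, so the generator is penalized for altering the background but left free inside the object region. The masking logic in isolation, assuming mse_loss is a plain mean squared error:

import torch
import torch.nn.functional as F

def background_recon_loss(x, g_out, y):
    # y: (B, K, H, W), one mask channel per category; collapse to a single
    # binary foreground mask, then compare only the background pixels
    y_reduced = torch.sum(y, 1).clamp(0, 1).unsqueeze(1)
    return F.mse_loss(x * (1 - y_reduced), g_out * (1 - y_reduced))
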
Code example #28
0
    def loss(self):
        y = tf.reshape(self.y, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

        # For a TensorBoard image summary, the input, label and model output
        # could be placed side by side (sbs):
        # tf.summary.image(sbs.op.name, sbs, max_outputs=3, collections=["training summary"])
        return digits.mse_loss(self.inference, y)
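
digits.mse_loss is presumably a plain mean squared error between the network output and the reshaped label; a minimal TensorFlow 1.x-style sketch of such a helper:

import tensorflow as tf

def mse_loss(inference, y):
    # mean of the element-wise squared differences
    return tf.reduce_mean(tf.squared_difference(inference, y))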