Example #1
    def test_mean(test_case):
        input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
        of_out = flow.mean(input, dim=1)
        np_out = np.mean(input.numpy(), axis=1)
        test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))

        input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
        of_out = flow.mean(input, dim=0)
        np_out = np.mean(input.numpy(), axis=0)
        test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
Example #2
def _test_mean(test_case, shape, device):
    input = flow.tensor(np.random.randn(*shape),
                        dtype=flow.float32,
                        device=flow.device(device))
    of_out = flow.mean(input, dim=1)
    np_out = np.mean(input.numpy(), axis=1)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))
    input = flow.tensor(np.random.randn(*shape),
                        dtype=flow.float32,
                        device=flow.device(device))
    of_out = flow.mean(input, dim=0)
    np_out = np.mean(input.numpy(), axis=0)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))
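Helpers like `_test_mean` are typically driven from a `unittest` case over a grid of shapes and devices. A minimal driver sketch (a hypothetical harness; OneFlow's own test suite uses its internal argument-generation utilities):

    import itertools
    import unittest

    class TestMean(unittest.TestCase):
        def test_mean_against_numpy(self):
            shapes = [(2, 3), (2, 3, 4)]
            devices = ["cpu"]  # add "cuda" when a GPU is available
            for shape, device in itertools.product(shapes, devices):
                _test_mean(self, shape, device)

    if __name__ == "__main__":
        unittest.main()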
Example #3
 def forward(self, x):
     """
     x: N x C x T
     """
     if x.dim() != 3:
         raise RuntimeError("{} accepts a 3D tensor as input".format(
             self.__class__.__name__))
     # N x 1 x 1
     mean = flow.mean(x, (1, 2), keepdim=True)
     var = flow.mean((x - mean) ** 2, (1, 2), keepdim=True)
     # N x C x T
     if self.elementwise_affine:
         x = self.gamma * (x - mean) / flow.sqrt(var + self.eps) + self.beta
     else:
         x = (x - mean) / flow.sqrt(var + self.eps)
     return x
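The mean/var pair above performs per-sample layer normalization over the channel and time axes. A plain-numpy sketch of the same statistics, handy for sanity-checking (illustrative shapes only; nothing OneFlow-specific):

    import numpy as np

    x = np.random.randn(4, 8, 16)               # N x C x T
    mean = x.mean(axis=(1, 2), keepdims=True)   # N x 1 x 1
    var = ((x - mean) ** 2).mean(axis=(1, 2), keepdims=True)
    y = (x - mean) / np.sqrt(var + 1e-5)        # zero mean, unit variance per sample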
Example #4
def compare_loss(device_type, dim, reduction, cls, data_generator):
    x, y, x1, y1 = data_generator(dim, device_type, *get_sbp(device_type))
    reduce_loss_func = cls(reduction=reduction).to(device_type)
    none_loss_func = cls(reduction="none").to(device_type)

    loss_mean = reduce_loss_func(x, y)
    loss_none = (flow.mean(none_loss_func(x1, y1))
                 if reduction == "mean" else flow.sum(none_loss_func(x1, y1)))

    loss_mean.backward()
    loss_none.backward()

    assert np.allclose(
        loss_none.to_local().numpy(),
        loss_mean.to_local().numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
    assert np.allclose(
        loss_none.numpy(),
        loss_mean.numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
    assert np.allclose(
        x.grad.to_local().numpy(),
        x1.grad.to_local().numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
Example #5
def _test_mean_negative_dim(test_case, shape, device):
    if len(shape) < 4:
        shape = (2, 3, 4, 5)
    input = flow.tensor(np.random.randn(*shape),
                        dtype=flow.float32,
                        device=flow.device(device))
    of_out = flow.mean(input, dim=(-2, -1, -3))
    np_out = np.mean(input.numpy(), axis=(-2, -1, -3))
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))
Example #6
File: loss.py Project: zheddie/oneflow
 def forward(self, input, target):
     prob, out = self._op(input,
                          target,
                          depth=input.shape[len(input.shape) - 1])
     if self.reduction == "mean":
         return flow.mean(out)
     elif self.reduction == "sum":
         return flow.sum(out)
     else:
         return out
Example #7
def _test_mean_backward(test_case, shape, device):
    np_arr = np.random.randn(*shape)
    x = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    y = flow.mean(x, dim=1)
    z = y.sum()
    z.backward()
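    # z = sum(mean(x, dim=1)), so every element of x receives gradient 1/x.size(1)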
    np_grad = np.zeros(shape=np_arr.shape)
    np_grad[:] = 1 / x.size(1)
    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
Example #8
 def forward(self, out_labels, out_images, target_images):
     # Adversarial Loss
     adversarial_loss = flow.mean(1 - out_labels)
     # Perception Loss
     perception_loss = self.mse_loss(self.loss_network(out_images),
                                     self.loss_network(target_images))
     # Image Loss
     image_loss = self.mse_loss(out_images, target_images)
     # TV Loss
     tv_loss = self.tv_loss(out_images)
     return (image_loss + 0.001 * adversarial_loss +
             0.006 * perception_loss + 2e-8 * tv_loss)
Example #9
    def gradient_penalty(self, y, x):
        """Compute gradient penalty: (L2_norm(dy/dx) - 1)**2."""
        weight = flow.ones(y.size()).to(self.device)

        dydx = flow.autograd.grad(outputs=y,
                                  inputs=x,
                                  out_grads=weight,
                                  retain_graph=True,
                                  create_graph=True)[0]

        dydx = dydx.view(dydx.size(0), -1)
        dydx_l2norm = flow.sqrt(flow.sum(dydx**2, dim=1))

        return flow.mean((dydx_l2norm - 1)**2)
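In WGAN-GP style training this penalty is evaluated at random interpolates between the real and fake batches. A usage sketch under that assumption (`D`, `real`, `fake`, and the 10.0 weight are illustrative; only `gradient_penalty` comes from the class above):

    alpha = flow.rand(real.size(0), 1, 1, 1).to(self.device)  # one mixing weight per sample
    x_hat = (alpha * real + (1 - alpha) * fake).detach().requires_grad_(True)
    gp = self.gradient_penalty(D(x_hat), x_hat)
    d_loss = d_loss_real + d_loss_fake + 10.0 * gp            # 10.0 is the usual WGAN-GP weight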
Example #10
 def forward(self):
     (
         labels,
         dense_fields,
         wide_sparse_fields,
         deep_sparse_fields,
     ) = self.train_dataloader()
     labels = labels.to("cuda").to(dtype=flow.float32)
     dense_fields = dense_fields.to("cuda")
     wide_sparse_fields = wide_sparse_fields.to("cuda")
     deep_sparse_fields = deep_sparse_fields.to("cuda")
     predicts = self.wdl_module(dense_fields, wide_sparse_fields, deep_sparse_fields)
     loss = self.loss(predicts, labels)
     reduce_loss = flow.mean(loss)
     return reduce_loss
Example #11
    def sisnr(self, x, s, eps=1e-8):
        """
        Arguments:
        x: separated signal, N x S tensor
        s: reference signal, N x S tensor
        Return:
        sisnr: N tensor
        """
        def l2norm(mat, keepdim=False):
            return flow.linalg.norm(mat, dim=-1, keepdim=keepdim)

        if x.shape != s.shape:
            raise RuntimeError(
                "Dimension mismatch when calculating SI-SNR, {} vs {}".format(
                    x.shape, s.shape))
        x_zm = x - flow.mean(x, dim=-1, keepdim=True)
        s_zm = s - flow.mean(s, dim=-1, keepdim=True)
        t = (flow.sum(x_zm * s_zm, dim=-1, keepdim=True) * s_zm /
             (l2norm(s_zm, keepdim=True)**2 + eps))

        res = 20 * flow.log(eps + l2norm(t) /
                            (l2norm(x_zm - t) + eps)) / 2.3025851

        return res
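The constant 2.3025851 is ln(10), so the final line computes 20 * log10(...), i.e. SI-SNR in decibels. A plain-numpy reference of the same computation, useful for cross-checking (a hypothetical helper, not part of the class):

    import numpy as np

    def sisnr_np(x, s, eps=1e-8):
        x = x - x.mean(axis=-1, keepdims=True)  # zero-mean both signals
        s = s - s.mean(axis=-1, keepdims=True)
        t = (x * s).sum(-1, keepdims=True) * s / ((s * s).sum(-1, keepdims=True) + eps)
        return 20 * np.log10(eps + np.linalg.norm(t, axis=-1) /
                             (np.linalg.norm(x - t, axis=-1) + eps))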
Example #12
    def build(self):
        (
            labels,
            dense_fields,
            wide_sparse_fields,
            deep_sparse_fields,
        ) = self.dataloader()
        labels = labels.to("cuda").to(dtype=flow.float32)
        dense_fields = dense_fields.to("cuda")
        wide_sparse_fields = wide_sparse_fields.to("cuda")
        deep_sparse_fields = deep_sparse_fields.to("cuda")

        logits = self.module(dense_fields, wide_sparse_fields, deep_sparse_fields)
        loss = self.bce_loss(logits, labels)
        reduce_loss = flow.mean(loss)
        reduce_loss.backward()
        return reduce_loss
Example #13
 def forward(self, input, target):
     input_shape_len = len(input.shape)
     if input_shape_len == 4:
         b, c, h, w = input.shape
         input = flow.tmp.transpose(input, (0, 2, 3, 1))
         input = flow.tmp.reshape(input, shape=[-1, input.shape[3]])
         target = flow.tmp.flatten(target)
     prob, out = self._op(input,
                          target,
                          depth=input.shape[len(input.shape) - 1])
     if self.reduction == "mean":
         return flow.mean(out)
     elif self.reduction == "sum":
         return flow.sum(out)
     else:
         if input_shape_len == 4:
             out = flow.tmp.reshape(out, (b, h, w))
         return out
Example #14
    def forward(self, input, target, weight=None):
        assert (input.shape == target.shape
                ), "The Input shape must be the same as Target shape"

        _cross_entropy_loss = flow.negative(target * flow.log(input) +
                                            (1 - target) * flow.log(1 - input))

        if weight is not None:
            assert (weight.shape == input.shape
                    ), "The weight shape must be the same as Input shape"
            _weighted_loss = weight * _cross_entropy_loss
        else:
            _weighted_loss = _cross_entropy_loss

        if self.reduction == "mean":
            return flow.mean(_weighted_loss)
        elif self.reduction == "sum":
            return flow.sum(_weighted_loss)
        else:
            return _weighted_loss
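Note that `flow.log` diverges as `input` approaches 0 or 1, so hand-rolled cross entropies like the one above usually clamp the probabilities first. A minimal sketch of that guard (the epsilon is an illustrative choice, not part of the original code):

    eps = 1e-7
    input = flow.clamp(input, eps, 1.0 - eps)  # keeps both log() calls finite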
Example #15
 def forward(self, input: Tensor) -> Tensor:
     assert (len(input.shape) >=
             3), "The number of dimensions of the input tensor must be larger than 2"
     assert (input.shape[1] == self.num_channels
             ), "The number of channels of the input tensor must equal num_channels"
     origin_shape = input.shape
     reshape_to_1d = flow.reshape(
         input, shape=[origin_shape[0], self.num_groups, -1])
     mean = flow.mean(reshape_to_1d, dim=2, keepdim=True)
     variance = flow.var(reshape_to_1d, dim=2, unbiased=False, keepdim=True)
     normalized = (reshape_to_1d - mean) / flow.sqrt(variance + self.eps)
     normalized = flow.reshape(
         normalized, shape=[origin_shape[0], self.num_channels, -1])
     if self.weight is not None:
         normalized = normalized * self.weight.reshape(
             1, self.num_channels, 1)
     if self.bias is not None:
         normalized = normalized + self.bias.reshape(
             1, self.num_channels, 1)
     res = flow.reshape(normalized, shape=tuple(input.shape))
     return res
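The reshape-normalize-reshape sequence above is group normalization: statistics are computed per (sample, group) over all remaining elements, then a per-channel affine is applied. For sanity-checking, the built-in module can serve as a reference, assuming the class mirrors `flow.nn.GroupNorm` semantics (illustrative sizes):

    import oneflow as flow

    gn = flow.nn.GroupNorm(num_groups=2, num_channels=4, eps=1e-5)
    x = flow.randn(3, 4, 5)   # N x C x T, with C == num_channels
    y = gn(x)                 # same shape, normalized per group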
Example #16
 def ae_step(self, data, lambda_kl):
     x = cc(data)
     mu, log_sigma, emb, dec = self.model(x)
     criterion = nn.L1Loss()
     loss_rec = criterion(dec, x)
     loss_kl = 0.5 * flow.mean(
         flow.exp(log_sigma) + flow.mul(mu, mu) - 1 - log_sigma)
     loss = self.config["lambda"][
         "lambda_rec"] * loss_rec + lambda_kl * loss_kl
     self.opt.zero_grad()
     loss.backward()
     grad_norm = flow.nn.utils.clip_grad_norm_(
         self.model.parameters(),
         max_norm=self.config["optimizer"]["grad_norm"])
     self.opt.step()
     meta = {
         "loss_rec": loss_rec.item(),
         "loss_kl": loss_kl.item(),
         "loss": loss.item(),
         "grad_norm": grad_norm,
     }
     return meta
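The `loss_kl` line is the closed-form KL divergence between the encoder posterior and a standard normal, with `log_sigma` holding log(sigma^2):

    KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * (sigma^2 + mu^2 - 1 - log(sigma^2))

which `flow.mean` then averages over all elements, exactly as written in the code.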
Example #17
    def forward(self, input, target):
        assert len(input.shape) == 2 or len(input.shape) == 4
        input = flow.negative(input)
        if len(input.shape) == 2:
            res = self.nllloss_1d(input, target)
        elif len(input.shape) == 4:
            b, c, h, w = input.shape
            input = flow.tmp.transpose(input, (0, 2, 3, 1))
            input = flow.tmp.reshape(input, shape=[-1, input.shape[3]])
            target = flow.tmp.flatten(target)
            res = self.nllloss_1d(input, target)
            res = flow.tmp.reshape(res, (b, h, w))
        else:
            raise NotImplementedError

        if self.reduction == "none":
            return res
        elif self.reduction == "sum":
            return flow.sum(res)
        else:
            return flow.mean(res)
Example #18
 def forward(self, logits, label):
     loss = flow._C.sparse_softmax_cross_entropy(logits, label)
     loss = flow.mean(loss)
     return loss
Example #19
    def train(self):
        # Training Begins
        for epoch in range(self.start_epoch, self.num_epochs):
            start_time_epoch = time.time()

            # Constants
            cycle_loss_lambda = 10
            identity_loss_lambda = 5

            # Preparing Dataset
            n_samples = len(self.dataset_A)

            dataset = trainingDataset(datasetA=self.dataset_A,
                                      datasetB=self.dataset_B,
                                      n_frames=128)

            train_loader = flow.utils.data.DataLoader(
                dataset=dataset,
                batch_size=self.mini_batch_size,
                shuffle=True,
                drop_last=False,
            )

            pbar = tqdm(enumerate(train_loader))

            for i, (real_A, real_B) in pbar:

                num_iterations = (n_samples //
                                  self.mini_batch_size) * epoch + i

                if num_iterations > 10000:
                    identity_loss_lambda = 0
                if num_iterations > self.start_decay:
                    self.adjust_lr_rate(self.generator_optimizer,
                                        name="generator")
                    self.adjust_lr_rate(self.discriminator_optimizer,
                                        name="discriminator")

                real_A = real_A.to(self.device).float()
                real_B = real_B.to(self.device).float()

                # Generator Loss function
                fake_B = self.generator_A2B(real_A)
                cycle_A = self.generator_B2A(fake_B)

                fake_A = self.generator_B2A(real_B)
                cycle_B = self.generator_A2B(fake_A)

                identity_A = self.generator_B2A(real_A)
                identity_B = self.generator_A2B(real_B)

                d_fake_A = self.discriminator_A(fake_A)
                d_fake_B = self.discriminator_B(fake_B)

                # for the second step adversarial loss
                d_fake_cycle_A = self.discriminator_A(cycle_A)
                d_fake_cycle_B = self.discriminator_B(cycle_B)

                # Generator Cycle loss
                cycleLoss = flow.mean(flow.abs(real_A - cycle_A)) + flow.mean(
                    flow.abs(real_B - cycle_B))

                # Generator Identity Loss
                identityLoss = flow.mean(
                    flow.abs(real_A - identity_A)) + flow.mean(
                        flow.abs(real_B - identity_B))

                # Generator Loss
                generator_loss_A2B = flow.mean((1 - d_fake_B)**2)
                generator_loss_B2A = flow.mean((1 - d_fake_A)**2)

                # Total Generator Loss
                generator_loss = (generator_loss_A2B + generator_loss_B2A +
                                  cycle_loss_lambda * cycleLoss +
                                  identity_loss_lambda * identityLoss)
                self.generator_loss_store.append(generator_loss.item())

                # Backprop for Generator
                self.reset_grad()
                generator_loss.backward()

                self.generator_optimizer.step()

                # Discriminator Feed Forward
                d_real_A = self.discriminator_A(real_A)
                d_real_B = self.discriminator_B(real_B)

                generated_A = self.generator_B2A(real_B)
                d_fake_A = self.discriminator_A(generated_A)

                # for the second step adversarial loss
                cycled_B = self.generator_A2B(generated_A)
                d_cycled_B = self.discriminator_B(cycled_B)

                generated_B = self.generator_A2B(real_A)
                d_fake_B = self.discriminator_B(generated_B)

                # for the second step adversarial loss
                cycled_A = self.generator_B2A(generated_B)
                d_cycled_A = self.discriminator_A(cycled_A)

                # Loss Functions
                d_loss_A_real = flow.mean((1 - d_real_A)**2)
                d_loss_A_fake = flow.mean((0 - d_fake_A)**2)
                d_loss_A = (d_loss_A_real + d_loss_A_fake) / 2.0

                d_loss_B_real = flow.mean((1 - d_real_B)**2)
                d_loss_B_fake = flow.mean((0 - d_fake_B)**2)
                d_loss_B = (d_loss_B_real + d_loss_B_fake) / 2.0

                # the second step adversarial loss
                d_loss_A_cycled = flow.mean((0 - d_cycled_A)**2)
                d_loss_B_cycled = flow.mean((0 - d_cycled_B)**2)
                d_loss_A_2nd = (d_loss_A_real + d_loss_A_cycled) / 2.0
                d_loss_B_2nd = (d_loss_B_real + d_loss_B_cycled) / 2.0

                # Final Loss for discriminator with the second step adversarial loss
                d_loss = (d_loss_A + d_loss_B) / 2.0 + (d_loss_A_2nd +
                                                        d_loss_B_2nd) / 2.0
                self.discriminator_loss_store.append(d_loss.item())

                # Backprop for Discriminator
                self.reset_grad()
                d_loss.backward()

                self.discriminator_optimizer.step()

                if (i + 1) % 2 == 0:
                    pbar.set_description(
                        "Iter:{} Generator Loss:{:.4f} Discrimator Loss:{:.4f} GA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}"
                        .format(
                            num_iterations,
                            generator_loss.item(),
                            d_loss.item(),
                            generator_loss_A2B,
                            generator_loss_B2A,
                            identityLoss,
                            cycleLoss,
                            d_loss_A,
                            d_loss_B,
                        ))

            if epoch % 2000 == 0 and epoch != 0:
                end_time = time.time()
                store_to_file = "Epoch: {} Generator Loss: {:.4f} Discriminator Loss: {}, Time: {:.2f}\n\n".format(
                    epoch,
                    generator_loss.item(),
                    d_loss.item(),
                    end_time - start_time_epoch,
                )
                self.store_to_file(store_to_file)
                print(
                    "Epoch: {} Generator Loss: {:.4f} Discriminator Loss: {}, Time: {:.2f}\n\n"
                    .format(
                        epoch,
                        generator_loss.item(),
                        d_loss.item(),
                        end_time - start_time_epoch,
                    ))

                # Save the Entire model
                print("Saving model Checkpoint  ......")
                store_to_file = "Saving model Checkpoint  ......"
                self.store_to_file(store_to_file)
                self.saveModelCheckPoint(epoch, self.modelCheckpoint)
                print("Model Saved!")

            if epoch % 2000 == 0 and epoch != 0:
                # Validation Set
                validation_start_time = time.time()
                self.validation_for_A_dir()
                self.validation_for_B_dir()
                validation_end_time = time.time()
                store_to_file = "Time taken for validation Set: {}".format(
                    validation_end_time - validation_start_time)
                self.store_to_file(store_to_file)
                print("Time taken for validation Set: {}".format(
                    validation_end_time - validation_start_time))
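The squared terms in this loop are the least-squares GAN (LSGAN) objective, with target 1 for real and 0 for fake samples. In compact form (hypothetical helpers, equivalent to the inline expressions above):

    def lsgan_d_loss(d_real, d_fake):
        return (flow.mean((1 - d_real) ** 2) + flow.mean((0 - d_fake) ** 2)) / 2.0

    def lsgan_g_loss(d_fake):
        return flow.mean((1 - d_fake) ** 2)

Example #24 below uses the same objective, extended with a second-step adversarial term on the cycled outputs.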
Example #20
def to_numpy(x, mean=True):
    if mean:
        x = flow.mean(x)

    return x.numpy()
Example #21
            d_loss.backward()
            optimizerD.step()
            optimizerD.zero_grad()

            ############################
            # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
            ###########################

            fake_img_0 = netG(z)
            fake_out_0 = netD(fake_img_0)
            g_loss = generator_criterion(fake_out_0, fake_img_0, real_img)
            g_loss.backward()
            optimizerG.step()
            optimizerG.zero_grad()

            fake_out = flow.mean(fake_out)
            real_out = flow.mean(real_out)
            # loss for current batch before optimization
            running_results["g_loss"] += g_loss.numpy() * batch_size
            running_results["d_loss"] += d_loss.numpy() * batch_size
            running_results["d_score"] += real_out.numpy() * batch_size
            running_results["g_score"] += fake_out.numpy() * batch_size

            train_bar.set_description(
                desc=
                "[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f" %
                (
                    epoch,
                    NUM_EPOCHS,
                    running_results["d_loss"] / running_results["batch_sizes"],
                    running_results["g_loss"] / running_results["batch_sizes"],
Example #22
    def train_one_epoch(self, epoch, train_loader):

        self.model.train()
        batch_steps = len(train_loader)

        step_loss = AverageMeter()
        auxiliary_loss = AuxiliaryLossAverageMeter()
        span = 0
        for step, (_, inputs, targets) in enumerate(train_loader):

            if self.ngpu > 0:
                inputs = map_to_cuda(inputs)
                targets = map_to_cuda(targets)

            start = time.time()
            loss, aux_loss = self.model(inputs, targets)

            loss = flow.mean(loss) / self.accum_steps
            loss.backward()
            end = time.time()
            span += end - start

            if self.get_rank() == 0:
                step_loss.update(loss.item() * self.accum_steps,
                                 inputs["inputs"].size(0))
                auxiliary_loss.update(aux_loss, self.accum_steps,
                                      inputs["inputs"].size(0))

            if self.global_training_step % self.accum_steps == 0:
                if self.local_rank == 0:
                    self.mean_loss.update(step_loss.avg)

                grad_norm = flow.nn.utils.clip_grad_norm_(
                    self.model.parameters(),
                    self.grad_clip,
                    error_if_nonfinite=False)

                if self.grad_noise > 0.0:
                    for p in self.model.parameters():
                        if p.requires_grad:
                            noise = flow.tensor(
                                np.random.normal(
                                    0,
                                    self.grad_noise,
                                    p.grad.shape,
                                ),
                                device=loss.device,
                            )
                            p.grad += noise / self.accum_steps

                if math.isnan(grad_norm.numpy()):
                    logging.warning("Grad norm is NAN. DO NOT UPDATE MODEL!")
                else:
                    self.scheduler.step()
                    self.optimizer.step()
                self.optimizer.zero_grad()

                if (self.scheduler.global_step % self.log_interval == 0
                        and self.local_rank == 0):
                    process = (step + 1) / batch_steps * 100
                    print_info = (
                        "-Training-Epoch-%d(%.5f%%), Global Step:%d, lr:%.8f, Loss:%.5f, AvgLoss: %.5f, Run Time:%.3f"
                        % (
                            epoch,
                            process,
                            self.scheduler.global_step,
                            self.scheduler.lr,
                            step_loss.avg,
                            self.mean_loss.mean(),
                            span,
                        ))
                    print_info += auxiliary_loss.avg_infos
                    logger.info(print_info)

                    span = 0

                step_loss.reset()
                auxiliary_loss.reset()

            self.global_training_step += 1

            if self.is_debug and step > 30:
                break

        return self.mean_loss.mean()
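The `flow.mean(loss) / self.accum_steps` scaling implements gradient accumulation: `backward()` runs every iteration and gradients add up in `.grad`, while the optimizer only steps every `accum_steps` iterations, so the division keeps the effective update an average over the accumulated micro-batches. Stripped to its core (hypothetical names):

    for step, batch in enumerate(loader):
        loss = model(batch) / accum_steps
        loss.backward()                    # gradients accumulate across iterations
        if (step + 1) % accum_steps == 0:
            optimizer.step()
            optimizer.zero_grad()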
Example #23
def _mean(self, dim=[], keepdim=False):
    return flow.mean(self, dim, keepdim)
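This helper is the implementation behind the Tensor-method form of `mean`. A sketch of wiring it up by plain attribute assignment (OneFlow registers its tensor methods internally; this is only an illustration):

    import oneflow as flow

    flow.Tensor.mean = _mean
    x = flow.ones(2, 3)
    y = x.mean(dim=1)   # forwards to flow.mean(x, 1, False)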
Example #24
    def train(self):
        """Implements the training loop for MaskCycleGAN-VC
        """
        for epoch in range(self.start_epoch, self.num_epochs + 1):

            for i, (real_A, mask_A, real_B,
                    mask_B) in enumerate(self.train_dataloader):
                num_iterations = (self.n_samples //
                                  self.mini_batch_size) * epoch + i
                if num_iterations > 10000:
                    self.identity_loss_lambda = 0
                if num_iterations > self.decay_after:
                    self.adjust_lr_rate(self.generator_optimizer,
                                        generator=True)
                    self.adjust_lr_rate(self.discriminator_optimizer,
                                        generator=False)

                real_A = real_A.to(self.device, dtype=flow.float)
                mask_A = mask_A.to(self.device, dtype=flow.float)
                real_B = real_B.to(self.device, dtype=flow.float)
                mask_B = mask_B.to(self.device, dtype=flow.float)

                # Train Generator
                self.generator_A2B.train()
                self.generator_B2A.train()
                self.discriminator_A.eval()
                self.discriminator_B.eval()
                self.discriminator_A2.eval()
                self.discriminator_B2.eval()

                # Generator Feed Forward
                fake_B = self.generator_A2B(real_A, mask_A)
                cycle_A = self.generator_B2A(fake_B, flow.ones_like(fake_B))
                fake_A = self.generator_B2A(real_B, mask_B)
                cycle_B = self.generator_A2B(fake_A, flow.ones_like(fake_A))
                identity_A = self.generator_B2A(real_A, flow.ones_like(real_A))
                identity_B = self.generator_A2B(real_B, flow.ones_like(real_B))
                d_fake_A = self.discriminator_A(fake_A)
                d_fake_B = self.discriminator_B(fake_B)

                # For Two Step Adversarial Loss
                d_fake_cycle_A = self.discriminator_A2(cycle_A)
                d_fake_cycle_B = self.discriminator_B2(cycle_B)

                # Generator Cycle Loss
                cycleLoss = flow.mean(flow.abs(real_A - cycle_A)) + flow.mean(
                    flow.abs(real_B - cycle_B))

                # Generator Identity Loss
                identityLoss = flow.mean(
                    flow.abs(real_A - identity_A)) + flow.mean(
                        flow.abs(real_B - identity_B))

                # Generator Loss
                g_loss_A2B = flow.mean((1 - d_fake_B)**2)
                g_loss_B2A = flow.mean((1 - d_fake_A)**2)

                # Generator Two Step Adversarial Loss
                generator_loss_A2B_2nd = flow.mean((1 - d_fake_cycle_B)**2)
                generator_loss_B2A_2nd = flow.mean((1 - d_fake_cycle_A)**2)

                # Total Generator Loss
                g_loss = (g_loss_A2B + g_loss_B2A + generator_loss_A2B_2nd +
                          generator_loss_B2A_2nd +
                          self.cycle_loss_lambda * cycleLoss +
                          self.identity_loss_lambda * identityLoss)

                # Backprop for Generator
                self.reset_grad()
                g_loss.backward()
                self.generator_optimizer.step()

                # Train Discriminator
                self.generator_A2B.eval()
                self.generator_B2A.eval()
                self.discriminator_A.train()
                self.discriminator_B.train()
                self.discriminator_A2.train()
                self.discriminator_B2.train()

                # Discriminator Feed Forward
                d_real_A = self.discriminator_A(real_A)
                d_real_B = self.discriminator_B(real_B)
                d_real_A2 = self.discriminator_A2(real_A)
                d_real_B2 = self.discriminator_B2(real_B)
                generated_A = self.generator_B2A(real_B, mask_B)
                d_fake_A = self.discriminator_A(generated_A)

                # For Two Step Adversarial Loss A->B
                cycled_B = self.generator_A2B(generated_A,
                                              flow.ones_like(generated_A))
                d_cycled_B = self.discriminator_B2(cycled_B)

                generated_B = self.generator_A2B(real_A, mask_A)
                d_fake_B = self.discriminator_B(generated_B)

                # For Two Step Adversarial Loss B->A
                cycled_A = self.generator_B2A(generated_B,
                                              flow.ones_like(generated_B))
                d_cycled_A = self.discriminator_A2(cycled_A)

                # Loss Functions
                d_loss_A_real = flow.mean((1 - d_real_A)**2)
                d_loss_A_fake = flow.mean((0 - d_fake_A)**2)
                d_loss_A = (d_loss_A_real + d_loss_A_fake) / 2.0

                d_loss_B_real = flow.mean((1 - d_real_B)**2)
                d_loss_B_fake = flow.mean((0 - d_fake_B)**2)
                d_loss_B = (d_loss_B_real + d_loss_B_fake) / 2.0

                # Two Step Adversarial Loss
                d_loss_A_cycled = flow.mean((0 - d_cycled_A)**2)
                d_loss_B_cycled = flow.mean((0 - d_cycled_B)**2)
                d_loss_A2_real = flow.mean((1 - d_real_A2)**2)
                d_loss_B2_real = flow.mean((1 - d_real_B2)**2)
                d_loss_A_2nd = (d_loss_A2_real + d_loss_A_cycled) / 2.0
                d_loss_B_2nd = (d_loss_B2_real + d_loss_B_cycled) / 2.0

                # Final Loss for discriminator with the Two Step Adversarial Loss
                d_loss = (d_loss_A + d_loss_B) / 2.0 + (d_loss_A_2nd +
                                                        d_loss_B_2nd) / 2.0

                # Backprop for Discriminator
                self.reset_grad()
                d_loss.backward()
                self.discriminator_optimizer.step()

                if (i + 1) % 2 == 0:
                    print(
                        "Iter:{} Generator Loss:{:.4f} Discrimator Loss:{:.4f} GA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}"
                        .format(
                            num_iterations,
                            g_loss.item(),
                            d_loss.item(),
                            g_loss_A2B,
                            g_loss_B2A,
                            identityLoss,
                            cycleLoss,
                            d_loss_A,
                            d_loss_B,
                        ))

            # Save each model checkpoint and validation
            if epoch % self.epochs_per_save == 0 and epoch != 0:
                self.saveModelCheckPoint(epoch, PATH="model_checkpoint")
                self.validation_for_A_dir()
                self.validation_for_B_dir()