Example #1
def lsgan_loss(real, weight, fake=None):
    if fake is not None:
        loss = weight * F.mean(F.squared_error(F.constant(1, real.shape), real)
                               + F.pow_scalar(fake, 2))
    else:
        loss = weight * \
            F.mean(F.squared_error(F.constant(1, real.shape), real))
    return loss
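A minimal usage sketch for the loss above (not from the original repository), assuming F is nnabla.functions and d_real / d_fake are hypothetical discriminator outputs on real and generated batches:

import nnabla as nn
import nnabla.functions as F

d_real = nn.Variable((8, 1))  # hypothetical D(x) on real samples
d_fake = nn.Variable((8, 1))  # hypothetical D(G(z)) on generated samples

# Discriminator loss: real outputs pulled towards 1, fake outputs towards 0.
d_loss = lsgan_loss(d_real, weight=1.0, fake=d_fake)
# Generator loss: fake outputs (treated as "real" here) pulled towards 1.
g_loss = lsgan_loss(d_fake, weight=1.0)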
Example #2
def create_network(batchsize, imheight, imwidth, args, seen):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    target = nn.Variable((batchsize, 50 * 5))
    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    # Bounding box regression loss
    # pred.shape = [nB, nA, 4, nH, nW]
    output = F.reshape(yolo_features, (nB, nA, (5 + nC), nH, nW))
    xy = F.sigmoid(output[:, :, :2, ...])
    wh = output[:, :, 2:4, ...]
    bbox_pred = F.concatenate(xy, wh, axis=2)
    conf_pred = F.sigmoid(output[:, :, 4:5, ...])
    cls_pred = output[:, :, 5:, ...]

    region_loss_targets = RegionLossTargets(nC, args.anchors, seen,
                                            args.coord_scale,
                                            args.noobject_scale,
                                            args.object_scale,
                                            args.class_scale, args.thresh)

    tcoord, mcoord, tconf, mconf, tcls, mcls = region_loss_targets(
        bbox_pred, target)
    for v in tcoord, mcoord, tconf, mconf, tcls, mcls:
        v.need_grad = False

    # Bounding box regression
    bbox_loss = F.sum(F.squared_error(bbox_pred, tcoord) * mcoord)

    # Conf (IoU) regression loss
    conf_loss = F.sum(F.squared_error(conf_pred, tconf) * mconf)

    # Class probability regression loss
    cls_loss = F.sum(F.softmax_cross_entropy(cls_pred, tcls, axis=2) * mcls)

    # Note:
    # loss is divided by 2.0 because the original darknet
    # code doesn't multiply the derivative of square functions by 2.0
    # in region_layer.c.
    loss = (bbox_loss + conf_loss) / 2.0 + cls_loss

    return yolo_x, target, loss, region_loss_targets
Example #3
def ls_gan_loss(r_out, f_out):
    # TODO: allow the target constants to be set arbitrarily
    # D
    d_gan_real = F.mean(F.squared_error(r_out,
                                        F.constant(1., shape=r_out.shape)))
    d_gan_fake = F.mean(F.squared_error(f_out,
                                        F.constant(0., shape=f_out.shape)))

    # G
    g_gan = F.mean(F.squared_error(f_out,
                                   F.constant(1., shape=f_out.shape)))

    return d_gan_real, d_gan_fake, g_gan
Example #4
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r
Example #5
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        b = log_var.shape[0]
        r = F.sum(F.squared_error(h, one)) / b
    return r
Example #6
def test_simple_loop():
    nn.clear_parameters()

    x = nn.Variable.from_numpy_array(np.random.randn(10, 3, 128, 128))
    t = nn.Variable.from_numpy_array(np.random.randint(0, 100, (10, )))

    unet = UNet(num_classes=1,
                model_channels=128,
                output_channels=3,
                num_res_blocks=2,
                attention_resolutions=(16, 8),
                attention_num_heads=4,
                channel_mult=(1, 1, 2, 2, 4, 4))
    y = unet(x, t)

    loss = F.mean(F.squared_error(y, x))

    import nnabla.solvers as S
    solver = S.Sgd()
    solver.set_parameters(nn.get_parameters())

    from tqdm import trange
    tr = trange(100)
    for i in tr:
        loss.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss.backward(clear_buffer=True)
        solver.update()

        tr.set_description(f"diff: {float(loss.d):.5f}")
Example #7
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var):
    #TODO: squared error/absolute error
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(
            F.softmax(pred0), F.softmax(pred1)) * F.exp(-log_var)) \
                  + F.mean(log_var)
    return loss_sr
Example #8
def capsule_loss(v_norm, t_onehot, recon=None, x=None, m_pos=0.9, m_neg=0.1, wn=0.5, wr=0.0005):
    '''
    Compute a margin loss given a length vector of output capsules and one-hot labels, and optionally compute a reconstruction loss.

    Margin loss is given in eq 4. Reconstruction loss is given in Sec 4.1.

    Args:
        v_norm (nnabla.Variable): A length vector of capsules. A shape of [B, capsules].
        t_onehot (nnabla.Variable): A shape of [B, capsules].
        recon (nnabla.Variable): Reconstruction output with a shape of [B, 1, 28, 28]. The values are in [0, 0.1].
        x (nnabla.Variable): Reconstruction target (i.e. input) with a shape of [B, 1, 28, 28]. The values are in [0, 0.1].
        m_pos (float): Margin for capsules corresponding to targets.
        m_neg (float): Margin for capsules corresponding to non-targets.
        wn (float): Weight of the non-target margin loss.
        wr (float): Weight of the reconstruction loss.

    Returns:
        nnabla.Variable: 0-dim

    '''
    # Classification loss
    lp = F.sum(t_onehot * F.relu(m_pos - v_norm) ** 2)
    ln = F.sum((1 - t_onehot) * F.relu(v_norm - m_neg) ** 2)
    lmargin = lp + wn * ln
    if recon is None or x is None:
        return lmargin / v_norm.shape[0]
    # Reconstruction loss
    lr = F.sum(F.squared_error(recon, x))
    # return (lmargin + wr * lr) / v_norm.shape[0]
    lmargin = lmargin / v_norm.shape[0]
    lmargin.persistent = True
    lreconst = (wr * lr) / v_norm.shape[0]
    lreconst.persistent = True
    return lmargin, lreconst, lmargin + lreconst
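A hedged usage sketch for capsule_loss, with hypothetical shapes that follow the docstring (10 output capsules, MNIST-sized reconstruction):

import nnabla as nn
import nnabla.functions as F

batch = 16
v_norm = nn.Variable((batch, 10))        # capsule lengths
t_onehot = nn.Variable((batch, 10))      # one-hot labels
recon = nn.Variable((batch, 1, 28, 28))  # reconstruction output
x = nn.Variable((batch, 1, 28, 28))      # reconstruction target (input image)

# Margin loss only:
lmargin = capsule_loss(v_norm, t_onehot)
# Margin plus reconstruction loss (returns both terms and their sum):
lmargin, lreconst, ltotal = capsule_loss(v_norm, t_onehot, recon, x)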
Example #9
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        b = log_var.shape[0]
        r = F.sum(F.squared_error(h, one)) / b
    return r
Example #10
def siamese_loss(e0, e1, t, margin=1.0, eps=1e-4):
    dist = F.sum(F.squared_error(e0, e1), axis=1)  # Squared distance
    # Contrastive loss
    sim_cost = t * dist
    dissim_cost = (1 - t) * (F.maximum_scalar(margin -
                                              (dist + eps)**(0.5), 0)**2)
    return F.mean(sim_cost + dissim_cost)
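A small sketch of how the contrastive loss above might be driven, with hypothetical embedding shapes; t is 1 for similar pairs and 0 for dissimilar pairs:

import nnabla as nn
import nnabla.functions as F

e0 = nn.Variable((32, 64))  # embedding of the first image in each pair
e1 = nn.Variable((32, 64))  # embedding of the second image
t = nn.Variable((32,))      # 1: similar pair, 0: dissimilar pair

loss = siamese_loss(e0, e1, t, margin=1.0)
# After filling e0.d, e1.d and t.d with data:
#   loss.forward(); loss.backward()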
Example #11
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r
Example #12
def create_network(batchsize, imheight, imwidth, args):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    output = yolo_features.get_unlinked_variable(need_grad=True)
    # TODO: Workaround until v1.0.2.
    # Explicitly enable grad since need_grad option above didn't work.
    output.need_grad = True

    output = F.reshape(output, (nB, nA, (5 + nC), nH, nW))
    output_splitted = F.split(output, 2)
    x, y, w, h, conf = [v.reshape((nB, nA, nH, nW))
                        for v in output_splitted[0:5]]
    x, y, conf = map(F.sigmoid, [x, y, conf])

    cls = F.stack(*output_splitted[5:], axis=2)
    cls = cls.reshape((nB*nA, nC, nH*nW))
    cls = F.transpose(cls, [0, 2, 1]).reshape((nB*nA*nH*nW, nC))

    tx, ty, tw, th, tconf, coord_mask, conf_mask_sq = [
        nn.Variable(v.shape) for v in [x, y, w, h, conf, x, conf]]
    cls_ones, cls_mask = [nn.Variable(cls.shape) for _ in range(2)]
    tcls, cls_mask_bb = [nn.Variable((cls.shape[0], 1)) for _ in range(2)]

    coord_mask_sq = F.pow_scalar(coord_mask, 2)
    loss_x = args.coord_scale * F.sum(F.squared_error(x, tx) * coord_mask_sq)
    loss_y = args.coord_scale * F.sum(F.squared_error(y, ty) * coord_mask_sq)
    loss_w = args.coord_scale * F.sum(F.squared_error(w, tw) * coord_mask_sq)
    loss_h = args.coord_scale * F.sum(F.squared_error(h, th) * coord_mask_sq)
    loss_conf = F.sum(F.squared_error(conf, tconf) * conf_mask_sq)
    loss_cls = args.class_scale * \
        F.sum(cls_mask_bb * F.softmax_cross_entropy(cls + cls_ones - cls_mask, tcls))
    loss_nnabla = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    return yolo_x, yolo_features, (x, y, w, h, conf, cls), (tx, ty, tw, th, tconf, coord_mask, conf_mask_sq, cls_ones, cls_mask, tcls, cls_mask_bb), loss_nnabla
Example #13
    def _build(self):
        # inference
        self.infer_obs_t = nn.Variable((1,) + self.obs_shape)

        with nn.parameter_scope('trainable'):
            self.infer_policy_t = policy_network(self.infer_obs_t,
                                                 self.action_size, 'actor')

        # training
        self.obss_t = nn.Variable((self.batch_size,) + self.obs_shape)
        self.acts_t = nn.Variable((self.batch_size, self.action_size))
        self.rews_tp1 = nn.Variable((self.batch_size, 1))
        self.obss_tp1 = nn.Variable((self.batch_size,) + self.obs_shape)
        self.ters_tp1 = nn.Variable((self.batch_size, 1))

        # critic training
        with nn.parameter_scope('trainable'):
            q_t = q_network(self.obss_t, self.acts_t, 'critic')
        with nn.parameter_scope('target'):
            policy_tp1 = policy_network(self.obss_tp1, self.action_size,
                                        'actor')
            q_tp1 = q_network(self.obss_tp1, policy_tp1, 'critic')
        y = self.rews_tp1 + self.gamma * q_tp1 * (1.0 - self.ters_tp1)
        self.critic_loss = F.mean(F.squared_error(q_t, y))

        # actor training
        with nn.parameter_scope('trainable'):
            policy_t = policy_network(self.obss_t, self.action_size, 'actor')
            q_t_with_actor = q_network(self.obss_t, policy_t, 'critic')
        self.actor_loss = -F.mean(q_t_with_actor)

        # get neural network parameters
        with nn.parameter_scope('trainable'):
            with nn.parameter_scope('critic'):
                critic_params = nn.get_parameters()
            with nn.parameter_scope('actor'):
                actor_params = nn.get_parameters()

        # setup optimizers
        self.critic_solver = S.Adam(self.critic_lr)
        self.critic_solver.set_parameters(critic_params)
        self.actor_solver = S.Adam(self.actor_lr)
        self.actor_solver.set_parameters(actor_params)

        with nn.parameter_scope('trainable'):
            trainable_params = nn.get_parameters()
        with nn.parameter_scope('target'):
            target_params = nn.get_parameters()

        # build target update
        update_targets = []
        sync_targets = []
        for key, src in trainable_params.items():
            dst = target_params[key]
            updated_dst = (1.0 - self.tau) * dst + self.tau * src
            update_targets.append(F.assign(dst, updated_dst))
            sync_targets.append(F.assign(dst, src))
        self.update_target_expr = F.sink(*update_targets)
        self.sync_target_expr = F.sink(*sync_targets)
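The block that builds update_target_expr / sync_target_expr above is the usual soft (Polyak) target update; below is a standalone sketch of the same idiom under assumed scopes and a toy layer, not code from the original agent:

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

tau = 0.005
x = nn.Variable((1, 4))
with nn.parameter_scope('trainable'):
    _ = PF.affine(x, 2)   # creates parameters under the 'trainable' scope
with nn.parameter_scope('target'):
    _ = PF.affine(x, 2)   # same shapes under the 'target' scope

with nn.parameter_scope('trainable'):
    trainable_params = nn.get_parameters()
with nn.parameter_scope('target'):
    target_params = nn.get_parameters()

updates = [F.assign(target_params[k], (1.0 - tau) * target_params[k] + tau * src)
           for k, src in trainable_params.items()]
update_target_expr = F.sink(*updates)
update_target_expr.forward()  # running forward executes every assign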
Example #14
def mnist_lenet_siamese(x0, x1, test=False):
    """"""
    h0 = mnist_lenet_feature(x0, test)
    h1 = mnist_lenet_feature(x1, test)  # share weights
    # h = (h0 - h1) ** 2 # equivalent
    h = F.squared_error(h0, h1)
    p = F.sum(h, axis=1)
    return p
Example #15
def mnist_lenet_siamese(x0, x1, test=False):
    """"""
    h0 = mnist_lenet_feature(x0, test)
    h1 = mnist_lenet_feature(x1, test)  # share weights
    # h = (h0 - h1) ** 2 # equivalent
    h = F.squared_error(h0, h1)
    p = F.sum(h, axis=1)
    return p
Example #16
def feature_matching_loss(x, y, num=4):
    """
    Calculate feature matching loss
    """
    fm_loss = 0.0
    for i in range(num):
        fm_loss += F.mean(F.squared_error(x[i], y[i]))
    return fm_loss
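A short usage sketch, assuming x and y are hypothetical lists of intermediate discriminator feature maps for the generated and the real input respectively:

import nnabla as nn
import nnabla.functions as F

# Four hypothetical feature maps per input (e.g. one per discriminator block).
x = [nn.Variable((4, 64, 32, 32)) for _ in range(4)]
y = [nn.Variable((4, 64, 32, 32)) for _ in range(4)]

fm_loss = feature_matching_loss(x, y, num=4)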
Example #17
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r
Example #18
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    #TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(squared_error * (1 / s0 + 1 / s1) + (s0 / s1 + s1 / s0)) * 0.5
    return loss_sr
Example #19
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r
Example #20
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    #TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(squared_error * (1 / s0 + 1 / s1) + (s0 / s1 + s1 / s0)) * 0.5
    return loss_sr
Example #21
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    s0 = F.pow_scalar(var0, 0.5)
    s1 = F.pow_scalar(var1, 0.5)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss = F.log(s1/s0) + (var0/var1 + squared_error/var1) * 0.5
        loss_sr = F.mean(loss)
    return loss_sr
Example #22
    def __call__(self, x, return_encoding_indices=False):

        x = F.transpose(x, (0, 2, 3, 1))
        x_flat = x.reshape((-1, self.embedding_dim))

        x_flat_squared = F.broadcast(F.sum(x_flat**2, axis=1, keepdims=True),
                                     (x_flat.shape[0], self.num_embedding))
        emb_wt_squared = F.transpose(
            F.sum(self.embedding_weight**2, axis=1, keepdims=True), (1, 0))

        distances = x_flat_squared + emb_wt_squared - 2 * \
            F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0)))

        encoding_indices = F.min(distances,
                                 only_index=True,
                                 axis=1,
                                 keepdims=True)
        encoding_indices.need_grad = False

        quantized = F.embed(
            encoding_indices.reshape(encoding_indices.shape[:-1]),
            self.embedding_weight).reshape(x.shape)

        if return_encoding_indices:
            return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

        encodings = F.one_hot(encoding_indices, (self.num_embedding, ))

        e_latent_loss = F.mean(
            F.squared_error(quantized.get_unlinked_variable(need_grad=False),
                            x))
        q_latent_loss = F.mean(
            F.squared_error(quantized,
                            x.get_unlinked_variable(need_grad=False)))
        loss = q_latent_loss + self.commitment_cost * e_latent_loss

        quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

        avg_probs = F.mean(encodings, axis=0)
        perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

        return loss, F.transpose(quantized,
                                 (0, 3, 1, 2)), perplexity, encodings
Example #23
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_v0, log_v1, 
                             log_s0, log_s1):
    v0 = F.exp(log_v0)
    v1 = F.exp(log_v1)
    squared_error = F.squared_error(pred0, pred1)
    s0 = F.exp(log_s0)
    s1 = F.exp(log_s1)
    with nn.context_scope(ctx):
        error = squared_error * (1 / v0 + 1 / v1) + (v0 / v1 + v1 / v0) + (s0 / s1 + s1 / s0)
        loss_sr = F.mean(error) * 0.5
    return loss_sr
Example #24
    def preservation_loss(self, x, target):
        r"""Returns content preservation loss.

            Args:
                x (nn.Variable): Input variable.
                target (nn.Variable): Target variable.

            Returns:
                nn.Variable: Output loss.
            """
        loss = F.mean(F.squared_error(x, target))
        return loss
Example #25
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_v0, log_v1, log_s0,
                             log_s1):
    v0 = F.exp(log_v0)
    v1 = F.exp(log_v1)
    squared_error = F.squared_error(pred0, pred1)
    s0 = F.exp(log_s0)
    s1 = F.exp(log_s1)
    with nn.context_scope(ctx):
        error = squared_error * (1 / v0 + 1 / v1) + (v0 / v1 + v1 / v0) + (
            s0 / s1 + s1 / s0)
        loss_sr = F.mean(error) * 0.5
    return loss_sr
Example #26
def mse(x, y, mask=None, eps=1e-5):
    # l2 distance and reduce mean
    se = F.squared_error(x, y)

    if mask is not None:
        assert se.shape[:2] == mask.shape[:2]

        se *= F.reshape(mask, se.shape)

        return F.sum(se) / (F.sum(mask) + eps)

    return F.mean(se)
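A quick sketch of both branches, with hypothetical shapes; the mask marks which entries contribute, and the masked sum is normalized by the number of valid entries:

import nnabla as nn

x = nn.Variable((8, 128))
y = nn.Variable((8, 128))
mask = nn.Variable((8, 128))  # 1 where the target is valid, 0 elsewhere

masked_loss = mse(x, y, mask)  # sum of masked errors / (sum of mask + eps)
plain_loss = mse(x, y)         # ordinary mean squared error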
Example #27
    def train(self):
        # variables for training
        tx_in = nn.Variable(
            [self._batch_size, self._x_input_length, self._cols_size])
        tx_out = nn.Variable(
            [self._batch_size, self._x_output_length, self._cols_size])
        tpred = self.network(tx_in, self._lstm_unit_name, self._lstm_units)
        tpred.persistent = True
        loss = F.mean(F.squared_error(tpred, tx_out))
        solver = S.Adam(self._learning_rate)
        solver.set_parameters(nn.get_parameters())

        # variables for validation
        vx_in = nn.Variable(
            [self._batch_size, self._x_input_length, self._cols_size])
        vx_out = nn.Variable(
            [self._batch_size, self._x_output_length, self._cols_size])
        vpred = self.network(vx_in, self._lstm_unit_name, self._lstm_units)

        # data iterators
        tdata = self._load_dataset(self._training_dataset_path,
                                   self._batch_size,
                                   shuffle=True)
        vdata = self._load_dataset(self._validation_dataset_path,
                                   self._batch_size,
                                   shuffle=True)

        # monitors
        from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
        monitor = Monitor(self._monitor_path)
        monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
        monitor_err = MonitorSeries("Training error", monitor, interval=10)
        monitor_time = MonitorTimeElapsed("Training time",
                                          monitor,
                                          interval=100)
        monitor_verr = MonitorSeries("Validation error", monitor, interval=10)

        # Training loop
        for i in range(self._max_iter):
            if i % self._val_interval == 0:
                ve = self._validate(vpred, vx_in, vx_out, vdata,
                                    self._val_iter)
                monitor_verr.add(i, ve / self._val_iter)
            te = self._train(tpred, solver, loss, tx_in, tx_out, tdata.next(),
                             self._weight_decay)
            monitor_loss.add(i, loss.d.copy())
            monitor_err.add(i, te)
            monitor_time.add(i)
        ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter)
        monitor_verr.add(i, ve / self._val_iter)

        # Save a best model parameters
        nn.save_parameters(self._model_params_path)
Example #28
    def spectral_loss(self, x, target):
        r"""Returns the multi-scale spectral loss.

        Args:
            x (nn.Variable): Input variable.
            target (nn.Variable): Target variable.

        Returns:
            nn.Variable: Multi-scale spectral loss.
        """
        loss = []
        for window_size in self.hp.window_sizes:
            sx = log_mel_spectrogram(x, self.hp.sr, window_size)
            st = log_mel_spectrogram(target, self.hp.sr, window_size)
            st.need_grad = False  # avoid grads flowing through targets
            loss.append(F.mean(F.squared_error(sx, st)))
        return sum(loss)
Example #29
def get_warp_loss(conf, rnn_length, frame_t, frame_t_pre, flow_lr):
    """
    Warp loss
    """
    input_frames = F.reshape(frame_t,
                             (conf.train.batch_size * (rnn_length - 1),
                              conf.train.crop_size, conf.train.crop_size, 3))
    frame_t_pre_reshaped = F.reshape(
        frame_t_pre,
        (conf.train.batch_size *
         (rnn_length - 1), conf.train.crop_size, conf.train.crop_size, 3))
    s_input_warp = warp_by_flow(frame_t_pre_reshaped, flow_lr)

    warp_loss = F.mean(
        F.sum(F.squared_error(input_frames, s_input_warp), axis=[3]))

    return warp_loss
Example #30
def sr_loss_with_uncertainty_and_coef(ctx, pred0, pred1, log_var0, log_var1):
    c0 = srwu_learned_coef(ctx, log_var0)
    c1 = srwu_learned_coef(ctx, log_var1)
    sc0 = sigmas_learned_coef(ctx, log_var0, log_var1)
    sc1 = sigmas_learned_coef(ctx, log_var1, log_var0)
    c0.need_grad = False
    c1.need_grad = False
    sc0.need_grad = False
    sc1.need_grad = False

    #TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(
            squared_error * (c0 / s0 + c1 / s1) + (sc0 * s0 / s1 + sc1 * s1 / s0)) * 0.5
    return loss_sr
Example #31
    def _build(self):
        generator_fn, discriminator_fn = self._network_funcs()

        # real shape
        ch, w, h = self.real.shape[1:]

        # inputs
        self.x = nn.Variable((1, ch, w, h))
        self.y = nn.Variable((1, ch, w, h))
        self.rec_x = nn.Variable((1, ch, w, h))
        self.rec_y = nn.Variable((1, ch, w, h))
        y_real = nn.Variable.from_numpy_array(self.real)
        y_real.persistent = True

        # padding inputs
        padded_x = _pad(self.x, self.kernel, self.num_layer)
        padded_rec_x = _pad(self.rec_x, self.kernel, self.num_layer)

        # generate fake image
        self.fake = generator_fn(x=padded_x, y=self.y)
        fake_without_grads = F.identity(self.fake)
        fake_without_grads.need_grad = False
        rec = generator_fn(x=padded_rec_x, y=self.rec_y)

        # discriminate images
        p_real = discriminator_fn(x=y_real)
        p_fake = discriminator_fn(x=self.fake)
        p_fake_without_grads = discriminator_fn(x=fake_without_grads)

        # gradient penalty for discriminator
        grad_penalty = _calc_gradient_penalty(y_real, fake_without_grads,
                                              discriminator_fn)

        # discriminator loss
        self.d_real_error = -F.mean(p_real)
        self.d_fake_error = F.mean(p_fake_without_grads)
        self.d_error = self.d_real_error + self.d_fake_error \
                                         + self.lam_grad * grad_penalty

        # generator loss
        self.rec_error = F.mean(F.squared_error(rec, y_real))
        self.g_fake_error = -F.mean(p_fake)
        self.g_error = self.g_fake_error + self.alpha_recon * self.rec_error
Example #32
    def __init__(self, num_actions, num_envs, batch_size, v_coeff, ent_coeff,
                 lr_scheduler):
        # inference graph
        self.infer_obs_t = nn.Variable((num_envs, 4, 84, 84))
        self.infer_pi_t,\
        self.infer_value_t = cnn_network(self.infer_obs_t, num_actions,
                                         'network')
        self.infer_t = F.sink(self.infer_pi_t, self.infer_value_t)

        # evaluation graph
        self.eval_obs_t = nn.Variable((1, 4, 84, 84))
        self.eval_pi_t, _ = cnn_network(self.eval_obs_t, num_actions,
                                        'network')

        # training graph
        self.obss_t = nn.Variable((batch_size, 4, 84, 84))
        self.acts_t = nn.Variable((batch_size, 1))
        self.rets_t = nn.Variable((batch_size, 1))
        self.advs_t = nn.Variable((batch_size, 1))

        pi_t, value_t = cnn_network(self.obss_t, num_actions, 'network')

        # value loss
        l2loss = F.squared_error(value_t, self.rets_t)
        self.value_loss = v_coeff * F.mean(l2loss)

        # policy loss
        log_pi_t = F.log(pi_t + 1e-20)
        a_one_hot = F.one_hot(self.acts_t, (num_actions, ))
        log_probs_t = F.sum(log_pi_t * a_one_hot, axis=1, keepdims=True)
        self.pi_loss = F.mean(log_probs_t * self.advs_t)

        # KL loss
        entropy = -ent_coeff * F.mean(F.sum(pi_t * log_pi_t, axis=1))

        self.loss = self.value_loss - self.pi_loss - entropy

        self.params = nn.get_parameters()
        self.solver = S.RMSprop(lr_scheduler(0.0), 0.99, 1e-5)
        self.solver.set_parameters(self.params)
        self.lr_scheduler = lr_scheduler
Example #33
def vgg16_perceptual_loss(fake, real):
    '''
        VGG perceptual loss based on VGG-16 network.
        Assuming the values in fake and real are in [0, 255].
    '''
    from nnabla.models.imagenet import VGG16

    class VisitFeatures(object):
        def __init__(self):
            self.features = []
            self.relu_counter = 0
            # self.features_at = set([1, 4, 7, 10]) : ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']
            self.features_at = set([4, 7])

        def __call__(self, f):
            if not f.name.startswith('ReLU'):
                return
            if self.relu_counter in self.features_at:
                self.features.append(f.outputs[0])
            self.relu_counter += 1

    vgg = VGG16()

    def get_features(x):
        o = vgg(x, use_up_to='lastconv')
        f = VisitFeatures()
        o.visit(f)
        return f

    with nn.parameter_scope("vgg16_loss"):
        fake_features = get_features(fake)
        real_features = get_features(real)

    return sum([
        F.mean(F.squared_error(ff, fr))
        for ff, fr in zip(fake_features.features, real_features.features)
    ])
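A hedged usage sketch; the batches are assumed to be NCHW RGB in [0, 255] as the docstring requires (instantiating VGG16 may download the pretrained weights on first use):

import nnabla as nn
import nnabla.functions as F

fake = nn.Variable((4, 3, 224, 224))  # e.g. a generator output rescaled to [0, 255]
real = nn.Variable((4, 3, 224, 224))  # the corresponding ground-truth batch

perc_loss = vgg16_perceptual_loss(fake, real)
# Typically combined with a pixel-wise term, e.g.:
# total_loss = F.mean(F.squared_error(fake, real)) + 0.1 * perc_loss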
Example #34
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        pred_x_u0 = F.softmax(pred0)
        pred_x_u1 = F.softmax(pred1)
        loss_sr = F.mean(F.squared_error(pred_x_u0, pred_x_u1))
    return loss_sr
Example #35
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(pred0, pred1))
    return loss_sr
Example #36
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(pred0, pred1))
    return loss_sr
Example #37
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    x1 = nn.Variable([args.batch_size, 1, 28, 28])

    #z = nn.Variable([args.batch_size, VEC_SIZE, 1, 1])
    #z = vectorizer(x1,maxh = 1024)
    #fake = generator(z,maxh= 1024)
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()

        x1.d = image / 255. - 0.5
        # Generator update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
Example #38
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        pred_x_u0 = F.softmax(pred0)
        pred_x_u1 = F.softmax(pred1)
        loss_sr = F.mean(F.squared_error(pred_x_u0, pred_x_u1))
    return loss_sr
Example #39
def recon_loss(ctx, pred, x_l):
    with nn.context_scope(ctx):
        loss_recon = F.mean(F.squared_error(pred, x_l))
    return loss_recon