Exemple #1
    def _compute_laplacian_mmd(self, samples1, samples2, *, sigma=20.0):
        """Return the MMD between two sample sets under a Laplacian kernel.

        The kernel value for a pair is ``exp(-||a - b||_1 / (2 * sigma))``;
        kernel values are summed over every pair of samples.

        Args:
            samples1: First sample set; the sample count is read from axis 1
                (presumably shaped (batch, n, dim) -- TODO confirm).
            samples2: Second sample set, laid out like ``samples1``.
            sigma: Kernel bandwidth (keyword-only).

        Returns:
            ``sqrt(mmd_squared + 1e-6)``.
        """
        def _kernel_sum(lhs, rhs):
            # Differences between every sample of lhs and every sample of rhs.
            pairwise = F.expand_dims(x=lhs, axis=2) - F.expand_dims(x=rhs, axis=1)
            l1_distance = F.sum(F.absolute(pairwise), axis=-1, keepdims=True)
            return F.sum(F.exp(-l1_distance / (2.0 * sigma)), axis=(1, 2))

        n = samples1.shape[1]
        m = samples2.shape[1]

        sum_k_xx = _kernel_sum(samples1, samples1)
        sum_k_xy = _kernel_sum(samples1, samples2)
        sum_k_yy = _kernel_sum(samples2, samples2)

        mmd_squared = (
            sum_k_xx / (n * n) - 2.0 * sum_k_xy / (m * n) + sum_k_yy / (m * m))
        return F.sqrt(mmd_squared + 1e-6)
Exemple #2
    def _feature_repl(hs_flatten, pairs, ckeys, lengths):
        """Build two replacement features from span-boundary hidden states.

        Args:
            hs_flatten: Hidden states used as the embedding table for
                ``F.embed_id`` (rows are looked up by position index).
            pairs: Integer array whose transpose unpacks into span begin and
                end indices.
            ckeys: Integer indices of the key positions.
            lengths: Array with a ``cumsum`` method; presumably the length of
                each sequence packed into ``hs_flatten`` -- TODO confirm.

        Returns:
            Tuple ``(repl1, repl2)`` with
            ``repl1 = |h_b_pre * (h_b - h_k_post)|`` and
            ``repl2 = |h_e_post * (h_e - h_k_pre)|``.
        """
        xp = chainer.cuda.get_array_module(hs_flatten)
        begins, ends = pairs.T
        begins_ = xp.asarray(begins)
        ends_ = xp.asarray(ends)
        ckeys_ = xp.asarray(ckeys)

        h_b = F.embed_id(begins_, hs_flatten)
        h_b_pre = F.embed_id(begins_ - 1, hs_flatten, ignore_label=-1)
        # Indices sitting one step before the first element of each sequence.
        out_of_span = np.insert(lengths[:-1].cumsum(), 0, 0) - 1
        is_out_of_span = np.isin(begins - 1, out_of_span)
        # Zero the "previous" state when it would fall outside the sequence;
        # otherwise keep the looked-up state (the third operand was missing).
        h_b_pre = F.where(
            xp.asarray(is_out_of_span)[:, None], xp.zeros_like(h_b_pre.data),
            h_b_pre)

        h_e = F.embed_id(ends_, hs_flatten)
        # The table size is used as ignore_label for the one-past-the-end index.
        h_e_post = F.embed_id(ends_ + 1, hs_flatten, hs_flatten.shape[0])
        # Indices sitting one step after the last element of each sequence.
        out_of_span = lengths.cumsum()
        is_out_of_span = np.isin(ends + 1, out_of_span)
        h_e_post = F.where(
            xp.asarray(is_out_of_span)[:, None], xp.zeros_like(h_e_post.data),
            h_e_post)

        h_k_pre = F.embed_id(ckeys_ - 1, hs_flatten)
        h_k_post = F.embed_id(ckeys_ + 1, hs_flatten)

        repl1 = F.absolute(h_b_pre * (h_b - h_k_post))
        repl2 = F.absolute(h_e_post * (h_e - h_k_pre))
        return repl1, repl2
Exemple #3
    def __call__(self, x):
        """Return the mean edge-weighted smoothness of ``x``.

        ``x`` is average-pooled (2x2, stride 2, no padding) and filtered with
        ``self.diff`` and ``self.laplacian``. The absolute ``diff`` responses
        are summed over axis 1 and weighted by ``exp(-|laplacian response|)``.
        """
        pooled = F.average_pooling_2d(x, 2, 2, 0)

        diff_response = F.convolution_2d(pooled, self.diff)
        smoothness = F.sum(F.absolute(diff_response), axis=1, keepdims=True)

        edge_response = F.convolution_2d(pooled, self.laplacian)
        edge_weight = F.exp(-F.absolute(edge_response))

        return F.mean(edge_weight * smoothness)
def total_variation2(x):
    """Return the sum of the mean absolute differences along both image axes.

    The differences are obtained by convolving ``x`` with fixed two-tap
    ``[1, -1]`` kernels, one laid out along the height axis and one along the
    width axis. Both kernels have a single input and output channel.
    """
    xp = cuda.get_array_module(x.data)
    kernel_h = xp.asarray([[[[1], [-1]]]], dtype=x.dtype)
    kernel_w = xp.asarray([[[[1, -1]]]], dtype=x.dtype)

    abs_diff_h = F.absolute(F.convolution_2d(x, W=kernel_h))
    abs_diff_w = F.absolute(F.convolution_2d(x, W=kernel_w))
    return F.average(abs_diff_h) + F.average(abs_diff_w)
    def update_core(self):
        """Compute and back-propagate the critic and generator losses.

        The critic loss is the Wasserstein estimate plus a gradient penalty
        (weighted by ``self.lam``) and a penalty on the sum of
        ``self.critic.inter.W`` (weighted by ``self.lam2``). The generator
        loss is only computed on selected iterations. No optimizer
        ``update()`` call is visible in this block.
        """
        batch = self.get_iterator('main').next()
        batchsize = len(batch)

        # Step 1: generate a fake batch and score it.
        z = Variable(xp.asarray(self.generator.make_hidden(batchsize))) / 255.
        x_gen = self.generator(z)
        y_gen = self.critic(x_gen)

        # Step 2: score the real batch.
        x_real = Variable(xp.array(batch)) / 255.
        y_real = self.critic(x_real)

        # Step 3: gradient penalty on random interpolates of real and fake.
        eps = xp.random.uniform(0, 1, (batchsize, 1, 1, 1)).astype("f")
        x_mid = eps * x_real + (1.0 - eps) * x_gen
        x_mid_v = Variable(x_mid.data)
        y_mid = self.critic(x_mid_v)
        grad_mid = chainer.grad(
            [y_mid], [x_mid_v], enable_double_backprop=True)[0]
        grad_norm = F.sqrt(1e-08 + F.sum(F.square(grad_mid), axis=1))
        loss_gp = self.lam * F.mean_squared_error(
            grad_norm, xp.ones_like(grad_norm.data))

        loss_cri = F.sum(-y_real) / batchsize
        loss_cri += F.sum(y_gen) / batchsize

        # Extra regularisation term on the critic's `inter` layer weights.
        critic_penalty = self.lam2 * F.absolute(
            F.sum(self.critic.inter.W) - 1)
        loss_all = loss_cri + loss_gp + critic_penalty

        # Step 4: back-propagate the critic loss.
        loss_all.backward(loss_scale=0.001)

        # Step 5: generator loss. Every 100th iteration before 2500, every
        # n_c-th iteration after 2500; iteration 2500 itself matches neither.
        warmup_step = self.iteration < 2500 and self.iteration % 100 == 0
        regular_step = (
            self.iteration > 2500 and self.iteration % self.n_c == 0)
        if warmup_step or regular_step:
            loss_gen = F.sum(-y_gen) / batchsize
            loss_gen += self.lam2 * F.absolute(
                F.sum(self.generator.inter.W) - 1)
            loss_gen.backward(loss_scale=0.001)
            chainer.reporter.report({'loss/generator': loss_gen})

        # Step 6: report the critic loss (without the penalty terms).
        chainer.reporter.report({'loss/critic': loss_cri})
Exemple #6
 def __call__(self, img_error, dis_error, dis_output, test=False):
     """Fuse three inputs into one sigmoid output.

     ``img_error`` and ``dis_error`` are taken in absolute value, every
     input is flattened per sample (3*128*128 and 512*8*8 elements), passed
     through its own layer, concatenated along axis 1 and fed to
     ``self.l_FL``. ``test`` is accepted but not used here.
     """
     img_features = self.l_img(F.reshape(
         F.absolute(img_error), (img_error.data.shape[0], 3 * 128 * 128)))
     dis_err_features = self.l_dis(F.reshape(
         F.absolute(dis_error), (dis_error.data.shape[0], 512 * 8 * 8)))
     dis_out_features = self.l_fdis(F.reshape(
         dis_output, (dis_output.data.shape[0], 512 * 8 * 8)))

     fused = F.concat(
         (img_features, dis_err_features, dis_out_features), axis=1)
     return F.sigmoid(self.l_FL(fused))
Exemple #7
    def update_core(self):
        """Compute the reconstruction and capacity losses and report them.

        NOTE(review): ``vae_optimizer`` is fetched but never used, and no
        ``backward()``/``update()`` call on ``joint_vae_loss`` is visible in
        this block -- lines appear to be missing; confirm against the
        original source.
        """
        vae_optimizer = self.get_optimizer('opt_vae')
        xp = self.vae.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        x = chainer.dataset.concat_examples(batch, device=self.device)

        latent_dist = self.vae.encode(x)

        # Reconstruction loss: Bernoulli NLL averaged over self.vae.k sampled
        # reconstructions and over the batch.
        rec_loss = 0
        for _ in range(self.vae.k):
            reconstructions = self.vae(x, sigmoid=False, mode="sample")
            rec_loss += F.bernoulli_nll(x, reconstructions) \
                / (self.vae.k * batchsize)
        # Latent losses
        # Latent loss for the continuous part
        cont_capacity_loss = 0
        if self.vae.is_continuous:
            mu, ln_var = latent_dist['cont']
            kl_cont_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
            # Annealing loss
            # NOTE(review): the right-hand side of the tuple assignment below
            # is missing (presumably a 4-tuple of capacity settings). As
            # written, the continuation turns it into a chained assignment
            # that reads cont_max before it is bound.
            cont_min, cont_max, cont_num_iters, cont_gamma = \
            cont_cap_now = (cont_max - cont_min) * self.iteration / float(cont_num_iters) + cont_min
            cont_cap_now = min(cont_cap_now, cont_max)
            cont_capacity_loss = cont_gamma * F.absolute(cont_cap_now - kl_cont_loss)

        # Latent loss for the discrete part
        disc_capacity_loss = 0
        if self.vae.is_discrete:
            kl_disc_loss = kl_multiple_discrete_loss(latent_dist['disc'])
            # Annealing loss
            # NOTE(review): same truncation as in the continuous branch -- the
            # source of the 4-tuple is missing.
            disc_min, disc_max, disc_num_iters, disc_gamma = \
            disc_cap_now = (disc_max - disc_min) * self.iteration / float(disc_num_iters) + disc_min
            disc_cap_now = min(disc_cap_now, disc_max)
            # Require float conversion here to not end up with numpy float
            # The capacity is also capped by the sum of log(dim) over all
            # discrete latent dimensions.
            disc_theoretical_max = 0
            for disc_dim in self.vae.latent_spec["disc"]:
                disc_theoretical_max += xp.log(disc_dim)
            disc_cap_now = min(disc_cap_now, disc_theoretical_max.astype("float32"))
            disc_capacity_loss = disc_gamma * F.absolute(disc_cap_now - kl_disc_loss)

        joint_vae_loss = rec_loss + cont_capacity_loss + disc_capacity_loss


        chainer.reporter.report({"rec_loss": rec_loss, "cont_loss": cont_capacity_loss,
                                "disc_loss": disc_capacity_loss, "vae_loss": joint_vae_loss, })
    def compute_disp_smooth(self, img, pred_disp):
        """Return the image-weighted smoothness of ``pred_disp``.

        Forward differences of the disparity along axes 3 and 2 are taken in
        absolute value and weighted by ``exp(-|d_img|)``, where ``d_img`` is
        the matching image difference averaged over axis 1. The two weighted
        means are added.
        """
        def _diff_x(t):
            # Difference between neighbours along the last (4th) axis.
            return t[:, :, :, 1:] - t[:, :, :, :-1]

        def _diff_y(t):
            # Difference between neighbours along the 3rd axis.
            return t[:, :, 1:] - t[:, :, :-1]

        img_dx = F.mean(_diff_x(img), axis=1, keepdims=True)
        img_dy = F.mean(_diff_y(img), axis=1, keepdims=True)
        disp_dx = _diff_x(pred_disp)
        disp_dy = _diff_y(pred_disp)

        smooth_x = F.mean(F.absolute(disp_dx) * F.exp(-F.absolute(img_dx)))
        smooth_y = F.mean(F.absolute(disp_dy) * F.exp(-F.absolute(img_dy)))
        return smooth_x + smooth_y
Exemple #9
def loss_func_tv_l1(x_out):
    """Return the L1 total variation of a 4-D variable.

    Two (ch, ch, 2, 2) kernels are built that act on each channel
    independently: one takes the difference of horizontally adjacent
    elements, the other of vertically adjacent ones. The absolute filter
    responses are summed.
    """
    xp = cuda.get_array_module(x_out.data)
    _, ch, _, _ = x_out.data.shape  # unpacking also enforces a 4-D input

    Wx = xp.zeros((ch, ch, 2, 2), dtype="f")
    Wy = xp.zeros((ch, ch, 2, 2), dtype="f")
    # Only the channel-diagonal entries are non-zero.
    diag = xp.arange(ch)
    Wx[diag, diag, 0, 0] = -1
    Wx[diag, diag, 0, 1] = 1
    Wy[diag, diag, 0, 0] = -1
    Wy[diag, diag, 1, 0] = 1

    tv_x = F.sum(F.absolute(F.convolution_2d(x_out, W=Wx)))
    tv_y = F.sum(F.absolute(F.convolution_2d(x_out, W=Wy)))
    return tv_x + tv_y
Exemple #10
    def _loss(self, fake_batch_obs, fake_batch_action,
              true_batch_obs, true_batch_action):
        """Compute the discriminator loss and refresh the running statistics.

        Args:
            fake_batch_obs, fake_batch_action: Batch scored as "fake".
            true_batch_obs, true_batch_action: Batch scored as "true".

        Returns:
            ``fake_loss + true_loss - entropy * self.entropy_coef``.

        Side effects:
            Updates ``accuracy_gen``, ``accuracy_exp``, ``average_entropy``
            and ``average_loss`` on ``self``; the observation normalizer is
            updated with the true batch only (``update=True``).

        The ``else`` branches and the tail of the ``average_loss`` update were
        missing, which made the normalised and noisy-label paths dead code;
        they are restored here.
        """
        if self.obs_normalizer is not None:
            normalized_obs = self.obs_normalizer(fake_batch_obs, update=False)
            infer_fake = self.model(normalized_obs, fake_batch_action)
        else:
            infer_fake = self.model(fake_batch_obs, fake_batch_action)
        if self.noisy_label:
            n = fake_batch_obs.shape[0]
            fake_loss = -F.average(
                F.log(F.absolute(1 - (self.xp.random.rand(n)
                                      * self.noisy_label_range)
                                 - F.sigmoid(infer_fake))
                      + self.discriminator_value_offset))
        else:
            fake_loss = -F.average(F.log(1
                                         - F.sigmoid(infer_fake)
                                         + self.discriminator_value_offset))

        if self.obs_normalizer is not None:
            normalized_obs = self.obs_normalizer(true_batch_obs, update=True)
            infer_true = self.model(normalized_obs, true_batch_action)
        else:
            infer_true = self.model(true_batch_obs, true_batch_action)
        if self.noisy_label:
            n = true_batch_obs.shape[0]
            # NOTE(review): this is the same expression as the fake branch,
            # while the non-noisy true loss uses log(sigmoid(.)) -- confirm
            # that the noisy true-label form is intended.
            true_loss = -F.average(
                F.log(F.absolute(1 - (self.xp.random.rand(n)
                                      * self.noisy_label_range)
                                 - F.sigmoid(infer_true))
                      + self.discriminator_value_offset))
        else:
            true_loss = -F.average(F.log(F.sigmoid(infer_true)
                                         + self.discriminator_value_offset))

        entropy = (self._get_entropy(infer_fake) / 2
                   + self._get_entropy(infer_true) / 2)
        loss = (fake_loss + true_loss
                - entropy * self.entropy_coef)

        # Update stats
        self.accuracy_gen = np.average(
            chainer.cuda.to_cpu(infer_fake.array) < 0)
        self.accuracy_exp = np.average(
            chainer.cuda.to_cpu(infer_true.array) > 0)
        # Exponential moving averages of entropy and loss.
        self.average_entropy *= self.entropy_decay
        self.average_entropy += (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)  # noqa
        self.average_loss *= self.loss_decay
        self.average_loss += (1.0 - self.loss_decay) * chainer.cuda.to_cpu(loss.array)  # noqa

        return loss
Exemple #11
    def get_disparity_smoothness(self, disp, img):
        """Return the edge-aware smoothness term of a disparity map.

        Disparity gradients are down-weighted by ``exp(-mean(|image gradient|))``
        where the mean is taken over axis 1.

        Fixes relative to the previous version:
        * ``keep_dims`` is not a keyword of ``F.mean``; Chainer spells it
          ``keepdims``.
        * The weights were derived from the disparity gradients while the
          image gradients were computed and never used; the weights now come
          from the image gradients.

        Args:
            disp: Disparity map passed to ``self.gradient_x``/``gradient_y``.
            img: Image passed to the same gradient helpers.

        Returns:
            The sum of the weighted x and y disparity gradients.
        """
        disp_gradients_x = self.gradient_x(disp)
        disp_gradients_y = self.gradient_y(disp)

        img_gradients_x = self.gradient_x(img)
        img_gradients_y = self.gradient_y(img)

        weight_x = F.exp(
            -F.mean(F.absolute(img_gradients_x), axis=1, keepdims=True))
        weight_y = F.exp(
            -F.mean(F.absolute(img_gradients_y), axis=1, keepdims=True))

        smoothness_x = disp_gradients_x * weight_x
        smoothness_y = disp_gradients_y * weight_y
        return smoothness_x + smoothness_y
Exemple #12
    def update_core(self):
        """Compute and report the softplus GAN losses for one iteration.

        The critic loss is ``softplus(-D(real)) + softplus(D(fake))`` averaged
        over the batch; a penalty on the sum of the ``inter`` layer weights
        (weighted by ``self.lam2``) is added to the critic total and to the
        generator loss. No ``backward()`` or optimizer call is visible in
        this block.
        """
        xp = cuda.cupy

        batch = self.get_iterator('main').next()
        batchsize = len(batch)

        # Step 1: generate a fake batch and score it.
        latent = Variable(xp.asarray(self.generator.make_hidden(batchsize)))
        x_gen = self.generator(latent)
        y_gen = self.critic(x_gen)

        # Step 2: score the real batch (scaled by 1/255).
        x_real = Variable(xp.array(batch)) / 255.
        y_real = self.critic(x_real)

        # Step 3: critic loss.
        loss_cri = F.sum(F.softplus(-y_real)) / batchsize
        loss_cri += F.sum(F.softplus(y_gen)) / batchsize
        critic_penalty = self.lam2 * F.absolute(
            F.sum(self.critic.inter.W) - 1)
        loss_all = loss_cri + critic_penalty

        # Step 4: update critic (no update code present here).

        # Step 5: generator loss. Every 100th iteration before 2500, every
        # n_c-th iteration after 2500; iteration 2500 itself matches neither.
        warmup_step = self.iteration < 2500 and self.iteration % 100 == 0
        regular_step = (
            self.iteration > 2500 and self.iteration % self.n_c == 0)
        if warmup_step or regular_step:
            loss_gen = F.sum(F.softplus(-y_gen)) / batchsize
            loss_gen += self.lam2 * F.absolute(
                F.sum(self.generator.inter.W) - 1)
            chainer.reporter.report({'loss/generator': loss_gen})

        # Step 6: report the critic loss (without the penalty term).
        chainer.reporter.report({'loss/critic': loss_cri})
# Compute a 1-D soft occupancy vector from a set of scalar points.
# NOTE(review): the OccupancyGrid1D(...) call below is truncated -- its
# remaining arguments and closing parenthesis (and how `points` and
# `dimension` are passed) are missing; restore from the original source.
def occupancy_grid_1d(points, *, pitch, origin, dimension):
    # `points` must be one-dimensional.
    assert points.shape == (points.shape[0], )
    d_IJ = OccupancyGrid1D(pitch=pitch, origin=origin,
    # Triangular response: 1 at zero distance, falling linearly to 0 at |d| >= 1.
    m_IJ = F.relu(1 - F.absolute(d_IJ))
    # Keep the strongest response along axis 0.
    m = F.max(m_IJ, axis=0)
    return m
Exemple #14
 def __call__(self, x1, x2, train=True):
     """Encode/decode a pair of inputs, or decode a latent code.

     In training mode both inputs are encoded, their latent codes are mixed
     with a random per-sample coefficient ``alpha`` in [0, 0.5], and all
     three codes are decoded. Otherwise ``x1`` is treated as a latent code
     and only decoded (``x2`` is ignored).

     The ``else:`` separating the two modes was missing, which left the
     decode-only path unreachable and made ``train=False`` return ``None``.

     Returns:
         train=True: ``(sigmoid(y1), sigmoid(y2), sigmoid(yc), alpha, h1, h2)``.
         train=False: ``sigmoid(y)``.
     """
     if train:
         batchsize = x1.shape[0]
         xp = cupy
         alpha = chainer.Variable(xp.random.rand(batchsize, dtype=xp.float32))
         # Fold uniform [0, 1) onto [0, 0.5].
         alpha = 0.5 - F.absolute(0.5 - alpha)
         alpha = alpha.reshape(batchsize, 1, 1, 1)
         h1 = F.relu(self.conv1(x1))
         h2 = F.relu(self.conv1(x2))
         h1 = F.relu(self.conv2(h1))
         h2 = F.relu(self.conv2(h2))
         h1 = F.relu(self.conv3(h1))
         h2 = F.relu(self.conv3(h2))
         h1 = self.conv_z(h1)
         h2 = self.conv_z(h2)
         # Interpolated latent code.
         c = alpha * h1 + (1.0 - alpha) * h2
         y1 = self.z_deconv(h1)
         y2 = self.z_deconv(h2)
         yc = self.z_deconv(c)
         y1 = F.relu(self.deconv1(y1))
         y2 = F.relu(self.deconv1(y2))
         yc = F.relu(self.deconv1(yc))
         y1 = F.relu(self.deconv2(y1))
         y2 = F.relu(self.deconv2(y2))
         yc = F.relu(self.deconv2(yc))
         y1 = self.deconv3(y1)
         y2 = self.deconv3(y2)
         yc = self.deconv3(yc)
         return F.sigmoid(y1), F.sigmoid(y2), F.sigmoid(yc), alpha, h1, h2
     else:
         y = F.relu(self.z_deconv(x1))
         y = F.relu(self.deconv1(y))
         y = F.relu(self.deconv2(y))
         y = self.deconv3(y)
         return F.sigmoid(y)
def loss_comp_low(x, y, threshold, norm='l1'):
    """Sum the error over elements where exactly one input is below threshold.

    Args:
        x, y: Variables compared element-wise.
        threshold: Value the raw arrays are compared against.
        norm: ``'l1'`` for absolute error; any other value selects squared
            error.

    Returns:
        The masked error summed over all elements.

    The squared-error return was nested inside the ``'l1'`` branch after its
    ``return`` and could never run, so any other ``norm`` returned ``None``.
    """
    # XOR mask: true where exactly one of x, y lies below the threshold.
    mismatch = (x.array < threshold) ^ (y.array < threshold)
    if norm == 'l1':
        return F.sum(mismatch * F.absolute(x - y))
    return F.sum(mismatch * ((x - y) ** 2))
def test_backward_silhouette():
    """Backward if non-zero gradient is out of a face."""

    # Reference gradient w.r.t. the three vertices (the closing bracket of
    # this literal was missing).
    grad_ref = [
        [1.6725862, -0.26021874, 0.],
        [1.41986704, -1.64284933, 0.],
        [0., 0., 0.],
    ]
    vertices = [[0.8, 0.8, 1.], [0.0, -0.5, 1.], [0.2, -0.4, 1.]]
    faces = [[0, 1, 2]]

    vertices = cp.array(vertices, 'float32')
    faces = cp.array(faces, 'int32')
    grad_ref = cp.array(grad_ref, 'float32')
    vertices, faces, grad_ref = utils.to_minibatch((vertices, faces, grad_ref))
    # Pixel whose value drives the loss.
    pxi = 35
    pyi = 25

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.fill_back = False
    renderer.perspective = False
    vertices = chainer.Variable(vertices)
    images = renderer.render_silhouettes(vertices, faces)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
    # Without a backward pass vertices.grad is never populated.
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def test_backward_silhouette_ch_2():
    """Backward if non-zero gradient is on a face."""

    vertices = np.array([[0.8, 0.8, 1.], [-0.5, -0.8, 1.], [0.8, -0.8, 1.]])
    faces = np.array([[0, 1, 2]])
    # Pixel whose value drives the loss.
    pyi = 40
    pxi = 50
    # Reference gradient (the closing "])" of this literal was missing).
    grad_ref = np.array([
        [0.98646867, 1.04628897, 0.],
        [-1.03415668, -0.10403691, 0.],
        [3.00094461, -1.55173182, 0.],
    ])

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False

    # Prepare chainer inputs
    vertices = cp.array(vertices, 'float32')
    faces = cp.array(faces, 'int32')
    grad_ref = cp.array(grad_ref, 'float32')
    vertices, faces, grad_ref = utils.to_minibatch((vertices, faces, grad_ref))
    vertices = chainer.Variable(vertices)
    images = renderer.render_silhouettes(vertices, faces)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
    # Without a backward pass vertices.grad is never populated.
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
Exemple #18
    # Predict the relation between a premise and a hypothesis batch.
    # NOTE(review): the signature below is truncated after `self,`. The body
    # uses `xs`, `get_embed`, `no_dropout`, `softmax` and `argmax`, which are
    # presumably the missing parameters -- restore names, order and defaults
    # from the original source.
    # NOTE(review): an `else:` appears to be missing before the second pair of
    # encoder calls; as written they would overwrite the get_embed=True
    # results and leave exs0/exs1 stale.
    def predict(self,
        xs0, xs1 = xs  # premise, hypothesis
        if get_embed:
            ys0, exs0 = self.encoder(xs0, get_embed=True)
            ys1, exs1 = self.encoder(xs1, get_embed=True)
            ys0 = self.encoder(xs0, get_embed=False)
            ys1 = self.encoder(xs1, get_embed=False)

        # Max-pool each encoded sequence over axis 0, then stack into a batch.
        ys0 = [F.max(y, axis=0) for y in ys0]
        ys1 = [F.max(y, axis=0) for y in ys1]
        ratio = 0.0 if no_dropout else self.dropout
        ys0 = F.dropout(F.stack(ys0, axis=0), ratio=ratio)
        ys1 = F.dropout(F.stack(ys1, axis=0), ratio=ratio)
        # Pair features: both encodings, their absolute difference and product.
        ys = F.concat([ys0, ys1, F.absolute(ys0 - ys1), ys0 * ys1], axis=1)
        ys = self.output(ys, no_dropout)
        # `softmax` takes precedence over `argmax`; both return raw arrays.
        if softmax:
            ys = F.softmax(ys).data
        elif argmax:
            ys = self.xp.argmax(ys.data, axis=1)
        if get_embed:
            return ys, exs0, exs1
        return ys
def test_backward_case1():
    """Backward if non-zero gradient is out of a face."""

    vertices = [[0.8, 0.8, 1.], [0.0, -0.5, 1.], [0.2, -0.4, 1.]]
    faces = [[0, 1, 2]]
    # Pixel whose value drives the loss.
    pxi = 35
    pyi = 25
    # Reference gradient (the closing bracket of this literal was missing).
    grad_ref = [
        [1.6725862, -0.26021874, 0.],
        [1.41986704, -1.64284933, 0.],
        [0., 0., 0.],
    ]

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False
    renderer.light_intensity_ambient = 1.0
    renderer.light_intensity_directional = 0.0

    vertices = cp.array(vertices, 'float32')
    faces = cp.array(faces, 'int32')
    textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
    grad_ref = cp.array(grad_ref, 'float32')
    vertices, faces, textures, grad_ref = utils.to_minibatch(
        (vertices, faces, textures, grad_ref))
    vertices = chainer.Variable(vertices)

    images = renderer.render(vertices, faces, textures)
    images = cf.mean(images, axis=1)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
    # Without a backward pass vertices.grad is never populated.
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
Exemple #20
    def update_core(self):
        """Run one update of the encoder, decoder and critic.

        A batch and a shuffled copy of it are encoded; their latent codes are
        mixed with a random per-sample coefficient ``alpha`` in [0, 0.5] and
        decoded. The critic scores the decoded mixture and blends of each
        input with its reconstruction (weight ``self.gam``). Each optimizer is
        then updated with its loss function.

        Fixes relative to the previous version:
        * The ``else:`` was missing, so ``alpha`` was always reshaped to
          ``(batchsize, 1)``, even for the ``'conv'`` network.
        * ``xp.random.rand(..., dtype=...)`` is only accepted by CuPy; the
          sample is now cast with ``astype`` so it also works when ``xp`` is
          NumPy.
        """
        Enc_optimizer = self.get_optimizer('Enc')
        Dec_optimizer = self.get_optimizer('Dec')
        Critic_optimizer = self.get_optimizer('Critic')

        batch1 = self.get_iterator('main').next()
        # A shuffled copy of the same batch provides the second input.
        batch2 = random.sample(batch1, len(batch1))
        x1 = Variable(self.converter(batch1, self.device))
        x2 = Variable(self.converter(batch2, self.device))
        xp = chainer.backend.get_array_module(x1.data)
        batchsize = len(batch1)
        alpha = chainer.Variable(
            xp.random.rand(batchsize).astype(xp.float32))
        # Fold uniform [0, 1) onto [0, 0.5].
        alpha = 0.5 - F.absolute(0.5 - alpha)
        # Shape alpha so that it broadcasts against the latent code.
        if self.net == 'conv':
            alpha = alpha.reshape(batchsize, 1, 1, 1)
        else:
            alpha = alpha.reshape(batchsize, 1)

        z1 = self.Enc(x1)
        z2 = self.Enc(x2)
        zc = alpha * z1 + (1.0 - alpha) * z2
        yc = self.Dec(zc)
        y1 = self.Dec(z1)
        y2 = self.Dec(z2)
        cdis_c = self.Critic(yc)
        cdis_y1 = self.Critic(self.gam * x1 + (1 - self.gam) * y1)
        cdis_y2 = self.Critic(self.gam * x2 + (1 - self.gam) * y2)
        Critic_optimizer.update(self.loss_Critic, cdis_c, alpha, cdis_y1, cdis_y2)
        Enc_optimizer.update(self.loss_Enc, x1, x2, y1, y2, cdis_c)
        Dec_optimizer.update(self.loss_Dec, x1, x2, y1, y2, cdis_c)
    def __call__(self, x):
        """Decode a feature batch into mesh vertices.

        Returns:
            Tuple ``(vertices, self.faces)`` where ``vertices`` has shape
            ``(batch_size, -1, 3)``.
        """
        batch_size = x.shape[0]

        # Predict per-vertex offsets (original note: 337 vertices, [bs, 337, 3]).
        hidden = cf.relu(self.linear2(cf.relu(self.linear1(x))))
        offsets = (self.linear_bias(hidden) * self.scaling).reshape(
            (batch_size, -1, 3))

        # Add the scaled base shape and squash with tanh.
        scaled_base = self.vertices_base * self.obj_scale
        scaled_base = self.xp.broadcast_to(
            scaled_base[None, :, :], offsets.shape)
        points = offsets + scaled_base
        points = self.object_size * cf.tanh(points) * 0.99

        # z <- abs(z)
        points = cf.concat(
            (points[:, :, :2], cf.absolute(points[:, :, 2:3])), axis=2)

        # Expand to the full vertex set with a fixed matrix
        # (original note: vertices_matrix is [642 * 3, 337 * 3]).
        flat = cf.reshape(points, (batch_size, -1))
        expand_matrix = self.xp.tile(
            self.vertices_matrix[None, :, :], (batch_size, 1, 1))
        expanded = cf.matmul(expand_matrix, flat[:, :, None])
        vertices = cf.reshape(expanded, (batch_size, -1, 3))

        return vertices, self.faces
Exemple #22
    # Compute the n-step and 1-step Q-learning losses for a batch.
    # NOTE(review): this block is truncated in several places and is not valid
    # Python as it stands:
    #   * the docstring has no closing triple quote and its "Args:"/"Returns:"
    #     headings are missing;
    #   * `for e in delta:` has no body (presumably it filled `errors_out`);
    #   * both compute_weighted_value_loss(...) calls are cut after the
    #     weights argument, and `is_1_step` is computed but never used in
    #     what remains.
    # NOTE(review): `errors_out` defaults to None, yet `del errors_out[:]` is
    # executed unconditionally -- confirm whether a None guard was dropped.
    def _compute_ddqn_losses(self, exp_batch, errors_out=None):
        """Compute the Q-learning losses for a batch of experiences

          exp_batch (dict): A dict of batched arrays of transitions
          Computed loss from the minibatch of experiences
        y, t = self._compute_y_and_ts(exp_batch)

        del errors_out[:]
        delta = F.absolute(y - t)
        if delta.ndim == 2:
            delta = F.sum(delta, axis=1)
        delta = cuda.to_cpu(delta.array)
        for e in delta:

        is_1_step = self.xp.abs(1. - exp_batch["is_n_step"])
        loss_1step = compute_weighted_value_loss(
            y, t, exp_batch['weights'],
        loss_nstep = compute_weighted_value_loss(
            y, t, exp_batch['weights'],
        return loss_nstep, loss_1step
    def __call__(self, x):
        """Decode a feature batch into mesh vertices.

        Two batch-normalised linear layers feed ``self.linear_bias``, whose
        output is reshaped to ``(-1, self.num_vertices, 3)``, scaled and added
        to ``self.vertices_base``. When ``self.symmetric`` is set, z is made
        non-negative, the vertices are mixed with ``self.symmetric_matrix``
        and z is multiplied by ``self.z_sign``. The result is squashed with
        tanh and scaled by ``self.tanh_scale``.

        Returns:
            Tuple ``(vertices, self.faces)``.
        """
        hidden = cf.relu(self.linear1_bn(self.linear1(x)))
        hidden = cf.relu(self.linear2_bn(self.linear2(hidden)))

        bias = cf.reshape(self.linear_bias(hidden), (-1, self.num_vertices, 3))
        bias *= self.scaling
        base = cf.broadcast_to(self.vertices_base[None, :, :], bias.shape)
        vertices = base + bias

        if self.symmetric:
            # z <- abs(z); xy is [bs, nv, 2], z is [bs, nv, 1]
            vertices = cf.concat(
                (vertices[:, :, :2], cf.absolute(vertices[:, :, 2:3])),
                axis=2)

            # Contract the vertex axis with the symmetric matrix, then move
            # the coordinate axis back to the end.
            mixed = cf.tensordot(vertices, self.symmetric_matrix, axes=(1, 0))
            vertices = cf.transpose(mixed, (0, 2, 1))

            # Apply the per-vertex sign to z.
            signed_z = vertices[:, :, 2:3] * self.z_sign[None, :, None]
            vertices = cf.concat((vertices[:, :, :2], signed_z), axis=2)

        vertices = cf.tanh(vertices) * self.tanh_scale

        return vertices, self.faces
    def test_backward_case2(self):
        """Check silhouette gradients by comparing re-rendered images.

        Each x/y coordinate of each vertex is moved by ``1 / grad`` and the
        re-rendered silhouette is compared with a stored reference image.

        ``loss.backward()`` was missing, so ``vertices.grad`` was never
        populated before being indexed.
        """
        vertices = [[0.8, 0.8, 1.], [-0.5, -0.8, 1.], [0.8, -0.8, 1.]]
        faces = [[0, 1, 2]]
        # Pixel whose value drives the loss.
        pyi = 40
        pxi = 50

        renderer = neural_renderer.Renderer()
        renderer.image_size = 64
        renderer.anti_aliasing = False
        renderer.perspective = False

        vertices = chainer.Variable(cp.array(vertices, 'float32'))
        faces = cp.array(faces, 'int32')
        images = renderer.render_silhouettes(vertices[None, :, :],
                                             faces[None, :, :])
        loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
        loss.backward()

        for i in range(3):
            for j in range(2):
                axis = 'x' if j == 0 else 'y'
                vertices2 = cp.copy(vertices.data)
                vertices2[i, j] -= 1. / vertices.grad[i, j]
                images = renderer.render_silhouettes(vertices2[None, :, :],
                                                     faces[None, :, :])
                # Grey silhouette replicated to RGB, target pixel marked red.
                image = np.tile(images[0].data.get()[:, :, None], (1, 1, 3))
                image[pyi, pxi] = [1, 0, 0]
                # NOTE(review): scipy.misc.imread is not available in recent
                # SciPy releases -- confirm the pinned version or switch
                # readers.
                ref = scipy.misc.imread(
                    './tests/data/rasterize_silhouettes_case2_v%d_%s.png' %
                    (i, axis))
                ref = ref.astype('float32') / 255
                chainer.testing.assert_allclose(ref, image)
Exemple #25
    def get_onehot_grad(self, xs, ys=None):
        """Return per-token gradient scores for the hypothesis inputs.

        The score of a token is the sum over the embedding dimension of
        ``gradient * embedding``, where the gradient is that of the
        cross-entropy loss with respect to the hypothesis embeddings.

        Args:
            xs: Pair ``(premises, hypotheses)``.
            ys: Target labels; when ``None`` the model's own argmax
                predictions (computed with ``train=False``) are used.

        Returns:
            One score sequence per hypothesis.

        The ``else:`` between the tuple and the padded-tensor code paths was
        missing, so the second path always ran and overwrote the first.
        """
        if ys is None:
            with chainer.using_config('train', False):
                ys = self.predict(xs, argmax=True)
        u, exs_prem = self.encoder.get_grad(xs[0])
        v, exs_hypo = self.encoder.get_grad(xs[1])
        encodings = F.concat((u, v, F.absolute(u - v), u * v), axis=1)
        outputs = self.output(self.mlp(encodings, no_dropout=True))
        loss = F.softmax_cross_entropy(outputs, ys)

        exs = exs_hypo
        lengths = [len(x) for x in xs[1]]

        if isinstance(exs, tuple):
            # One embedding matrix per example: concatenate, score, split back.
            exs_grad = chainer.grad([loss], exs)
            ex_sections = np.cumsum([ex.shape[0] for ex in exs[:-1]])
            exs = F.concat(exs, axis=0)
            exs_grad = F.concat(exs_grad, axis=0)
            onehot_grad = F.sum(exs_grad * exs, axis=1)
            onehot_grad = F.split_axis(onehot_grad, ex_sections, axis=0)
        else:
            exs_grad = chainer.grad([loss], [exs])[0]
            # (batch_size, n_dim, max_length, 1)
            assert exs_grad.shape == exs.shape
            onehot_grad = F.squeeze(F.sum(exs_grad * exs, 1), 2)
            # Trim each row back to its true length.
            onehot_grad = [
                row[:length] for row, length in zip(onehot_grad, lengths)]
        return onehot_grad
    def __call__(self, x):
        """Calculate minibatch discrimination using broadcast.

        The docstring had lost its quotes, leaving bare text in the body.

        Args:
            x (Variable): Input; per the original note its shape is
                (N, num_units). It is flattened to (N, -1) before use.

        Returns:
            Variable: The flattened input concatenated along axis 1 with the
            (N, self.b) minibatch features.
        """
        batch_size = x.shape[0]
        xp = x.xp
        x = F.reshape(x, (batch_size, -1))
        activation = F.reshape(self.t(x), (-1, self.b, self.c))

        # Build (N, b, c, N) tensors holding every pair of samples.
        m = F.reshape(activation, (-1, self.b, self.c))
        m = F.expand_dims(m, 3)
        m_T = F.transpose(m, (3, 1, 2, 0))
        m, m_T = F.broadcast(m, m_T)
        # L1 distance over the c axis -> (N, b, N).
        l1_norm = F.sum(F.absolute(m - m_T), axis=2)

        # Eraser: a large value on the sample diagonal so that each sample's
        # zero distance to itself contributes exp(-1e6) ~ 0.
        eraser = F.expand_dims(xp.eye(batch_size, dtype="f"), 1)
        eraser = F.broadcast_to(eraser, (batch_size, self.b, batch_size))

        o_X = F.sum(F.exp(-(l1_norm + 1e6 * eraser)), axis=2)

        # Concatenate along channels or units.
        return F.concat((x, o_X), axis=1)
Exemple #27
    # Predict from a pair of inputs, optionally returning intermediate layers.
    # NOTE(review): this block is truncated and is not valid Python as shown:
    #   * the signature stops after `self,`; the body uses `xs`, `no_dropout`,
    #     `dknn`, `softmax` and `argmax`, presumably the missing parameters;
    #   * the `self.mlp(encodings,` call in the dknn branch is cut off;
    #   * `else:` lines appear to be missing before the second `self.mlp`
    #     call and before the final `return outputs`.
    # Restore from the original source before relying on this method.
    def predict(self,
        dknn_layers = []
        u = self.encoder(xs[0], dknn=False, no_dropout=no_dropout)
        v = self.encoder(xs[1], dknn=False, no_dropout=no_dropout)
        # Concatenate results as done in InferSent: both encodings, their
        # absolute difference and their element-wise product.
        encodings = F.concat((u, v, F.absolute(u - v), u * v), axis=1)
        dknn_layers = [encodings]

        if dknn:
            outputs, _dknn_layers = self.mlp(encodings,
            dknn_layers = dknn_layers + _dknn_layers
            outputs = self.mlp(encodings, dknn=False, no_dropout=no_dropout)

        outputs = self.output(outputs)
        # `softmax` takes precedence over `argmax`; both return raw arrays.
        if softmax:
            outputs = F.softmax(outputs).data
        elif argmax:
            outputs = self.xp.argmax(outputs.data, axis=1)
        if dknn:
            return outputs, dknn_layers
            return outputs
Exemple #28
def nlogn_loss(prediction, label):
    """Return the mean of ``d * log2(d) / 256`` with ``d = |255 * error| + 1``.

    Both inputs are scaled by 255 before the difference is taken.
    """
    scaled_error = prediction * 255 - label * 255
    shifted_abs = F.absolute(scaled_error) + 1
    return F.mean(shifted_abs * F.log2(shifted_abs) / 256)
Exemple #29
    # Compute the Q-learning loss for a batch of experiences.
    # NOTE(review): this block is truncated and is not valid Python as shown:
    #   * the docstring has no closing triple quote and its "Args:"/"Returns:"
    #     headings are missing;
    #   * `for e in delta:` has no body (presumably it filled `errors_out`);
    #   * both return statements are cut off mid-call and the `else:` between
    #     the weighted and unweighted loss appears to be missing.
    # Restore from the original source before relying on this method.
    def _compute_loss(self, exp_batch, errors_out=None):
        """Compute the Q-learning loss for a batch of experiences

          exp_batch (dict): A dict of batched arrays of transitions
          Computed loss from the minibatch of experiences
        y, t = self._compute_y_and_t(exp_batch)

        if errors_out is not None:
            del errors_out[:]
            delta = F.absolute(y - t)
            if delta.ndim == 2:
                delta = F.sum(delta, axis=1)
            delta = cuda.to_cpu(delta.array)
            for e in delta:

        if 'weights' in exp_batch:
            return compute_weighted_value_loss(
            return compute_value_loss(y,
def _smooth_l1_loss(x, t, in_weight, sigma):
    """Return the summed smooth-L1 loss of the weighted difference ``x - t``.

    Elements whose absolute weighted difference is below ``1 / sigma**2`` use
    the quadratic form ``sigma**2 / 2 * d**2``; the rest use
    ``|d| - 0.5 / sigma**2``.
    """
    sigma_sq = sigma**2
    weighted_diff = in_weight * (x - t)
    magnitude = F.absolute(weighted_diff)
    # 1.0 where the quadratic branch applies, 0.0 elsewhere (no gradient).
    is_quadratic = (magnitude.data < (1. / sigma_sq)).astype(np.float32)

    quadratic_part = is_quadratic * (sigma_sq / 2.) * F.square(weighted_diff)
    linear_part = (1 - is_quadratic) * (magnitude - 0.5 / sigma_sq)
    return F.sum(quadratic_part + linear_part)
def _smooth_l1_loss(x, t, in_weight, sigma):
    """Sum the smooth-L1 penalty of ``in_weight * (x - t)``.

    The penalty is quadratic (``sigma**2 / 2 * d**2``) while ``|d|`` is below
    ``1 / sigma**2`` and linear (``|d| - 0.5 / sigma**2``) otherwise.
    """
    inv_threshold = sigma ** 2
    residual = in_weight * (x - t)
    abs_residual = F.absolute(residual)
    # Constant mask selecting the quadratic region.
    small = (abs_residual.array < (1. / inv_threshold)).astype(np.float32)

    per_element = (
        small * (inv_threshold / 2.) * F.square(residual)
        + (1 - small) * (abs_residual - 0.5 / inv_threshold)
    )
    return F.sum(per_element)
Exemple #32
def get_normalized_vector(d, xp=None):
    """Scale each sample of ``d`` to (approximately) unit L2 norm.

    Every sample is first divided by its largest absolute value (plus 1e-12)
    and then by its L2 norm (with 1e-6 added under the square root). The
    reductions run over all axes except axis 0.

    Args:
        d: Array (when ``xp`` is given) or Chainer variable to normalise.
        xp: Array module (e.g. numpy or cupy). When given, ``d`` is treated as
            a raw array and modified in place; when ``None`` the Chainer
            function path is used.

    Returns:
        The normalised ``d``.

    The ``else:`` was missing, so the Chainer path also ran after the array
    path.
    """
    reduce_axes = tuple(range(1, len(d.shape)))
    if xp is not None:
        d /= (1e-12 + xp.max(xp.abs(d), reduce_axes, keepdims=True))
        d /= xp.sqrt(1e-6 + xp.sum(d ** 2, reduce_axes, keepdims=True))
    else:
        d_term = 1e-12 + F.max(F.absolute(d), reduce_axes, keepdims=True)
        d /= F.broadcast_to(d_term, d.shape)
        d_term = F.sqrt(1e-6 + F.sum(d ** 2, reduce_axes, keepdims=True))
        d /= F.broadcast_to(d_term, d.shape)
    return d
Exemple #33
    # Read a value addressed by a real-valued vector.
    # NOTE(review): `array`, `lookup` and `x` are not defined in this block;
    # presumably they come from an enclosing scope (or `x` was meant to be
    # `index`) -- confirm.
    # NOTE(review): the second positional parameter of np.vectorize is
    # `otypes`, so `np.vectorize(f, index.data, cache=True)` does not apply
    # `f` to the data; `np.vectorize(f)(index.data)` or `index.data > 0` looks
    # like the intent -- confirm.
    def read(address):
        # Map from the reals to the hypercube of dimension n
        index = F.tanh(address)
        # Map from a point to the nearest corner of the hypercube
        f = lambda x: x > 0
        mainIndex = np.vectorize(f,index.data,cache=True)

        mainValue = F.select_item(array,lookup(mainIndex))
        # exp(sum(log|x|)) equals the product of the absolute values of x.
        scaleFactor =F.exp(F.sum(F.log(F.absolute(x))))

        return mainValue * scaleFactor
    def test_backward_case2(self):
        """Backward if non-zero gradient is on a face."""

        vertices = [
            [0.8, 0.8, 1.],
            [-0.5, -0.8, 1.],
            [0.8, -0.8, 1.]]
        faces = [[0, 1, 2]]
        # Pixel whose value drives the loss.
        pyi = 40
        pxi = 50
        # Reference gradient (the closing bracket of this literal was missing).
        grad_ref = [
            [0.98646867, 1.04628897, 0.],
            [-1.03415668, -0.10403691, 0.],
            [3.00094461, -1.55173182, 0.],
        ]

        renderer = neural_renderer.Renderer()
        renderer.image_size = 64
        renderer.anti_aliasing = False
        renderer.perspective = False
        renderer.light_intensity_ambient = 1.0
        renderer.light_intensity_directional = 0.0

        vertices = cp.array(vertices, 'float32')
        faces = cp.array(faces, 'int32')
        textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
        grad_ref = cp.array(grad_ref, 'float32')
        vertices, faces, textures, grad_ref = utils.to_minibatch(
            (vertices, faces, textures, grad_ref))
        vertices = chainer.Variable(vertices)

        images = renderer.render(vertices, faces, textures)
        images = cf.mean(images, axis=1)
        loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
        # Without a backward pass vertices.grad is never populated.
        loss.backward()

        # grad_ref is already a float32 device array; the second conversion
        # that used to sit here was redundant.
        chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
    def test_backward_case1(self):
        """Backward if non-zero gradient is out of a face."""

        vertices = [
            [0.8, 0.8, 1.],
            [0.0, -0.5, 1.],
            [0.2, -0.4, 1.]]
        faces = [[0, 1, 2]]
        # Pixel whose value drives the loss.
        pxi = 35
        pyi = 25
        # Reference gradient (the closing bracket of this literal was missing).
        grad_ref = [
            [1.6725862, -0.26021874, 0.],
            [1.41986704, -1.64284933, 0.],
            [0., 0., 0.],
        ]

        renderer = neural_renderer.Renderer()
        renderer.image_size = 64
        renderer.anti_aliasing = False
        renderer.perspective = False
        renderer.light_intensity_ambient = 1.0
        renderer.light_intensity_directional = 0.0

        vertices = cp.array(vertices, 'float32')
        faces = cp.array(faces, 'int32')
        textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
        grad_ref = cp.array(grad_ref, 'float32')
        vertices, faces, textures, grad_ref = utils.to_minibatch(
            (vertices, faces, textures, grad_ref))
        vertices = chainer.Variable(vertices)

        images = renderer.render(vertices, faces, textures)
        images = cf.mean(images, axis=1)
        loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
        # Without a backward pass vertices.grad is never populated.
        loss.backward()

        chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
Exemple #36
# Train a two-task regression model with GradNorm or equal weighting and plot
# the loss and weight histories.
# NOTE(review): several statements appear to have been dropped from this
# function and it is not valid Python as shown:
#   * the `--mode` add_argument call is unterminated;
#   * `if args.gpu >= 0:` has no body;
#   * `optimizer` is created but never set up or stepped, and no backward
#     pass is visible;
#   * `gygw` is computed but never appended to `gygw_norms`;
#   * `weights` and `task_losses` are never appended to, yet they are indexed
#     and plotted;
#   * nothing is plotted on `ax3`.
# Restore from the original source before running.
def main():
    parser = argparse.ArgumentParser(description='GradNorm')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--n-iter', '-it', type=int, default=5000)
    parser.add_argument('--mode', '-m', choices=('grad_norm', 'equal_weight'),
    args = parser.parse_args()

    # One regression task per sigma.
    sigmas = [1, 10]
    n_task = len(sigmas)
    epsilons = np.random.normal(
        scale=3.5, size=(n_task, 100, 250)).astype(np.float32)
    dataset = RegressionDataset(sigmas, epsilons)

    model = RegressionTrainChain(RegressionChain(n_task))

    if args.gpu >= 0:

    optimizer = chainer.optimizers.Adam(alpha=1e-2)

    train_iter = chainer.iterators.SerialIterator(dataset, 200)

    xp = model.xp
    weights = []
    task_losses = []
    loss_ratios = []
    final_layer_names = ['task_{}'.format(i) for i in range(n_task)]
    for t in range(args.n_iter):
        batch = train_iter.next()
        x, ts = chainer.dataset.convert.concat_examples(batch, device=args.gpu)

        task_loss = model(x, ts)
        weighted_task_loss = model.weight * task_loss
        # Remember the first iteration's losses as the reference for ratios.
        if t == 0:
            initial_task_loss = task_loss.data
        loss = F.mean(weighted_task_loss)
        # Ignore a gradient to the coefficient vector, which
        # is computed from the standard loss.
        if args.mode == 'grad_norm':
            # Use |\nabla_W w_i * L_i | = w_i |\nabla_W L_i|
            gygw_norms = []
            for i, layer_name in enumerate(final_layer_names):
                l = getattr(model.model, layer_name)
                gygw = chainer.grad([task_loss[i]], [l.W])[0].data
            gygw_norms = xp.stack(gygw_norms)
            norms = model.weight * gygw_norms

            alpha = 0.16
            mean_norm = xp.mean(norms.data)
            loss_ratio = task_loss.data / initial_task_loss
            inverse_train_rate = loss_ratio / xp.mean(loss_ratio)

            # Mean absolute gap between each weighted gradient norm and its
            # target (mean norm scaled by inverse_train_rate ** alpha).
            diff = norms - (inverse_train_rate ** alpha) * mean_norm
            grad_norm_loss = F.mean(F.absolute(diff))

            # For debugging purpose only
            # from chainer import computational_graph
            # import os
            # cg = computational_graph.build_computational_graph(
            #     [grad_norm_loss]).dump()
            # with open('grad_weight_loss_cg', 'w') as f:
            #     f.write(cg)


        # Renormalize so that the task weights sum to n_task
        normalize_coeff = n_task / xp.sum(model.weight.data)
        model.weight.data[:] = model.weight.data * normalize_coeff

        # Record
        loss_ratios.append(np.mean(task_losses[-1] / task_losses[0]))

        if t % 100 == 0:
            print('{}/{}:  loss_ratio={}, weights={} task_loss={}'.format(
                t, args.n_iter, loss_ratios[-1], model.weight.data, task_loss.data))
    task_losses = np.array(task_losses)
    weights = np.array(weights)

    # Four side-by-side panels: per-task losses, normalised loss, weights.
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 4, 1)
    ax1.set_title('loss (task 0)')
    ax2 = fig.add_subplot(1, 4, 2)
    ax2.set_title('loss (task 1)')
    ax3 = fig.add_subplot(1, 4, 3)
    ax3.set_title('sum of normalized losses')
    ax4 = fig.add_subplot(1, 4, 4)
    ax4.set_title('change of weights over time')
    ax1.plot(task_losses[:, 0])
    ax2.plot(task_losses[:, 1])
    ax4.plot(weights[:, 0])
    ax4.plot(weights[:, 1])