Beispiel #1
0
 def __call__(self, x):
     """Normalize ``x`` over its last axis, then apply gain and bias.

     The mean and the mean squared deviation are taken along axis ``-1``.
     The normalized result is scaled by ``self.g`` and shifted by
     ``self.b``, both applied starting at axis 2.
     """
     # Chainer does not broadcast implicitly, so the statistics are
     # expanded to the input shape by hand.
     shape = x.shape
     mean = F.broadcast_to(F.mean(x, -1, keepdims=True), shape)
     variance = F.broadcast_to(
         F.mean((x - mean) ** 2, -1, keepdims=True), shape)
     normalized = (x - mean) / F.sqrt(variance + self.e)
     scaled = F.scale(normalized, self.g, axis=2)
     return F.bias(scaled, self.b, axis=2)
    def forward(self, inputs, device):
        """Apply ``gn_module._MulInvStd`` using statistics taken from ``x``.

        ``inputs`` is unpacked into ``(x, y)``. The mean and inverse
        standard deviation of ``x`` over axis 1 are computed here and
        handed to the function together with an all-ones gamma.
        ``device`` is not used by this method.
        """
        x, y = inputs

        # Statistics over axis 1 of x.
        mu = functions.mean(x, axis=1)
        centered = x - mu[:, None]
        variance = functions.mean(centered * centered, axis=1)
        rstd = functions.rsqrt(variance + self.eps)

        xp = self.backend_config.xp
        unit_gamma = xp.ones(self.shape[0], dtype=self.dtype)

        func = gn_module._MulInvStd(
            self.eps, mu.array, rstd.array, unit_gamma)
        return func.apply((x, y))
Beispiel #3
0
    def __call__(self, x):
        """Compute and report the training loss for ``x``.

        The loss is ``-(reconstr - self.beta * kl_penalty)`` where
        ``reconstr`` is the mean log-probability of ``x`` under the decoder
        output and ``kl_penalty`` is the mean KL divergence between the
        encoder output and the prior. ``loss``, ``reconstr`` and
        ``kl_penalty`` are reported on this link.
        """
        posterior = self.encoder(x)
        latent = posterior.sample(self.k)
        likelihood = self.decoder(latent)
        prior = self.prior()

        # Repeat the input along a new leading axis of length ``self.k``
        # before scoring it against the decoder distribution.
        tiled_x = F.broadcast_to(x[None, :], (self.k,) + x.shape)
        reconstr = F.mean(likelihood.log_prob(tiled_x))
        kl_penalty = F.mean(chainer.kl_divergence(posterior, prior))

        objective = reconstr - self.beta * kl_penalty
        loss = -objective

        reported = (
            ('loss', loss),
            ('reconstr', reconstr),
            ('kl_penalty', kl_penalty),
        )
        for key, value in reported:
            reporter.report({key: value}, self)
        return loss
Beispiel #4
0
 def update_policy():
     """Take one optimizer step that increases the mean Q(obs, policy(obs))."""
     action = policy(obs)
     # The slice works around https://github.com/chainer/chainer/issues/2744
     q_values = Q(obs, action)[:]
     # Minimizing the negated mean Q-value maximizes Q(s, policy(s)).
     objective = -F.mean(q_values)
     policy.cleargrads()
     objective.backward()
     opt_policy.update()
 def f_loss_grad(x):
     """Return the squared-error loss and its flat gradient at parameters ``x``.

     The flat parameter vector ``x`` is written into ``self`` first. Both
     returned values are cast to ``float64``.
     """
     set_flat_params(self, x)
     self.cleargrads()
     residual = self.compute_baselines(obs) - targets
     mse = F.mean(F.square(residual))
     mse.backward()
     gradient = get_flat_grad(self)
     loss_value = mse.data.astype(np.float64)
     return loss_value, gradient.astype(np.float64)
Beispiel #6
0
    def predict(self, xs):
        """Encode ``xs``, sample a Gumbel-softmax code and decode it.

        Returns the decoder output together with the second value returned
        by ``self._encode``. The mean of the per-code maximum of the
        sampled distribution is reported as ``'maxp'``.
        """
        # Encoding
        logits, exs = self._encode(xs)

        # Discretization
        relaxed = F.gumbel_softmax(logits, self.tau, axis=2)
        flat_codes = relaxed.reshape(-1, self.M * self.K)
        with chainer.no_backprop_mode():
            peak = F.max(relaxed, axis=2)
            reporter.report({'maxp': F.mean(peak).data}, self)

        # Decoding
        return self._decode(flat_codes), exs
    def test_backward_case2(self):
        """Backward if non-zero gradient is on a face."""

        # Renderer configured as in the sibling backward test.
        renderer = neural_renderer.Renderer()
        settings = (
            ('image_size', 64),
            ('anti_aliasing', False),
            ('perspective', False),
            ('light_intensity_ambient', 1.0),
            ('light_intensity_directional', 0.0),
        )
        for attribute, value in settings:
            setattr(renderer, attribute, value)

        # One triangle, the probed pixel and the expected vertex gradient.
        pyi, pxi = 40, 50
        triangle = cp.array(
            [[0.8, 0.8, 1.],
             [-0.5, -0.8, 1.],
             [0.8, -0.8, 1.]], 'float32')
        faces = cp.array([[0, 1, 2]], 'int32')
        textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
        expected = cp.array(
            [[0.98646867, 1.04628897, 0.],
             [-1.03415668, - 0.10403691, 0.],
             [3.00094461, - 1.55173182, 0.]], 'float32')

        batched = utils.to_minibatch((triangle, faces, textures, expected))
        vertices, faces, textures, expected = batched
        vertices = chainer.Variable(vertices)

        rendered = renderer.render(vertices, faces, textures)
        averaged = cf.mean(rendered, axis=1)
        loss = cf.sum(cf.absolute(averaged[:, pyi, pxi]))
        loss.backward()

        expected = cp.array(expected, 'float32')
        chainer.testing.assert_allclose(vertices.grad, expected, rtol=1e-2)
    def test_backward_case1(self):
        """Backward if non-zero gradient is out of a face."""

        # Renderer configured as in the sibling backward test.
        renderer = neural_renderer.Renderer()
        settings = (
            ('image_size', 64),
            ('anti_aliasing', False),
            ('perspective', False),
            ('light_intensity_ambient', 1.0),
            ('light_intensity_directional', 0.0),
        )
        for attribute, value in settings:
            setattr(renderer, attribute, value)

        # One triangle, the probed pixel and the expected vertex gradient.
        pxi, pyi = 35, 25
        triangle = cp.array(
            [[0.8, 0.8, 1.],
             [0.0, -0.5, 1.],
             [0.2, -0.4, 1.]], 'float32')
        faces = cp.array([[0, 1, 2]], 'int32')
        textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
        expected = cp.array(
            [[1.6725862, -0.26021874, 0.],
             [1.41986704, -1.64284933, 0.],
             [0., 0., 0.]], 'float32')

        batched = utils.to_minibatch((triangle, faces, textures, expected))
        vertices, faces, textures, expected = batched
        vertices = chainer.Variable(vertices)

        rendered = renderer.render(vertices, faces, textures)
        averaged = cf.mean(rendered, axis=1)
        # The loss is the absolute distance of the probed pixel from 1.
        loss = cf.sum(cf.absolute(averaged[:, pyi, pxi] - 1))
        loss.backward()

        chainer.testing.assert_allclose(vertices.grad, expected, rtol=1e-2)
Beispiel #9
0
 def _head_to_tail(self, pool5):
     """Run ``self.rcnn_top`` on ``pool5`` and average out axes 3 and 2."""
     top = self.rcnn_top(pool5)  # B, 1024, 1, 1 (per the original note)
     width_averaged = F.mean(top, 3)
     return F.mean(width_averaged, 2)  # B, 1024 (per the original note)
Beispiel #10
0
	def __call__(self, x):
		"""Divide ``x`` by the root of its mean square over axis 1 (plus 1e-8)."""
		mean_square = mean(x ** 2, axis=1, keepdims=True)
		denominator = sqrt(mean_square + 1e-8)
		return x / denominator
 def encode_phrase(self, X):
     """Embed each id sequence in ``X`` and stack the per-sequence means.

     Every element of ``X`` is looked up in ``self.w_vec``, averaged over
     axis 0 (keeping that axis) and the results are stacked vertically.
     """
     phrase_vectors = []
     for ids in X:
         embedded = F.embed_id(ids, self.w_vec)
         phrase_vectors.append(F.mean(embedded, axis=0, keepdims=True))
     return F.vstack(phrase_vectors)
Beispiel #12
0
def get_posi_from_img(img, threshold=0.2):
    """Convert an image into point positions.

    Per the original note: image ``img = [1][3 or 1][imgH][imgW]`` is
    mapped to point positions ``posi = [N][y, x]``. The image is averaged
    over axis 1 before local maxima above ``threshold`` are searched.
    """
    channel_mean = F.mean(img, axis=1, keepdims=True)
    peaks = get_local_max_point(channel_mean.data, threshold=threshold)
    return conv_point_to_posi(peaks)
Beispiel #13
0
def loss_hinge_dis(dis_fake, dis_real):
    """Return ``mean(relu(1 - dis_real)) + mean(relu(1 + dis_fake))``."""
    real_term = F.mean(F.relu(1. - dis_real))
    fake_term = F.mean(F.relu(1. + dis_fake))
    return real_term + fake_term
Beispiel #14
0
 def gp_loss(self, x, z):
     """Return the mean squared L2 norm of the gradient of ``x`` w.r.t. ``z``.

     The differentiated scalar is ``mean(x) / x.shape[0]``. Double backprop
     is enabled so the result can itself be differentiated.
     """
     batch_size = x.shape[0]
     scaled_mean = F.mean(x) / batch_size
     grads = chainer.grad(
         [scaled_mean], [z], enable_double_backprop=True)
     (grad_z,) = grads
     return F.mean(F.batch_l2_norm_squared(grad_z))
Beispiel #15
0
def feature_vector_normalize(x):
    """Scale ``x`` by ``1 / sqrt(mean(x * x, axis=1) + 1e-8)``."""
    mean_square = F.mean(x * x, axis=1, keepdims=True)
    alpha = 1.0 / F.sqrt(mean_square + 1e-8)
    # Chainer needs the scale expanded to the input shape explicitly.
    return F.broadcast_to(alpha, x.data.shape) * x
def train(width, height, depth, start_alpha=0):
    """Train the generator and discriminator at the given ``depth``.

    Existing ``generator.model`` / ``discriminator.model`` files are loaded
    when possible; if loading fails the reason is printed and training
    starts from the freshly constructed network. Both models are saved
    after every epoch.

    Args:
        width: Not used by this function; kept for caller compatibility.
        height: Not used by this function; kept for caller compatibility.
        depth: Images are imported at ``16 * 2**depth`` pixels per side and
            the value is forwarded to both networks.
        start_alpha: Initial value of ``alpha``, which grows by ``5e-4``
            per batch while it is below 1.0 and is forwarded to both
            networks.
    """
    g = generator(512, 512, 100)
    try:
        serializers.load_npz("generator.model", g)
        print("generator loaded")
    except Exception as exc:
        # Best effort: keep going with fresh weights, but report why the
        # checkpoint was not used (and let KeyboardInterrupt through).
        print("generator not loaded: %s" % (exc,))
    d = discriminator()
    try:
        serializers.load_npz("discriminator.model", d)
        print("discriminator loaded")
    except Exception as exc:
        print("discriminator not loaded: %s" % (exc,))

    g_opt = chainer.optimizers.Adam(alpha=0.001, beta1=0.0, beta2=0.99)
    g_opt.setup(g)
    g_opt.add_hook(chainer.optimizer.WeightDecay(0.0005))

    d_opt = chainer.optimizers.Adam(alpha=0.001, beta1=0.0, beta2=0.99)
    d_opt.setup(d)
    d_opt.add_hook(chainer.optimizer.WeightDecay(0.0005))

    X_train, tags = data_import(16 * (2**depth), 16 * (2**depth))
    print(X_train.shape)
    tags = tags.astype(np.float32)

    # A trailing partial batch is dropped.
    num_batches = X_train.shape[0] // BATCH_SIZE
    alpha = start_alpha
    for epoch in range(NUM_EPOCH):

        for index in range(num_batches):
            if alpha < 1.0:
                alpha = alpha + 5e-4

            # Images are rescaled per batch with (v - 127.5) / 127.5 and
            # the last axis is moved to position 1.
            image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            image_batch = (image_batch.astype(np.float32) - 127.5) / 127.5
            image_batch = image_batch.transpose(0, 3, 1, 2)
            tag_batch = tags[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]

            noise = np.random.normal(0, 0.5, [len(image_batch), 100])
            z = Variable(noise.astype(np.float32))

            x = g(z, tag_batch, depth, alpha)
            if index % 10 == 0:
                # Undo the input scaling before writing a preview image.
                generated_images = x.data * 127.5 + 127.5
                generated_images = generated_images.transpose(0, 2, 3, 1)
                save_generated_image(generated_images,
                                     "%04d_%04d.png" % (epoch, index))

            # Discriminator outputs for generated (yl) and real (yl2) images.
            yl = d(x, tag_batch, depth, alpha)
            yl2 = d(image_batch, tag_batch, depth, alpha)

            # Discriminator: mean (fake - real) output plus a small
            # penalty on the squared fake output.
            d_loss = -F.sum(yl2 - yl) / len(image_batch)
            d_loss += F.mean(0.001 * yl * yl)
            # Generator: negated mean discriminator output on fakes.
            g_loss = -F.sum(yl) / len(image_batch)

            g.cleargrads()
            g_loss.backward()
            g_opt.update()

            d.cleargrads()
            d_loss.backward()
            d_opt.update()
            print(
                "epoch %d, batch: %d, g_loss: %f, d_loss: %f, alpha: %f, depth: %d"
                % (epoch, index, g_loss.data, d_loss.data, alpha, depth))

        serializers.save_npz('generator.model', g)
        serializers.save_npz('discriminator.model', d)
Beispiel #17
0
 def loss_func_adv_gen(self, y_fake):
     """Return the mean of ``y_fake`` as the generator's adversarial term."""
     mean_score = F.mean(y_fake)
     return mean_score
Beispiel #18
0
 def loss_func_adv_dis_real(self, y_real):
     """Return the mean of ``y_real`` as the discriminator's real-sample term."""
     mean_score = F.mean(y_real)
     return mean_score
Beispiel #19
0
 def loss_func_adv_dis_fake(self, y_fake):
     """Return the mean of ``y_fake`` as the discriminator's fake-sample term."""
     mean_score = F.mean(y_fake)
     return mean_score
Beispiel #20
0
        # Fragment of a discriminator update step; the enclosing function is
        # not visible here.
        # Downsample the generated image twice (3x3 average pooling,
        # stride 2, padding 1), one version per additional discriminator.
        fake_2 = F.average_pooling_2d(fake, 3, 2, 1)
        fake_4 = F.average_pooling_2d(fake_2, 3, 2, 1)

        # Each discriminator scores the image concatenated with the line
        # input of the matching resolution; the second return value is unused.
        dis_fake, _ = discriminator(F.concat([fake, line]))
        dis2_fake, _ = discriminator_2(F.concat([fake_2, line_2]))
        dis4_fake, _ = discriminator_4(F.concat([fake_4, line_4]))
        dis_color, _ = discriminator(F.concat([color, line]))
        dis2_color, _ = discriminator_2(F.concat([color_2, line_2]))
        dis4_color, _ = discriminator_4(F.concat([color_4, line_4]))

        # Cut the graph behind the generated images so the backward pass
        # below stops at them.
        fake.unchain_backward()
        fake_2.unchain_backward()
        fake_4.unchain_backward()

        # Sum over the three discriminators of
        # mean(softplus(-score_color)) + mean(softplus(score_fake)).
        adver_loss = F.mean(F.softplus(-dis_color)) + F.mean(
            F.softplus(dis_fake))
        adver_loss += F.mean(F.softplus(-dis2_color)) + F.mean(
            F.softplus(dis2_fake))
        adver_loss += F.mean(F.softplus(-dis4_color)) + F.mean(
            F.softplus(dis4_fake))

        discriminator.cleargrads()
        discriminator_2.cleargrads()
        discriminator_4.cleargrads()
        # NOTE(review): only the first discriminator is moved to the GPU
        # here, and only after it has already been called -- confirm intent.
        discriminator.to_gpu()
        adver_loss.backward()
        dis_opt.update()
        dis2_opt.update()
        dis4_opt.update()
        adver_loss.unchain_backward()
Beispiel #21
0
 def normalize(self, z):
     """Divide ``z`` by the root of its mean square over axis 1 (plus 1e-8)."""
     mean_square = mean(z**2, axis=1, keepdims=True)
     denominator = sqrt(mean_square + 1e-8)
     return z / denominator
Beispiel #22
0
 def make_input_x(self, x, mask, xp):
     """Blend ``x`` with its own mean over axes 2 and 3 according to ``mask``.

     Returns ``x * (1 - mask) + fill * mask``, where ``fill`` is the mean of
     ``x`` over axes 2 and 3 and every operand is broadcast to ``x.shape``.
     """
     target_shape = x.shape
     fill = F.mean(x, axis=(2, 3))[:, :, xp.newaxis, xp.newaxis]
     kept_part = x * F.broadcast_to((1 - mask), target_shape)
     filled_part = (F.broadcast_to(fill, target_shape)
                    * F.broadcast_to(mask, target_shape))
     return kept_part + filled_part
Beispiel #23
0
def batch_pit_n_speaker_loss(ys, ts, n_speakers_list):
    """
    PIT loss over mini-batch.

    For every utterance the speaker permutation of the labels with the
    smallest cross entropy is selected; permutations that are not allowed
    for the utterance's speaker count are excluded by adding ``inf``.

    Args:
      ys: B-length list of predictions (pre-activations)
      ts: B-length list of labels
      n_speakers_list: list of n_speakers in batch
    Returns:
      loss: sum of the per-utterance minimum losses divided by the total
        number of frames in ``ts``
      labels: B-length list of permuted labels, each cut to the
        utterance's number of speakers
    """
    max_n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(max_n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    # The full permutation list is built once and shared by the index
    # array below and by the per-speaker-count lookup.
    all_perms = list(permutations(range(max_n_speakers)))
    perms = xp.array(all_perms, dtype='i')
    # y_ind: [0,1,2,3]
    y_ind = xp.arange(max_n_speakers, dtype='i')
    #  perms  -> relation to t_inds      -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, max_n_speakers)

    # losses_perm: (B, Perm)
    losses_perm = F.stack(
        [F.mean(losses[:, y_ind, t_ind], axis=1) for t_ind in t_inds],
        axis=1)

    # Allowed permutation indices depend only on the speaker count, so
    # they are computed once per distinct count instead of per utterance.
    allowed_by_count = {}

    def select_perm_indices(num):
        """Index of the first full permutation starting with each
        permutation of ``range(num)``."""
        if num not in allowed_by_count:
            prefixes = [perm[:num] for perm in all_perms]
            allowed_by_count[num] = [
                prefixes.index(sub_perm)
                for sub_perm in permutations(range(num))
            ]
        return allowed_by_count[num]

    # masks: (B, Perms); inf everywhere except the allowed permutations
    masks = xp.full_like(losses_perm.array, xp.inf)
    for i, _ in enumerate(ts):
        masks[i, select_perm_indices(n_speakers_list[i])] = 0
    losses_perm += masks

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]
    labels_perm = [
        t[:, :n_speakers]
        for t, n_speakers in zip(labels_perm, n_speakers_list)
    ]

    return min_loss, labels_perm
Beispiel #24
0
 def bs_reg(self):
     """Return the mean squared value of ``self.linear.W``."""
     weights = self.linear.W
     return F.mean(F.square(weights))
    def get_loss(self, batch_data):
        """Compute the loss terms for one batch.

        Args:
            batch_data: Batch of images; rescaled here with
                ``batch_data / 127.5 - 1``.

        Returns:
            dict: Always holds ``'l1_loss'``, ``'ae_loss'`` and
            ``'g_loss'``. When ``config.GAN == 'wgan_gp'`` it also holds
            ``'d_loss'`` and ``'gp_loss'``.

        Raises:
            NotImplementedError: If the refinement stage is being trained
                (``config.PRETRAIN_COARSE_NETWORK`` is falsy) with a
                ``config.GAN`` value other than ``'wgan_gp'``.
        """
        config = self.config
        batch_pos = batch_data / 127.5 - 1
        bbox = random_bbox(config)
        mask = bbox2mask(bbox, batch_data.shape[0], config, self.xp)
        batch_incomplete = batch_pos * (1 - mask)
        x1, x2, offset_flow = self.inpaintnet(batch_incomplete, mask, config)
        # Only the coarse output x1 is used while pretraining.
        if config.PRETRAIN_COARSE_NETWORK:
            batch_predicted = x1
        else:
            batch_predicted = x2
        losses = {}
        # apply mask and complete image
        batch_complete = batch_predicted * mask + batch_incomplete * (1 - mask)
        # local patches
        local_patch_batch_pos = local_patch(batch_pos, bbox)
        local_patch_x1 = local_patch(x1, bbox)
        local_patch_x2 = local_patch(x2, bbox)
        local_patch_batch_complete = local_patch(batch_complete, bbox)
        local_patch_mask = local_patch(mask, bbox)
        l1_alpha = config.COARSE_L1_ALPHA
        # L1 loss on the local patch, weighted by the spatial discount mask.
        losses["l1_loss"] = l1_alpha * F.mean(
            F.absolute(local_patch_batch_pos - local_patch_x1) *
            spatial_discounting_mask(config, self.xp))
        if not config.PRETRAIN_COARSE_NETWORK:
            losses['l1_loss'] += F.mean(
                F.absolute(local_patch_batch_pos - local_patch_x2) *
                spatial_discounting_mask(config, self.xp))
        # L1 loss where ``1 - mask`` is non-zero, normalized by the mean
        # of ``1 - mask``.
        losses['ae_loss'] = l1_alpha * F.mean(
            F.absolute(batch_pos - x1) * (1. - mask))
        if not config.PRETRAIN_COARSE_NETWORK:
            losses['ae_loss'] += F.mean(
                F.absolute(batch_pos - x2) * (1. - mask))
        losses['ae_loss'] /= F.mean(1. - mask)

        # gan
        batch_pos_neg = F.concat([batch_pos, batch_complete], axis=0)
        # local deterministic patch
        local_patch_batch_pos_neg = F.concat(
            [local_patch_batch_pos, local_patch_batch_complete], 0)
        if config.GAN_WITH_MASK:
            batch_pos_neg = F.concat([batch_pos_neg, mask], axis=1)
        # wgan with gradient penalty
        if config.GAN == 'wgan_gp':
            # separate gan
            pos_neg_local, pos_neg_global = self.discriminator(
                local_patch_batch_pos_neg, batch_pos_neg)
            pos_local, neg_local = F.split_axis(pos_neg_local, 2, axis=0)
            pos_global, neg_global = F.split_axis(pos_neg_global, 2, axis=0)
            # wgan loss
            g_loss_local, d_loss_local = gan_wgan_loss(pos_local, neg_local)
            g_loss_global, d_loss_global = gan_wgan_loss(
                pos_global, neg_global)
            losses['g_loss'] = (
                config.GLOBAL_WGAN_LOSS_ALPHA * g_loss_global + g_loss_local)
            losses['d_loss'] = d_loss_global + d_loss_local
            # gp
            interpolates_local = random_interpolates(
                local_patch_batch_pos, local_patch_batch_complete)
            interpolates_global = random_interpolates(batch_pos,
                                                      batch_complete)
            dout_local, dout_global = self.discriminator(
                interpolates_local, interpolates_global)
            # apply penalty
            penalty_local = gradients_penalty(interpolates_local,
                                              dout_local,
                                              mask=local_patch_mask)
            penalty_global = gradients_penalty(interpolates_global,
                                               dout_global,
                                               mask=mask)
            losses['gp_loss'] = config.WGAN_GP_LAMBDA * (penalty_local +
                                                         penalty_global)
            losses['d_loss'] = losses['d_loss'] + losses['gp_loss']
        elif not config.PRETRAIN_COARSE_NETWORK:
            # No adversarial term was produced, so the scaling below would
            # fail with KeyError: 'g_loss'. Fail with a clear message.
            raise NotImplementedError(
                '{} not implemented.'.format(config.GAN))

        if config.PRETRAIN_COARSE_NETWORK:
            losses['g_loss'] = 0
        else:
            losses['g_loss'] = config.GAN_LOSS_ALPHA * losses['g_loss']
        losses['g_loss'] += config.L1_LOSS_ALPHA * losses['l1_loss']
        if config.AE_LOSS:
            losses['g_loss'] += config.AE_LOSS_ALPHA * losses['ae_loss']
        return losses
Beispiel #26
0
def loss_hinge_gen(dis_fake):
    """Return the negated mean of ``dis_fake``."""
    mean_score = F.mean(dis_fake)
    return -mean_score
def gan_sngan_loss(pos, neg, d_loss_only=False):
    """SN-PatchGAN hinge losses.

    Returns ``(g_loss, d_loss)``. ``d_loss`` is
    ``mean(relu(1 - pos) + relu(1 + neg))``; ``g_loss`` is ``-mean(neg)``,
    or ``None`` when ``d_loss_only`` is true.
    """
    real_margin = F.relu(1 - pos)
    fake_margin = F.relu(1 + neg)
    d_loss = F.mean(real_margin + fake_margin)
    if d_loss_only:
        g_loss = None
    else:
        g_loss = -F.mean(neg)
    return g_loss, d_loss
    def calculate_logistic_loss(self, y, t):
        """Negative log-likelihood of ``t`` under a logistic mixture.

        ``y`` is split along axis 1 into three equal parts: mixture logits,
        means and log-scales. ``t`` is multiplied by 127.5 and broadcast to
        the shape of the means. Returns the negated mean of the per-sample
        log-sum-exp over the mixture axis.
        """
        xp = chainer.cuda.get_array_module(t)
        if xp != numpy:
            xp.cuda.Device(t.device).use()
        n_mix = y.shape[1] // 3

        mixture_logits = y[:, :n_mix]
        means = y[:, n_mix:2 * n_mix]
        log_scales = y[:, 2 * n_mix:3 * n_mix]
        # Clamp the log-scales from below at ``self.log_scale_min``.
        scale_floor = self.scalar_to_tensor(log_scales, self.log_scale_min)
        log_scales = F.maximum(log_scales, scale_floor)

        t = F.broadcast_to(127.5 * t, means.shape)

        # Half the width of one quantization bin on the 127.5 scale.
        half_bin = 127.5 / (self.quantize - 1)
        centered = t - means
        inv_std = F.exp(-log_scales)
        upper = inv_std * (centered + half_bin)
        cdf_upper = F.sigmoid(upper)
        lower = inv_std * (centered - half_bin)
        cdf_lower = F.sigmoid(lower)

        # log(sigmoid(upper)) and log(1 - sigmoid(lower)) via softplus.
        log_cdf_upper = upper - F.softplus(upper)
        log_one_minus_cdf_lower = -F.softplus(lower)

        cdf_delta = cdf_upper - cdf_lower

        # Targets near the two ends of the range use the one-sided terms;
        # everything else uses the log of the bin probability, floored at
        # 1e-12 before the log.
        below_bottom = t.array < self.scalar_to_tensor(t, 127.5 * -0.999)
        above_top = t.array > self.scalar_to_tensor(t, 127.5 * 0.999)
        floor = self.scalar_to_tensor(cdf_delta, 1e-12)
        log_interior = F.log(F.maximum(cdf_delta, floor))
        top_or_interior = F.where(
            above_top, log_one_minus_cdf_lower, log_interior)
        log_probs = F.where(below_bottom, log_cdf_upper, top_or_interior)

        log_probs = log_probs + F.log_softmax(mixture_logits)
        return -F.mean(F.logsumexp(log_probs, axis=1))
Beispiel #29
0
def square_loss(ys, ts):
    """Return the mean over axes 0 and 2 of ``(ys - ts) ** 2 + 1e-5``."""
    squared_error = (ys - ts) ** 2
    return F.mean(squared_error + 1e-5, axis=(0, 2))
Beispiel #30
0
def mean_clipped_loss(y, t):
    """Return the mean of the unreduced Huber loss (delta 1.0) of ``y`` vs ``t``."""
    elementwise = F.huber_loss(y, t, delta=1.0, reduce='no')
    return F.mean(elementwise)
Beispiel #31
0
def mean_clipped_loss(y, t):
    """Return the mean of the unreduced Huber loss (delta 1.0) of ``y`` vs ``t``."""
    per_element = F.huber_loss(y, t, delta=1.0, reduce='no')
    return F.mean(per_element)
Beispiel #32
0
	def __call__(self, x, ys, yb):
		"""Normalize ``x`` over axis 1, then scale by ``ys`` and shift by ``yb``.

		``ys`` and ``yb`` get two trailing singleton axes and are broadcast
		over ``x.shape[2:]`` before being applied.
		"""
		trailing = x.shape[2:]
		scale = broadcast_to(ys.reshape(ys.shape + (1, 1)), ys.shape + trailing)
		shift = broadcast_to(yb.reshape(yb.shape + (1, 1)), yb.shape + trailing)
		centered = x - broadcast_to(mean(x, axis=1, keepdims=True), x.shape)
		mean_square = mean(centered ** 2, axis=1, keepdims=True)
		deviation = broadcast_to(sqrt(mean_square + 1e-8), x.shape)
		return scale * centered / deviation + shift
Beispiel #33
0
def feature_vector_normalization(x, eps=1e-8):
    """Scale ``x`` by ``1 / sqrt(mean(x * x, axis=1) + eps)``.

    Per the original note, ``x`` is laid out as (B, C, H, W).
    """
    mean_square = F.mean(x * x, axis=1, keepdims=True)
    alpha = 1.0 / F.sqrt(mean_square + eps)
    scale = F.broadcast_to(alpha, x.data.shape)
    return scale * x
Beispiel #34
0
            # Fragment: tail of a batch-building loop whose header is not
            # visible here. Each pass appends one prepared input and the
            # prepared image "<dir_path>/<index>.png".
            inp = prepare_dataset(inp)
            input_box.append(inp)
            img = dir_path + "/" + str(index) + ".png"
            img = prepare_dataset(img)
            frame_box.append(img)

        # Stack the collected samples into float32 batches.
        x = chainer.as_variable(xp.array(input_box).astype(xp.float32))
        t = chainer.as_variable(xp.array(frame_box).astype(xp.float32))
        # Difference between the extracted features of target and input.
        embed = feature_extractor(t) - feature_extractor(x)
        c = feature_embed(embed)

        z = F.concat([x, c], axis=1)
        y = predictor(z)
        # First discriminator: target frames t against predictions y.
        y_dis = discriminator_content(y)
        t_dis = discriminator_content(t)
        dis_loss = F.mean(F.softplus(-t_dis)) + F.mean(F.softplus(y_dis))

        # Second discriminator: the feature difference above against the
        # one recomputed from the prediction and ``make_diff`` of it.
        c_g = feature_extractor(y) - feature_extractor(make_diff(y))
        c_dis = discriminator_sequence(embed)
        c_g_dis = discriminator_sequence(c_g)
        dis_loss += F.mean(F.softplus(-c_dis)) + F.mean(F.softplus(c_g_dis))

        # NOTE(review): only c_g is unchained before the backward pass; y
        # is not, so gradients presumably also reach the predictor -- confirm.
        c_g.unchain_backward()

        discriminator_content.cleargrads()
        discriminator_sequence.cleargrads()
        dis_loss.backward()
        dis_c_opt.update()
        dis_s_opt.update()
        dis_loss.unchain_backward()
def loss_softmax_cross_entropy(predict, ground_truth):
    """Return ``-mean(log(predict + 1e-16) * ground_truth)``."""
    eps = 1e-16  # keeps the log finite when ``predict`` contains zeros
    log_prediction = F.log(predict + eps)
    return -F.mean(log_prediction * ground_truth)
    def update_core(self):
        """Run ``self.n_dis`` critic updates, with one generator update first.

        On every pass a real batch and two independent generated batches
        are pushed through ``self.dis``. On the first pass the generator is
        updated with the energy distance of the three outputs. The
        discriminator is then updated with the negated surrogate loss plus
        a gradient penalty computed at a random interpolation between real
        and generated samples.
        """
        gen_optimizer = self.get_optimizer('opt_gen')
        dis_optimizer = self.get_optimizer('opt_dis')
        xp = self.gen.xp

        def l2_distance(a, b):
            # Euclidean distance over axis 1, keeping that axis.
            return F.sqrt(F.sum((a - b) ** 2, axis=1, keepdims=True))

        def backward_l2_distance(g, a, b):
            # Hand-written gradients of l2_distance w.r.t. a and b.
            out = F.broadcast_to(l2_distance(a, b), a.data.shape)
            g = F.broadcast_to(g, a.data.shape)
            return g * (a - b) / out, g * (b - a) / out

        def energy_distance(r, f1, f2):
            total = l2_distance(r, f1) + l2_distance(r, f2)
            return F.mean(total - l2_distance(f1, f2))

        def critic(a, b):
            origin = xp.zeros_like(a.data)
            return l2_distance(a, b) - l2_distance(a, origin)

        def backward_critic(g, a, b):
            ga0, gb0 = backward_l2_distance(g, a, b)
            ga1, gb1 = backward_l2_distance(g, a, xp.zeros_like(a.data))
            return ga0 - ga1, gb0 - gb1

        for step in range(self.n_dis):
            batch = self.get_iterator('main').next()
            batchsize = len(batch)
            samples = [np.asarray(item).astype("f") for item in batch]
            x_real = Variable(xp.asarray(samples))
            h_real = self.dis(x_real)

            z = Variable(xp.asarray(self.gen.make_hidden(batchsize)))
            x_fake1 = self.gen(z)
            h_fake1 = self.dis(x_fake1)

            z2 = Variable(xp.asarray(self.gen.make_hidden(batchsize)))
            x_fake2 = self.gen(z2)
            h_fake2 = self.dis(x_fake2)

            critic_real = critic(h_real, h_fake2)
            critic_fake = critic(h_fake1, h_fake2)
            loss_surrogate = F.mean(critic_real - critic_fake)

            # The generator is only updated on the first pass.
            if step == 0:
                loss_gen = energy_distance(h_real, h_fake1, h_fake2)
                self.gen.cleargrads()
                loss_gen.backward()
                gen_optimizer.update()
                chainer.reporter.report({'loss_gen': loss_gen})
            x_fake1.unchain_backward()
            x_fake2.unchain_backward()

            # Random per-sample interpolation between real and fake images.
            eps = xp.random.uniform(
                0, 1, size=batchsize).astype("f")[:, None, None, None]
            x_mid = eps * x_real + (1.0 - eps) * x_fake1
            h_mid = Variable(self.dis(x_mid).data)
            critic_mid = critic(h_mid, h_fake2.data)

            # calc gradient penalty
            ones = Variable(xp.ones_like(critic_mid.data))
            dydh, _ = backward_critic(ones, h_mid, h_fake2.data)
            dydx = self.dis.differentiable_backward(dydh)
            dydx_norm = F.sqrt(F.sum(dydx ** 2, axis=(1, 2, 3)))
            unit_norm = xp.ones_like(dydx_norm.data)
            loss_gp = self.lam * F.mean_squared_error(dydx_norm, unit_norm)

            self.dis.cleargrads()
            (-loss_surrogate).backward()
            loss_gp.backward()
            dis_optimizer.update()

            chainer.reporter.report({'loss_dis': loss_surrogate})
            chainer.reporter.report({'loss_gp': loss_gp})
            chainer.reporter.report({'g': F.mean(dydx_norm)})
Beispiel #37
0
def run_n_games(optimizer, learner, opponent, num_games):
    """Play ``num_games`` games of ``learner`` vs ``opponent`` and train once.

    The positions seen by ``learner``, the moves chosen and the evaluation
    values are recorded for every game. After all games have finished the
    learner is updated with the cross entropy of its moves, weighted by
    ``won - value`` for each recorded position.

    Args:
        optimizer: Optimizer whose ``update()`` is called after backprop.
        learner: Network being trained; called as ``learner(x1, x2)``.
        opponent: Network playing the other side; run without backprop.
        num_games: Number of games played in parallel.

    Returns:
        tuple: ``(np.average(learner_won), move_number_sum / num_games,
        loss.data)``, where ``move_number_sum`` adds up the number of
        unfinished games at the start of every turn.
    """
    states.default_start_position()

    # Create one list of features (aka state tensors) and one of moves for each game being played.
    features1_tensors = [[] for _ in range(num_games)]
    features2_tensors = [[] for _ in range(num_games)]
    labels_tensors = [[] for _ in range(num_games)]
    values_tensors = [[] for _ in range(num_games)]

    # Start all odd games with moves by 'opponent'.
    odd_features1 = np.empty((num_games, 2 * 14, 9, 9), dtype=np.float32)
    odd_features2 = np.empty((num_games, 2 * MAX_PIECES_IN_HAND_SUM + 1, 9, 9),
                             dtype=np.float32)
    states.make_odd_input_features(odd_features1, odd_features2)
    x1 = Variable(cuda.to_gpu(odd_features1))
    x2 = Variable(cuda.to_gpu(odd_features2))
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            y = opponent(x1, x2)
    y_data = cuda.to_cpu(y.data)
    states.do_odd_moves(y_data)

    current = learner
    other = opponent
    unfinished_states_num = num_games
    move_number_sum = 0
    while unfinished_states_num > 0:
        move_number_sum += unfinished_states_num

        # Get next moves by current player for all unfinished states.
        features1 = np.empty((unfinished_states_num, FEATURES1_NUM, 9, 9),
                             dtype=np.float32)
        features2 = np.empty((unfinished_states_num, FEATURES2_NUM, 9, 9),
                             dtype=np.float32)
        unfinished_list = states.make_unfinished_input_features(
            features1, features2)
        x1 = Variable(cuda.to_gpu(features1))
        x2 = Variable(cuda.to_gpu(features2))
        with chainer.no_backprop_mode():
            with chainer.using_config('train', False):
                y = current(x1, x2)
        y_data = cuda.to_cpu(y.data)

        # Filled in place by the call below, which also returns the number
        # of games that are still unfinished.
        labels = np.empty((unfinished_states_num), dtype=np.int32)
        values = np.empty((unfinished_states_num), dtype=np.float32)
        unfinished_states_num = states.do_unfinished_moves_and_eval(
            current is learner, y_data, labels, values)

        # Save the features of positions the learner moved in.
        if current is learner:
            for i, idx in enumerate(unfinished_list):
                features1_tensors[idx].append(features1[i])
                features2_tensors[idx].append(features2[i])
                labels_tensors[idx].append(labels[i])
                values_tensors[idx].append(values[i])

        # Swap 'current' and 'other' for next turn.
        current, other = other, current

    # Filled in place with whether the 'learner' player won each game.
    learner_won = np.empty(num_games, dtype=np.int32)
    states.get_learner_wons(learner_won)

    # Train on all game's results
    features1_tensor_all = []
    features2_tensor_all = []
    labels_tensor_all = []
    rewards_tensor_all = []
    for features1_tensor, features2_tensor, labels_tensor, values_tensor, won in zip(
            features1_tensors, features2_tensors, labels_tensors,
            values_tensors, learner_won.astype(np.float32)):
        features1_tensor_all.extend(features1_tensor)
        features2_tensor_all.extend(features2_tensor)
        labels_tensor_all.extend(labels_tensor)
        # Weight of each position: game result minus the recorded value.
        rewards_tensor_all.extend(
            list(won - np.array(values_tensor, dtype=np.float32)))

    x1 = Variable(cuda.to_gpu(np.array(features1_tensor_all,
                                       dtype=np.float32)))
    x2 = Variable(cuda.to_gpu(np.array(features2_tensor_all,
                                       dtype=np.float32)))
    t = Variable(cuda.to_gpu(np.array(labels_tensor_all, dtype=np.int32)))
    z = Variable(cuda.to_gpu(np.array(rewards_tensor_all, dtype=np.float32)))

    y = learner(x1, x2)

    learner.cleargrads()
    loss = F.mean(F.softmax_cross_entropy(y, t, reduce='no') * z)
    loss.backward()

    optimizer.update()

    # Return the win ratio.
    return np.average(
        learner_won), float(move_number_sum) / num_games, loss.data
Beispiel #38
0
    def __call__(self, h, adj):
        """Apply multi-head relational graph attention to atom features.

        Args:
            h: Atom features of shape (minibatch, atom, channel).
            adj: Adjacency data, either an array or a ``chainer.Variable``,
                reshapeable to (minibatch, EDGE_TYPE, 1, atom, atom).
                Entries that are truthy after a cast to bool mark connected
                atom pairs; all other pairs are masked out of the softmax.

        Returns:
            Updated atom features of shape
            (minibatch, atom, heads * out_dim) when ``self.concat_heads`` is
            true, otherwise (minibatch, atom, out_dim), i.e. the mean over
            heads.

        Raises:
            ValueError: If ``self.softmax_mode`` is neither ``'across'`` nor
                ``'within'``.
        """
        xp = self.xp
        # (minibatch, atom, channel); the channel size itself is not needed
        mb, atom, _ = h.shape
        # (minibatch, atom, EDGE_TYPE * heads * out_dim)
        h = self.message_layer(h)
        # (minibatch, atom, EDGE_TYPE, heads, out_dim)
        h = functions.reshape(h, (mb, atom, self.n_edge_types, self.n_heads,
                                  self.out_channels))
        # concat all pairs of atom
        # (minibatch, 1, atom, EDGE_TYPE, heads, out_dim)
        h_i = functions.reshape(h, (mb, 1, atom, self.n_edge_types,
                                    self.n_heads, self.out_channels))
        # (minibatch, atom, atom, EDGE_TYPE, heads, out_dim)
        h_i = functions.broadcast_to(h_i, (mb, atom, atom, self.n_edge_types,
                                           self.n_heads, self.out_channels))

        # (minibatch, atom, 1, EDGE_TYPE, heads, out_dim)
        h_j = functions.reshape(h, (mb, atom, 1, self.n_edge_types,
                                    self.n_heads, self.out_channels))
        # (minibatch, atom, atom, EDGE_TYPE, heads, out_dim)
        h_j = functions.broadcast_to(h_j, (mb, atom, atom, self.n_edge_types,
                                           self.n_heads, self.out_channels))

        # (minibatch, atom, atom, EDGE_TYPE, heads, out_dim * 2)
        e = functions.concat([h_i, h_j], axis=5)

        # (minibatch, EDGE_TYPE, heads, atom, atom, out_dim * 2)
        e = functions.transpose(e, (0, 3, 4, 1, 2, 5))
        # (minibatch * EDGE_TYPE * heads, atom * atom, out_dim * 2)
        e = functions.reshape(e, (mb * self.n_edge_types * self.n_heads,
                                  atom * atom, self.out_channels * 2))
        # (minibatch * EDGE_TYPE * heads, atom * atom, 1)
        e = self.attention_layer(e)

        # (minibatch, EDGE_TYPE, heads, atom, atom)
        e = functions.reshape(e, (mb, self.n_edge_types, self.n_heads, atom,
                                  atom))
        e = functions.leaky_relu(e, self.negative_slope)

        # (minibatch, EDGE_TYPE, atom, atom)
        # Use the builtin ``bool`` dtype: the ``numpy.bool`` alias is not
        # available in every NumPy release, while ``bool`` always is.
        adj_array = adj.array if isinstance(adj, chainer.Variable) else adj
        cond = adj_array.astype(bool)
        # (minibatch, EDGE_TYPE, 1, atom, atom)
        cond = xp.reshape(cond, (mb, self.n_edge_types, 1, atom, atom))
        # (minibatch, EDGE_TYPE, heads, atom, atom)
        cond = xp.broadcast_to(cond, e.array.shape)
        # TODO(mottodora): find better way to ignore non connected
        # A large negative logit makes non-connected pairs vanish in the
        # softmax. The fill uses the dtype of ``e`` so that ``where`` also
        # accepts non-float32 activations.
        masked_logits = xp.full(e.array.shape, -10000, dtype=e.dtype)
        e = functions.where(cond, e, masked_logits)
        # In Relational Graph Attention Networks eq.(7)
        # ARGAT: take the softmax over the logits across node neighborhoods
        # irrespective of relation
        if self.softmax_mode == 'across':
            # (minibatch, heads, atom, EDGE_TYPE, atom)
            e = functions.transpose(e, (0, 2, 3, 1, 4))
            # (minibatch, heads, atom, EDGE_TYPE * atom)
            e = functions.reshape(e, (mb, self.n_heads, atom,
                                      self.n_edge_types * atom))
            # (minibatch, heads, atom, EDGE_TYPE * atom)
            alpha = functions.softmax(e, axis=3)
            if self.dropout_ratio >= 0:
                alpha = functions.dropout(alpha, ratio=self.dropout_ratio)
            # (minibatch, heads, atom, EDGE_TYPE, atom)
            alpha = functions.reshape(alpha, (mb, self.n_heads, atom,
                                              self.n_edge_types, atom))
            # (minibatch, EDGE_TYPE, heads, atom, atom)
            alpha = functions.transpose(alpha, (0, 3, 1, 2, 4))

        # In Relational Graph Attention Networks eq.(6)
        # WIRGAT: take the softmax over the logits independently for each
        # relation
        elif self.softmax_mode == 'within':
            alpha = functions.softmax(e, axis=4)
            if self.dropout_ratio >= 0:
                alpha = functions.dropout(alpha, ratio=self.dropout_ratio)
        else:
            raise ValueError("{} is invalid. Please use 'across' or 'within'"
                             .format(self.softmax_mode))

        # before: (minibatch, atom, EDGE_TYPE, heads, out_dim)
        # after: (minibatch, EDGE_TYPE, heads, atom, out_dim)
        h = functions.transpose(h, (0, 2, 3, 1, 4))
        # (minibatch, EDGE_TYPE, heads, atom, out_dim)
        h_new = functions.matmul(alpha, h)
        # Sum the contributions of all edge types.
        # (minibatch, heads, atom, out_dim)
        h_new = functions.sum(h_new, axis=1)
        if self.concat_heads:
            # Put heads first so that ``concat`` receives one
            # (minibatch, atom, out_dim) slice per head.
            # (heads, minibatch, atom, out_dim)
            h_new = functions.transpose(h_new, (1, 0, 2, 3))
            # (minibatch, atom, heads * out_dim)
            h_new = functions.concat(h_new, axis=2)
        else:
            # (minibatch, atom, out_dim)
            h_new = functions.mean(h_new, axis=1)
        return h_new
 def energy_distance(r, f1, f2):
     """Return the mean of d(r, f1) + d(r, f2) - d(f1, f2).

     ``d`` is ``l2_distance``; the mean is taken with ``F.mean`` over all
     elements of the combined result.
     """
     cross_terms = l2_distance(r, f1) + l2_distance(r, f2)
     return F.mean(cross_terms - l2_distance(f1, f2))
Beispiel #40
0
 def __call__(self, xs):
     """Compute, report and return the squared-error loss for ``xs``.

     The loss is half the squared difference between the two outputs of
     ``self.predict``, summed over axis 1 and then averaged. Its raw value
     is reported under the key ``'loss'``.
     """
     prediction, target = self.predict(xs)
     residual = prediction - target
     per_row = 0.5 * F.sum(residual ** 2, axis=1)
     mean_loss = F.mean(per_row)
     reporter.report({'loss': mean_loss.data}, self)
     return mean_loss
Beispiel #41
0
 def compute_marginal_entropy(self, p_batch):
     """Return ``self.compute_entropy`` of ``p_batch`` averaged over axis 0."""
     marginal = functions.mean(p_batch, axis=0)
     return self.compute_entropy(marginal)
Beispiel #42
0
# train
# Running totals since the last log line; reset every time a line is logged.
itr = 0
sum_loss = 0
# Log the running average loss every ``eval_interval`` optimizer steps.
eval_interval = 1000
for e in range(args.epoch):
    np.random.shuffle(train_data)

    # Per-epoch accumulators.
    itr_epoch = 0
    sum_loss_epoch = 0
    # Iterate over full mini-batches only. The ``+ 1`` keeps the last full
    # batch: without it the final batch is skipped whenever the data size is
    # a multiple of the batch size (and nothing is trained at all when both
    # are equal). A trailing partial batch is still dropped.
    for i in range(0, len(train_data) - args.batchsize + 1, args.batchsize):
        x1, x2, t, z = mini_batch(train_data[i:i + args.batchsize])
        y = model(x1, x2)

        model.cleargrads()
        # Per-example cross entropy scaled element-wise by ``z``, then
        # averaged.
        loss = F.mean(F.softmax_cross_entropy(y, t, reduce='no') * z)
        loss.backward()
        optimizer.update()

        itr += 1
        sum_loss += loss.data
        itr_epoch += 1
        sum_loss_epoch += loss.data

        # print train loss
        if optimizer.t % eval_interval == 0:
            logging.info('epoch = {}, iteration = {}, loss = {}'.format(
                optimizer.epoch + 1, optimizer.t, sum_loss / itr))
            itr = 0
            sum_loss = 0
Beispiel #43
0
def main() -> None:
    """Train a multi-task regression model and plot the training history.

    Command-line options:
        --gpu / -g: GPU device id; a negative value keeps the model on CPU.
        --n-iter / -it: number of training iterations.
        --mode / -m: ``grad_norm`` additionally trains the per-task loss
            weights with a gradient-norm balancing loss; ``equal_weight``
            skips that step.

    Each iteration computes the per-task losses, back-propagates their
    weighted mean, optionally back-propagates the gradient-norm loss into the
    task weights, updates the parameters, and rescales the task weights so
    that they sum to the number of tasks. After training, the per-task
    losses, the mean loss ratio and the task weights are plotted.
    """
    parser = argparse.ArgumentParser(description='GradNorm')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--n-iter', '-it', type=int, default=5000)
    parser.add_argument('--mode', '-m', choices=('grad_norm', 'equal_weight'),
                        default='grad_norm')
    args = parser.parse_args()

    # Fixed seed so the generated noise is reproducible.
    np.random.seed(123)
    # One entry per task; passed to RegressionDataset together with the noise.
    sigmas = [1, 10]
    n_task = len(sigmas)
    epsilons = np.random.normal(
        scale=3.5, size=(n_task, 100, 250)).astype(np.float32)
    dataset = RegressionDataset(sigmas, epsilons)

    model = RegressionTrainChain(RegressionChain(n_task))

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=1e-2)
    optimizer.setup(model)

    # Mini-batches of 200 examples.
    train_iter = chainer.iterators.SerialIterator(dataset, 200)

    xp = model.xp
    # Per-iteration history used for the plots at the end.
    weights = []
    task_losses = []
    loss_ratios = []
    # Attribute names of the per-task layers on ``model.model`` whose ``W``
    # is used for the gradient norms below.
    final_layer_names = ['task_{}'.format(i) for i in range(n_task)]
    for t in range(args.n_iter):
        batch = train_iter.next()
        x, ts = chainer.dataset.convert.concat_examples(batch, device=args.gpu)

        # ``task_loss`` is indexed per task below (``task_loss[i]``).
        task_loss = model(x, ts)
        weighted_task_loss = model.weight * task_loss
        if t == 0:
            # Reference losses for the loss ratio computed in grad_norm mode.
            initial_task_loss = task_loss.data
        loss = F.mean(weighted_task_loss)
        model.cleargrads()
        loss.backward()
        # Ignore a gradient to the coefficient vector, which
        # is computed from the standard loss.
        model.weight.cleargrad()
        if args.mode == 'grad_norm':
            # Use |\nabla_W w_i * L_i | = w_i |\nabla_W L_i|
            gygw_norms = []
            for i, layer_name in enumerate(final_layer_names):
                l = getattr(model.model, layer_name)
                # Gradient of the unweighted task loss w.r.t. this layer's W.
                gygw = chainer.grad([task_loss[i]], [l.W])[0].data
                gygw_norms.append(xp.linalg.norm(gygw))
            gygw_norms = xp.stack(gygw_norms)
            # Only ``model.weight`` is a Variable here, so the gradient of
            # the loss below flows into the task weights alone.
            norms = model.weight * gygw_norms

            # Exponent applied to the inverse training rate.
            alpha = 0.16
            mean_norm = xp.mean(norms.data)
            loss_ratio = task_loss.data / initial_task_loss
            inverse_train_rate = loss_ratio / xp.mean(loss_ratio)

            # Target norm per task is built from raw arrays, i.e. it is
            # treated as a constant during back-propagation.
            diff = norms - (inverse_train_rate ** alpha) * mean_norm
            grad_norm_loss = F.mean(F.absolute(diff))
            grad_norm_loss.backward()

            # For debugging purpose only
            # from chainer import computational_graph
            # import os
            # cg = computational_graph.build_computational_graph(
            #     [grad_norm_loss]).dump()
            # with open('grad_weight_loss_cg', 'w') as f:
            #     f.write(cg)

        optimizer.update()

        # Renormalize
        # Rescale in place so the task weights sum to ``n_task``.
        normalize_coeff = n_task / xp.sum(model.weight.data)
        model.weight.data[:] = model.weight.data * normalize_coeff

        # Record
        task_losses.append(chainer.backends.cuda.to_cpu(task_loss.data))
        # Mean over tasks of current loss / loss at the first iteration.
        loss_ratios.append(np.mean(task_losses[-1] / task_losses[0]))
        weights.append(chainer.backends.cuda.to_cpu(model.weight.data))

        if t % 100 == 0:
            print('{}/{}:  loss_ratio={}, weights={} task_loss={}'.format(
                t, args.n_iter, loss_ratios[-1], model.weight.data, task_loss.data))
    task_losses = np.array(task_losses)
    weights = np.array(weights)

    # Four side-by-side panels: loss of each task, loss ratio, task weights.
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 4, 1)
    ax1.set_title('loss (task 0)')
    ax2 = fig.add_subplot(1, 4, 2)
    ax2.set_title('loss (task 1)')
    ax3 = fig.add_subplot(1, 4, 3)
    # NOTE(review): the title says "sum", but ``loss_ratios`` holds the mean
    # of the per-task ratios (see ``np.mean`` above) - confirm which is meant.
    ax3.set_title('sum of normalized losses')
    ax4 = fig.add_subplot(1, 4, 4)
    ax4.set_title('change of weights over time')
    ax1.plot(task_losses[:, 0])
    ax2.plot(task_losses[:, 1])
    ax3.plot(loss_ratios)
    ax4.plot(weights[:, 0])
    ax4.plot(weights[:, 1])
    plt.show()