Example #1
    def __call__(self, images):
        hs = []
        for model in self.models:
            hs.append(model.forward_backbone(images))

        h_segs = []
        for (h, model) in zip(hs, self.models):
            h_segs.append(model.forward_seg(h))
        h_segs = F.stack(h_segs)
        h_seg_avg = F.average(h_segs,
                              axis=0,
                              weights=self.xp.asarray(self.seg_weight))

        if self.ensemble_seg:
            h_segs = [h_seg_avg] * len(self.models)

        h_hors, h_vers = [], []
        for i in range(len(self.models)):
            h_hor, h_ver = self.models[i].forward_edge(hs[i], h_segs[i])
            h_hors.append(h_hor)
            h_vers.append(h_ver)
        h_hors = F.stack(h_hors)
        h_hor_avg = F.average(h_hors,
                              axis=0,
                              weights=self.xp.asarray(self.edge_weight))
        h_vers = F.stack(h_vers)
        h_ver_avg = F.average(h_vers,
                              axis=0,
                              weights=self.xp.asarray(self.edge_weight))

        return h_seg_avg, h_hor_avg, h_ver_avg
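
A minimal sketch of the weighted-averaging pattern used above (F.stack followed by F.average with per-model weights); the three stand-in outputs and the weight values are illustrative placeholders, not part of the original model.

import numpy as np
import chainer.functions as F

# three stand-in per-model outputs, e.g. segmentation logits of shape (N, C, H, W)
outputs = [np.random.rand(2, 3, 8, 8).astype(np.float32) for _ in range(3)]
seg_weight = np.asarray([0.5, 0.3, 0.2], dtype=np.float32)

stacked = F.stack(outputs)                            # (3, N, C, H, W)
avg = F.average(stacked, axis=0, weights=seg_weight)  # weighted mean over the model axis
print(avg.shape)  # (2, 3, 8, 8)
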
def loss_comp_low(x, y, threshold, norm='l2'):
    mask = ((x.array <= threshold) ^ (y.array <= threshold)).astype(
        x.xp.float32)
    if norm == 'l1':
        return (F.average(mask * F.absolute_error(x, y)))
    else:
        return (F.average(mask * F.squared_error(x, y)))
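
A usage sketch for loss_comp_low, assuming the function is defined as above with numpy, chainer, and chainer.functions as F imported; the threshold value is purely illustrative.

import numpy as np
import chainer

x = chainer.Variable(np.random.rand(4, 3).astype(np.float32))
y = chainer.Variable(np.random.rand(4, 3).astype(np.float32))

# penalize only the positions where x and y fall on different sides of the threshold
loss = loss_comp_low(x, y, threshold=0.5, norm='l1')
print(float(loss.array))
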
Example #3
    def __call__(self, x):
        """
            Apply layer normalization to the input "x".
            Args:
                x (float[][]): input tensor to re-center and re-scale (layer normalize)
            Returns:
                float[][]
        """
        if self.hidden_size is None:
            self._initialize_params(x.shape)

        # Layer Normalization parameters
        mu = F.average(x, axis=1, keepdims=True)
        mu = F.broadcast_to(mu, x.shape)
        sigma = F.sqrt(
            F.average(F.square(x - mu), axis=1, keepdims=True) + self.epsilon)
        sigma = F.broadcast_to(sigma, x.shape)

        # Transformation
        outputs = (x - mu) / sigma
        # Affine transformation
        outputs = (outputs * self.gain) + self.bias
        #outputs = F.scale(outputs, self.gain)
        #outputs = F.bias(outputs, self.bias)

        return outputs
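
The same computation written as a standalone sketch with unit gain and zero bias; the learned self.gain and self.bias of the link above are deliberately omitted here.

import numpy as np
import chainer.functions as F

def layer_norm_sketch(x, epsilon=1e-6):
    # re-center and re-scale each row of a 2-D input
    mu = F.broadcast_to(F.average(x, axis=1, keepdims=True), x.shape)
    sigma = F.sqrt(F.average(F.square(x - mu), axis=1, keepdims=True) + epsilon)
    return (x - mu) / F.broadcast_to(sigma, x.shape)

x = np.random.rand(4, 10).astype(np.float32)
print(layer_norm_sketch(x).shape)  # (4, 10)
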
Example #4
    def shared_middle(self, batch_size, width_rgb, width_flow, rpn_scores_rgb, rpn_locs_rgb, rpn_scores_flow, rpn_locs_flow,
                      anchor_rgb, gt_segments_rgb, labels, seg_info):
        #  rpn_scores_rgb shape = (N, W_rgb * A, 2) rpn_scores_flow shape = (N, W_flow * A, 2)
        n_anchor = anchor_rgb.shape[1]
        rpn_locs_flow = F.transpose(rpn_locs_flow.reshape(batch_size, width_flow, n_anchor, 2), axes=(0, 3, 1, 2))  # (B, 2, W_flow, A)
        rpn_locs_flow = F.resize_images(rpn_locs_flow, (width_rgb, n_anchor))   # (B, 2, W_rgb, A)
        # B, W_rgb, A, 2 => B, W_rgb * A, 2
        rpn_locs_flow = F.reshape(F.transpose(rpn_locs_flow, axes=(0, 2, 3, 1)), shape=(batch_size, width_rgb * n_anchor, 2))
        rpn_locs = F.average(F.stack([rpn_locs_rgb, rpn_locs_flow]), axis=0)

        rpn_scores_flow = F.transpose(rpn_scores_flow.reshape(batch_size, width_flow, n_anchor, 2), axes=(0, 3, 1, 2))
        rpn_scores_flow = F.resize_images(rpn_scores_flow, (width_rgb, n_anchor))  # (B, 2, W_rgb, A)
        # B, W_rgb, A, 2 => B, W_rgb * A, 2
        rpn_scores_flow = F.reshape(F.transpose(rpn_scores_flow, axes=(0, 2, 3, 1)),
                                  shape=(batch_size, width_rgb * n_anchor, 2))
        rpn_scores = F.average(F.stack([rpn_scores_rgb, rpn_scores_flow]), axis=0)
        #  merge over!

        rois, roi_indices = self.time_seg_train_chain_rgb.nms_process(batch_size, width_rgb,
                                                                      n_anchor, rpn_scores, rpn_locs, anchor_rgb)

        sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label = self.time_seg_train_chain_rgb.proposal_target_creator(
            rois, roi_indices, gt_segments_rgb, labels, seg_info,
            self.time_seg_train_chain_rgb.loc_normalize_mean, self.time_seg_train_chain_rgb.loc_normalize_std)
        return sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label
Example #5
    def log_prob(self, z, log_det_jacobians):
        ln_var_adj = self.ln_var * self.xp.ones([self.adj_size])
        ln_var_x = self.ln_var * self.xp.ones([self.x_size])
        log_det_jacobians[0] = log_det_jacobians[0] - F.log(
            self.xp.array([self.x_size], dtype=self.xp.float32))
        log_det_jacobians[1] = log_det_jacobians[1] - F.log(
            self.xp.array([self.adj_size], dtype=self.xp.float32))

        negative_log_likelihood_adj = F.average(
            F.sum(F.gaussian_nll(z[1],
                                 self.xp.zeros(self.adj_size,
                                               dtype=self.xp.float32),
                                 ln_var_adj,
                                 reduce="no"),
                  axis=1) - log_det_jacobians[1])
        negative_log_likelihood_x = F.average(
            F.sum(F.gaussian_nll(z[0],
                                 self.xp.zeros(self.x_size,
                                               dtype=self.xp.float32),
                                 ln_var_x,
                                 reduce="no"),
                  axis=1) - log_det_jacobians[0])

        negative_log_likelihood_adj /= self.adj_size
        negative_log_likelihood_x /= self.x_size

        if negative_log_likelihood_x.array < 0:
            log.warning("negative nll for x!")

        return [negative_log_likelihood_x, negative_log_likelihood_adj]
Example #6
def loss_grad_d(diff):
    xp = cuda.get_array_module(diff.data)
    grad = xp.tile(
        xp.asarray([[[[1, 0, -1], [2, 0, -2], [1, 0, -1]]]], dtype=diff.dtype),
        (diff.data.shape[1], 1, 1))
    dx = F.convolution_2d(diff, grad)
    dy = F.convolution_2d(diff, xp.transpose(grad, (0, 1, 3, 2)))
    return F.average(dx**2) + F.average(dy**2)
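
A usage sketch for loss_grad_d, assuming it is defined as above with numpy, chainer.functions as F, and chainer.cuda imported; the input is a stand-in three-channel difference image.

import numpy as np
import chainer

diff = chainer.Variable(np.random.rand(2, 3, 16, 16).astype(np.float32))
loss = loss_grad_d(diff)  # mean squared horizontal and vertical Sobel responses
print(float(loss.array))
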
Example #7
    def update_core(self):
        def _update(optimizer, loss):
            optimizer.target.cleargrads()
            loss.backward()
            optimizer.update()

        xp = self.generator.xp
        if self.iteration < 50:
            n_critic = 100
        else:
            n_critic = 5
        # update critic n_critic times
        for _ in range(n_critic):
            # real image
            x_real = self.next_batch(self.x)
            y_real = self.critic(x_real)
            loss1 = -F.average(y_real)

            # fake image
            z = self.next_batch(self.z)
            x_fake = self.generator(z)
            y_fake = self.critic(x_fake)
            loss2 = F.average(y_fake)

            # gp
            # using chainer.grad here
            eps = xp.random.uniform(0, 1,
                                    size=self.batchsize).astype("f")[:, None,
                                                                     None,
                                                                     None]
            x_mid = eps * x_real + (1.0 - eps) * x_fake
            y_mid = self.critic(x_mid)
            grad, = chainer.grad([y_mid], [x_mid], enable_double_backprop=True)
            grad = F.sqrt(F.batch_l2_norm_squared(grad))
            loss_gp = self.lam * F.mean_squared_error(grad,
                                                      xp.ones_like(grad.data))

            # compute loss
            critic_loss = loss1 + loss2 + loss_gp

            # update critic
            _update(self.optimizer_critic, critic_loss)

            chainer.reporter.report({
                'critic/loss/real': loss1,
                'critic/loss/fake': loss2,
                'critic/loss/gp': loss_gp,
                'critic/loss': critic_loss,
                'Wasserstein': -loss1 - loss2,
            })

        # update generator 1 time
        z = self.next_batch(self.z)
        x_fake = self.generator(z)
        y_fake = self.critic(x_fake)
        gen_loss = -F.average(y_fake)
        _update(self.optimizer_generator, gen_loss)
        chainer.report({'generator/loss': gen_loss})
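
A self-contained sketch of just the gradient-penalty term computed with chainer.grad, using a toy one-layer critic in place of the real model; the factor 10.0 stands in for self.lam and the input shapes are placeholders.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

critic = L.Linear(None, 1)  # toy critic
x_real = chainer.Variable(np.random.rand(4, 8).astype(np.float32))
x_fake = chainer.Variable(np.random.rand(4, 8).astype(np.float32))

eps = np.random.uniform(0, 1, size=(4, 1)).astype(np.float32)
x_mid = eps * x_real + (1.0 - eps) * x_fake
y_mid = critic(x_mid)

# gradient of the critic output with respect to the interpolated input
grad, = chainer.grad([y_mid], [x_mid], enable_double_backprop=True)
grad_norm = F.sqrt(F.batch_l2_norm_squared(grad))
loss_gp = 10.0 * F.mean_squared_error(grad_norm, np.ones_like(grad_norm.array))
print(float(loss_gp.array))
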
Example #8
    def __call__(self, hs, rs, ts, ys):
        """Calculate the loss between outputs and ys.

        Args:
            hs: The heads of facts.
            rs: The relations of facts.
            ts: The tails of facts.
            ys: The labels which indicate whether the facts are correct.

        Returns:
            loss: The cross-entropy loss for outputs and ys.

        """
        batch_size, max_length_h = hs.shape
        _, max_length_t = ts.shape

        hhs = self.concept_encoder(hs)
        hts = self.concept_encoder(ts)
        hrs = self.relation_encoder(rs)

        # embedding vectors which corresponds to PAD should be zeros
        hhs = hhs * (hs != PAD)[:, :, None]
        hts = hts * (ts != PAD)[:, :, None]

        # calculate average over embeddings
        hhs = F.average(hhs, axis=1)
        hts = F.average(hts, axis=1)

        # transform concept representations
        l_hhs = F.tanh(
            F.dropout(self.l_concept(hhs), ratio=self.n_dropout)
        )
        l_hts = F.tanh(
            F.dropout(self.l_concept(hts), ratio=self.n_dropout)
        )

        # reshape hrs
        hrs = F.reshape(
            hrs,
            (batch_size, self.n_relation_units, self.n_relation_units)
        )

        # calculate bilinear outputs
        outputs = F.flatten(
            F.batch_matmul(
                F.batch_matmul(
                    l_hhs,
                    hrs,
                    transa=True
                ),
                l_hts
            )
        )

        loss = F.sigmoid_cross_entropy(outputs, ys)
        chainer.report({'loss': loss.data}, self)
        return loss
def total_variation2(x):
    xp = cuda.get_array_module(x.data)
    wh = xp.asarray([[[[1], [-1]]]], dtype=x.dtype)
    ww = xp.asarray([[[[1, -1]]]], dtype=x.dtype)
    dx = F.convolution_2d(x, W=wh)
    dy = F.convolution_2d(x, W=ww)
    #    dx = x[:, 1:, :, :] - x[:, :-1, :, :]
    #    dy = x[:, :, 1:, :] - x[:, :, :-1, :]
    return F.average(F.absolute(dx)) + F.average(F.absolute(dy))
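
A usage sketch for total_variation2, assuming it is defined as above; the hard-coded 2x1 and 1x2 filters expect a single-channel input of shape (N, 1, H, W).

import numpy as np
import chainer

x = chainer.Variable(np.random.rand(1, 1, 32, 32).astype(np.float32))
tv = total_variation2(x)  # mean absolute vertical and horizontal differences
print(float(tv.array))
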
Example #10
    def update_core(self):
        gen_opt = self.get_optimizer("gen")
        cri_opt = self.get_optimizer("cri")
        generator = gen_opt.target
        critic = cri_opt.target
        batch_size = self.get_iterator("main").batch_size

        # fetch a mini-batch of real images
        x_real = self.get_iterator("main").next()
        x_real = Variable(np.stack(x_real))
        if chainer.config.user_gpu >= 0:
            x_real.to_gpu()

        xp = x_real.xp

        # update critic
        upd_num = self.n_cri[
            1] if self.iteration <= 25 or self.iteration % 500 == 0 else self.n_cri[
                0]
        for i in range(upd_num):
            z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
            x_fake = generator(Variable(z))

            cri_loss = F.average(critic(x_fake) -
                                 critic(x_real))  # negative of the Wasserstein distance

            # gradient penalty
            eps = xp.random.uniform(size=(batch_size, 1, 1,
                                          1)).astype(np.float32)
            x_fusion = eps * x_real + (1 - eps) * x_fake  # (N,1,H,W)
            g_critic = chainer.grad(
                [critic(x_fusion)], [x_fusion],
                enable_double_backprop=True)[0]  # (N,1,H,W)
            gp = F.batch_l2_norm_squared(g_critic)
            gp = F.average((F.sqrt(gp) - 1)**2)
            total_loss = cri_loss + self.gp_lam * gp

            critic.cleargrads()
            total_loss.backward()
            cri_opt.update()

        # update generator
        z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
        x_fake = generator(Variable(z))
        gen_loss = -F.average(critic(x_fake))

        generator.cleargrads()
        critic.cleargrads()
        gen_loss.backward()
        gen_opt.update()

        chainer.report({
            "generator/loss": gen_loss,
            "critic/loss": cri_loss,
            "main/wdist": -cri_loss
        })
    def __call__(self, s, q, s_mask, q_mask):
        """
        s_bar, _, _ = self.pred_bilstm(None, None, s)

        s_bar_new = F.concat(s_bar, axis=1)

        q_bar, _, _ = self.pred_bilstm(None, None, q)

        q_bar_new = F.concat(q_bar, axis=1)
        """

        _, _, s_bar = self.pred_bilstm(None, None, s)  # get list of [seq, dim]
        s_bar_new = F.stack(s_bar, axis=0)  # turn list to 3d tensor

        _, _, q_bar = self.pred_bilstm(None, None, q)  # get list of [seq, dim]
        q_bar_new = F.stack(q_bar, axis=0)  # turn list to 3d tensor
        # mean-max pooling

        s_sum = F.sum(s_mask, axis=-1)
        q_sum = F.sum(q_mask, axis=-1)

        s_batch, s_seq = s_mask.shape
        s_mask_broad = F.broadcast_to(F.reshape(s_mask, (s_batch, s_seq, 1)),
                                      (s_batch, s_seq, s_bar_new.shape[-1]))
        s_broad = s_bar_new * s_mask_broad
        """
        s_infinit_matrix = self.xp.ones((s_batch, s_seq, s_bar_new.shape[-1]), dtype=self.xp.float32) * -1 * self.xp.inf
        s_cond = s_mask_broad.data.astype(self.xp.bool)

        s_broad_max = F.where(s_cond, s_bar_new, s_infinit_matrix)
        """
        s_mean = F.average(s_broad, axis=1)  # [batch_size, dim]
        s_max = F.maxout(
            F.reshape(
                s_bar_new,
                (s_bar_new.shape[0], s_bar_new.shape[1] * s_bar_new.shape[2])),
            s_bar_new.shape[-1])  # [batch_size, dim]

        q_batch, q_seq = q_mask.shape
        q_broad = q_bar_new * F.broadcast_to(
            F.reshape(q_mask, (q_batch, q_seq, 1)),
            (q_batch, q_seq, q_bar_new.shape[-1]))
        q_mean = F.average(q_broad, axis=1)  # [batch_size, dim]
        q_max = F.maxout(
            F.reshape(
                q_bar_new,
                (q_bar_new.shape[0], q_bar_new.shape[1] * q_bar_new.shape[2])),
            q_bar_new.shape[-1])  # [batch_size, dim]

        summarized_vector = F.concat([s_mean, s_max, q_mean, q_max], axis=1)

        s_linear_output = self.gelu(self.L(summarized_vector))

        y = F.softmax(s_linear_output)

        return y
def loss_grad_d(diff):
    xp = cuda.get_array_module(diff.data)
    grad = xp.tile(
        xp.asarray([[[[1, 0, -1], [2, 0, -2], [1, 0, -1]]]], dtype=diff.dtype),
        (diff.data.shape[1], 1, 1))
    dx = F.convolution_2d(diff, grad)
    dy = F.convolution_2d(diff, xp.transpose(grad, (0, 1, 3, 2)))
    #        target = self.xp.zeros_like(dx.data)
    #        return 0.5*(F.mean_squared_error(dx,target)+F.mean_squared_error(dy,target))
    return F.average(dx**2) + F.average(dy**2)
    def update_core(self):
        # train critic
        for t in range(self.n_c):
            # read data
            batch = self._iterators['main'].next()
            x = self.converter(batch, self.device)
            m = x.shape[0]
            H, W = x.shape[2], x.shape[3]
            xp = chainer.cuda.get_array_module(x)

            # generate
            z = self.generator.make_z(m)
            x_tilde = self.generator(z)

            # sampling along straight lines
            e = xp.random.uniform(0., 1., (m, 1, 1, 1))
            x_hat = e * x + (1 - e) * x_tilde

            # compute loss
            loss_gan = F.average(self.critic(x_tilde) - self.critic(x))
            grad, = chainer.grad([self.critic(x_hat)], [x_hat],
                                 enable_double_backprop=True)
            grad = F.sqrt(F.batch_l2_norm_squared(grad))

            loss_grad = self.l * F.mean_squared_error(grad,
                                                      xp.ones_like(grad.data))
            loss_critic = loss_gan + loss_grad

            # update critic
            self.critic.cleargrads()
            loss_critic.backward()
            self._optimizers['critic'].update()

            # report
            chainer.reporter.report({
                'wasserstein distance': -loss_gan,
                'loss/grad': loss_grad
            })

        # train generator
        # read data
        batch = self._iterators['main'].next()
        x = self.converter(batch, self.device)

        # generate and compute loss
        z = self.generator.make_z(m)
        loss_generator = F.average(-self.critic(self.generator(z)))

        # update generator
        self.generator.cleargrads()
        loss_generator.backward()
        self._optimizers['generator'].update()

        # report
        chainer.reporter.report({'loss/generator': loss_generator})
Example #14
    def __call__(self, x, c):
        mu = F.average(x, axis=0).reshape(1, x.shape[1], x.shape[2], x.shape[3])
        sigma = F.average((x-F.tile(mu, (x.shape[0], 1, 1, 1)))**2, axis=0)
        x_hat = (x-F.tile(mu, (x.shape[0], 1, 1, 1)))/F.sqrt(F.tile(sigma+self.eps, (x.shape[0], 1, 1, 1)))

        h = F.relu(self.c0(c))
        w = self.cw(h)
        b = self.cb(h)
        #ones = chainer.as_variable(xp.ones_like(w, dtype=xp.float32))
        h = w * x_hat + b

        return h
Example #15
def power_loss(x, t, frame_length=1024, hop_length=512, time_axis_mean=False):
    # ..., FFT axis
    Xr, Xi = stft(x, frame_length, hop_length)
    Xa = Xr**2 + Xi**2
    Tr, Ti = stft(t, frame_length, hop_length)
    Ta = Tr**2 + Ti**2

    if time_axis_mean:
        Xa = F.average(Xa, -1)
        Ta = F.average(Ta, -1)

    return F.mean_squared_error(Xa, Ta)
Example #16
    def _loss(self, fake_batch_obs, fake_batch_action,
              true_batch_obs, true_batch_action):
        if self.obs_normalizer is not None:
            normalized_obs = self.obs_normalizer(fake_batch_obs, update=False)
            infer_fake = self.model(normalized_obs, fake_batch_action)
        else:
            infer_fake = self.model(fake_batch_obs, fake_batch_action)
        if self.noisy_label:
            n = fake_batch_obs.shape[0]
            fake_loss = -F.average(
                F.log(F.absolute(1 - (self.xp.random.rand(n)
                                      * self.noisy_label_range)
                                 - F.sigmoid(infer_fake))
                      + self.discriminator_value_offset))
        else:
            fake_loss = -F.average(F.log(1
                                         - F.sigmoid(infer_fake)
                                         + self.discriminator_value_offset))

        if self.obs_normalizer is not None:
            normalized_obs = self.obs_normalizer(true_batch_obs, update=True)
            infer_true = self.model(normalized_obs, true_batch_action)
        else:
            infer_true = self.model(true_batch_obs, true_batch_action)
        if self.noisy_label:
            n = true_batch_obs.shape[0]
            true_loss = -F.average(
                F.log(F.absolute(1 - (self.xp.random.rand(n)
                                      * self.noisy_label_range)
                                 - F.sigmoid(infer_true))
                      + self.discriminator_value_offset))
        else:
            true_loss = -F.average(F.log(F.sigmoid(infer_true)
                                         + self.discriminator_value_offset))

        entropy = (self._get_entropy(infer_fake) / 2
                   + self._get_entropy(infer_true) / 2)
        loss = (fake_loss + true_loss
                - entropy * self.entropy_coef)

        # Update stats
        self.accuracy_gen = np.average(
            chainer.cuda.to_cpu(infer_fake.array) < 0)
        self.accuracy_exp = np.average(
            chainer.cuda.to_cpu(infer_true.array) > 0)
        self.average_entropy *= self.entropy_decay
        self.average_entropy += (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)  # noqa
        self.average_loss *= self.loss_decay
        self.average_loss += (1.0 - self.loss_decay) * \
            chainer.cuda.to_cpu(loss.array)

        return loss
Example #17
    def __call__(self, x, e=None):
        gap = F.average(x, axis=(2, 3))
        gmp = F.max(x, axis=(2, 3))
        gap = self.ext(F.relu(self.sqz(gap)))
        gmp = self.ext(F.relu(self.sqz(gmp)))
        x = F.sigmoid(gap + gmp)[:, :, None, None] * x

        gap = F.average(x, axis=1)[:, None]
        gmp = F.max(x, axis=1)[:, None]
        h = self.conv(F.concat([gap, gmp]))
        h = F.sigmoid(h) * x

        return h
    def update_core(self):
        xp = self.gen.xp
        self._iter += 1

        opt_d = self.get_optimizer('dis')
        for i in range(self._dis_iter):
            d_fake = self.get_fake_image_batch()
            d_real = self.get_real_image_batch()

            y_fake = self.dis(Variable(d_fake), test=False)
            y_real = self.dis(Variable(d_real), test=False)

            w1 = F.average(y_fake - y_real)

            loss_dis = w1

            if self._mode == 'gp':
                eta = np.random.rand()
                c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
                y = self.dis(Variable(c), test=False, retain_forward=True)

                g = xp.ones_like(y.data)
                grad_c = self.dis.differentiable_backward(Variable(g))
                grad_c_l2 = F.sqrt(F.sum(grad_c**2, axis=(1, 2, 3)))

                loss_gp = loss_l2(grad_c_l2, 1.0)

                loss_dis += self._lambda_gp * loss_gp

            opt_d.zero_grads()
            loss_dis.backward()
            opt_d.update()

            if self._mode == 'clip':
                self.dis.clip()

        chainer.report({'loss': loss_dis, 'loss_w1': w1}, self.dis)

        z_in = self.get_latent_code_batch()
        x_out = self.gen(Variable(z_in), test=False)

        opt_g = self.get_optimizer('gen')
        y_fake = self.dis(x_out, test=False)
        loss_gen = -F.average(y_fake)

        chainer.report({'loss': loss_gen}, self.gen)

        opt_g.zero_grads()
        loss_gen.backward()
        opt_g.update()
Example #19
 def risk(self, Xt1, Xt2):
     Xa, Xb, Xc, A, B = self.prepare(Xt1, Xt2)
     p, n = self.p, self.n
     a = A*A + p*p + n*n
     b = A*B + 2*p*n
     c = B*B + p*p + n*n
     coe = 1 / (a*c - b*b)
     r_a = p*(c*p-b*n) * self.loss(self.f(Xa)) + \
           n*(a*n-b*p) * self.loss(-self.f(Xa))
     r_b = p*(c*A-b*B) * self.loss(self.f(Xb)) + \
           n*(a*B-b*A) * self.loss(-self.f(Xb))
     r_c = p*(c*n-b*p) * self.loss(self.f(Xc)) + \
           n*(a*p-b*n) * self.loss(-self.f(Xc))
     return coe * (F.average(r_a) + F.average(r_b) + F.average(r_c))
Example #20
    def calc_loss(self, grids, image_size):
        top_left_x, top_right_x, _, top_left_y, _, bottom_left_y = self.get_corners(grids, image_size)

        # penalize upside down images
        distance = top_left_y - bottom_left_y
        loss_values = F.maximum(distance, self.xp.zeros_like(distance))
        up_down_loss = F.average(loss_values)

        # penalize images that are vertically mirrored
        distance = top_left_x - top_right_x
        loss_values = F.maximum(distance, self.xp.zeros_like(distance))
        left_right_loss = F.average(loss_values)

        return up_down_loss + left_right_loss
    def __call__(self, x, t):
        y_list = self.predictor(x)
        _len, _cls = y_list.shape
        if self.sm_fuse:
            _sm = F.reshape(F.log_softmax(y_list), (self.n_kernel, _len // self.n_kernel, _cls))
            ave_y = F.average(_sm, axis=0)
            loss = - F.average(F.select_item(ave_y, t))
        else:
            loss = F.average(F.softmax_cross_entropy(y_list, F.tile(t, self.n_kernel)))

        conf = F.average(
            F.reshape(y_list, (self.n_kernel, _len // self.n_kernel, _cls)), axis=0)
        chainer.report(
            {'loss': loss, 'accuracy': F.accuracy(conf, t)}, self)
        return loss
 def __call__(self, x):
     b, c, height, width = x.data.shape
     h = F.average(x, axis=(2, 3))  # Global pooling
     h = F.relu(self.l1(h))
     h = F.sigmoid(self.l2(h))
     return (F.transpose(F.broadcast_to(h, (height, width, b, c)),
                         (2, 3, 0, 1)))
Example #23
 def __call__(self, x):
     # embed each sentence of every document in the batch (how could this be parallelized?)
     sent_rep = [self.sen_enc(doc) for doc in x]  # x: mini-batch, doc: the sentences for one label
     # read the sentences into the BiLSTM one at a time
     last_h, last_c, ys = self.encoder(None, None, sent_rep)
     # return the average over sentences of the final-layer states
     return [F.average(x, axis=0) for x in ys]
Example #24
    def __init__(self,
                 n_layer,
                 n_class=None,
                 pretrained_model=None,
                 mean=None,
                 initialW=None,
                 fc_kwargs={},
                 arch='fb'):
        if arch == 'fb':
            stride_first = False
            conv1_no_bias = True
        elif arch == 'he':
            stride_first = True
            # Kaiming He uses bias only for ResNet50
            conv1_no_bias = n_layer != 50
        else:
            raise ValueError('arch is expected to be one of [\'he\', \'fb\']')
        blocks = self._blocks[n_layer]

        param, path = utils.prepare_pretrained_model(
            {
                'n_class': n_class,
                'mean': mean
            }, pretrained_model, self._models[arch][n_layer], {
                'n_class': 1000,
                'mean': _imagenet_mean
            })
        self.mean = param['mean']

        if initialW is None:
            initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
        if 'initialW' not in fc_kwargs:
            fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
        if pretrained_model:
            # As a sampling process is time-consuming,
            # we employ a zero initializer for faster computation.
            initialW = initializers.constant.Zero()
            fc_kwargs['initialW'] = initializers.constant.Zero()
        kwargs = {'initialW': initialW, 'stride_first': stride_first}

        super(ResNet, self).__init__()
        with self.init_scope():
            self.conv1 = Conv2DBNActiv(None,
                                       64,
                                       7,
                                       2,
                                       3,
                                       nobias=conv1_no_bias,
                                       initialW=initialW)
            self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
            self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
            self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
            self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
            self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
            self.pool5 = lambda x: F.average(x, axis=(2, 3))
            self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
            self.prob = F.softmax

        if path:
            chainer.serializers.load_npz(path, self)
Example #25
def calc_style_mean_std(feature, eps=1e-5):
    mean = F.mean(feature, axis=1).reshape(feature.shape[0], 1)
    sigma = F.average((feature - F.tile(mean, (1, 256)))**2, axis=1) + eps
    std = F.sqrt(sigma).reshape(feature.shape[0], 1, 1, 1)
    mean = F.reshape(mean, (feature.shape[0], 1, 1, 1))

    return mean, std
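
A usage sketch for calc_style_mean_std as defined above; the function hard-codes a 256-dimensional feature via F.tile(mean, (1, 256)), so the stand-in input is (N, 256).

import numpy as np
import chainer

feature = chainer.Variable(np.random.rand(4, 256).astype(np.float32))
mean, std = calc_style_mean_std(feature)
print(mean.shape, std.shape)  # (4, 1, 1, 1) (4, 1, 1, 1)
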
Example #26
def proxy_nca_loss(x, proxy, labels):
    """Proxy-NCA loss function.

    Args:
        x (:class:`~chainer.Variable`):
            L2 normalized anchor points whose shape is (B, D), where B is the
            batch size and D is the number of dimensions of feature vector.
        proxy (:class:`~chainer.Variable` or :class:`~chainer.Parameter`):
            Proxies whose shape is (K, D), where K is the number of classes
            in the dataset.
        labels (:class:`numpy.ndarray`):
            Class labels associated to x. The shape is (B,) and dtype is int.
            Note that the class IDs must be 0, 1, ..., K-1.

    Returns:
        :class:`~chainer.Variable`: Loss value.

    See: `No Fuss Distance Metric Learning using Proxies \
        <http://openaccess.thecvf.com/content_ICCV_2017/papers/\
        Movshovitz-Attias_No_Fuss_Distance_ICCV_2017_paper.pdf>`_
    """
    proxy = F.normalize(proxy)
    distance = squared_distance_matrix(x, proxy)
    d_posi = distance[np.arange(len(x)), labels]

    # For each row, remove one element corresponding to the positive distance
    B, K = distance.shape  # batch size and the number of classes
    mask = np.tile(np.arange(K), (B, 1)) != labels[:, None]
    d_nega = distance[mask].reshape(B, K - 1)

    log_denominator = F.logsumexp(-d_nega, axis=1)
    loss = d_posi + log_denominator
    return F.average(loss)
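
A usage sketch for proxy_nca_loss. squared_distance_matrix is not shown in this excerpt, so the helper below is an assumed stand-in that computes pairwise squared Euclidean distances with Chainer ops.

import numpy as np
import chainer
import chainer.functions as F

def squared_distance_matrix(x, y):
    # assumed helper: (B, D), (K, D) -> (B, K) pairwise squared distances
    B, K = x.shape[0], y.shape[0]
    x2 = F.broadcast_to(F.sum(x * x, axis=1, keepdims=True), (B, K))
    y2 = F.broadcast_to(F.transpose(F.sum(y * y, axis=1, keepdims=True)), (B, K))
    return x2 + y2 - 2 * F.matmul(x, y, transb=True)

x = F.normalize(chainer.Variable(np.random.rand(8, 16).astype(np.float32)))
proxy = chainer.Parameter(np.random.rand(4, 16).astype(np.float32))
labels = np.random.randint(0, 4, size=8)
loss = proxy_nca_loss(x, proxy, labels)
print(float(loss.array))
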
Example #27
    def compute_batch_loss(self, batch, weights):
        """Compute gradients on a list of trajectories.

        Args:
            batch -- a TrajectoryBatch
            weights -- a list of weights for trajectories in the batch

        Returns a loss value
        """

        weights = self._xp.array(weights)
        for step, step_batch in batch.step_batches(self._gpu_device):
            policies, values = self._model(step_batch.states)
            values *= (1 - step_batch.terminals).reshape(values.shape)
            logprobs = F.select_item(policies, step_batch.actions)
            batch.set_logprobs_and_values(step, logprobs, values)

        losses = []
        for trajectory, logprobs, values in batch:
            losses.append(
                self.compute_trajectory_loss(trajectory, logprobs, values))

        losses = F.stack(losses)
        loss = F.average(losses * weights)
        loss.backward()

        return np.asscalar(cuda.to_cpu(loss.data))
Example #28
 def extract(self, images, layers=['fc5']):
     self._layer_names = layers
     x = chainer.Variable(self.xp.asarray(images))
     h = self(x).data
     _len, _cls = h.shape
     h = F.average(F.reshape(h, (16, _len // 16, _cls)), axis=0)
     return chainer.cuda.to_cpu(h.data)
Example #29
 def extract(self, images, layers=['fc']):
     self._layer_names = layers
     x = chainer.Variable(self.xp.asarray(images))
     h = self(x).data
     h = F.stack(F.split_axis(h, 16, axis=0))
     h = F.average(F.softmax(h, axis=2), axis=0)
     return chainer.cuda.to_cpu(h.data)
Example #30
    def check_forward(self, x_data, axis, weights):
        x = chainer.Variable(x_data)
        if self.use_weights:
            w = chainer.Variable(weights)
            w_data = self.w
        else:
            w = None
            w_data = None
        y = functions.average(x, axis=axis, weights=w, keepdims=self.keepdims)
        self.assertEqual(y.data.dtype, self.dtype)
        y_expect = numpy.average(
            self.x, axis=axis, weights=w_data)
        if self.keepdims:
            # numpy.average does not support keepdims
            if axis is None:
                axis = list(six.moves.range(x_data.ndim))
            elif isinstance(axis, int):
                axis = axis,
            shape = list(x_data.shape)
            for i in six.moves.range(len(shape)):
                if i in axis or i - len(shape) in axis:
                    shape[i] = 1
            y_expect = y_expect.reshape(shape)

        if self.dtype == numpy.float16:
            options = {'atol': 1e-3, 'rtol': 1e-3}
        else:
            options = {}

        self.assertEqual(y_expect.shape, y.shape)
        testing.assert_allclose(y_expect, y.data, **options)
    def check_forward(self, x_data, axis, weights):
        if self.use_weights and isinstance(self.axis, tuple):
            # This condition is not supported
            return

        x = chainer.Variable(x_data)
        if self.use_weights:
            w = chainer.Variable(weights)
            w_data = self.w
        else:
            w = None
            w_data = None
        y = functions.average(x, axis=axis, weights=w, keepdims=self.keepdims)
        self.assertEqual(y.data.dtype, self.dtype)
        y_expect = numpy.average(self.x, axis=axis, weights=w_data)
        if self.keepdims:
            # numpy.average does not support keepdims
            if axis is None:
                axis = list(six.moves.range(x_data.ndim))
            elif isinstance(axis, int):
                axis = axis,
            shape = list(x_data.shape)
            for i in six.moves.range(len(shape)):
                if i in axis or i - len(shape) in axis:
                    shape[i] = 1
            y_expect = y_expect.reshape(shape)

        if self.dtype == numpy.float16:
            options = {'atol': 5e-3, 'rtol': 5e-3}
        else:
            options = {}

        self.assertEqual(y_expect.shape, y.shape)
        testing.assert_allclose(y_expect, y.data, **options)
Example #32
    def check_forward(self, x_data, axis, weights):
        x = chainer.Variable(x_data)
        if self.use_weights:
            w = chainer.Variable(weights)
            w_data = self.w
        else:
            w = None
            w_data = None
        y = functions.average(x, axis=axis, weights=w)
        self.assertEqual(y.data.dtype, self.dtype)
        y_expect = numpy.average(self.x, axis=axis, weights=w_data)

        if self.dtype == numpy.float16:
            options = {'atol': 1e-3, 'rtol': 1e-3}
        else:
            options = {}

        testing.assert_allclose(y_expect, y.data, **options)
Example #33
 def f(x):
     return functions.average(x, axis=axis)
Example #34
 def test_duplicate_value_negative(self):
     x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
     with self.assertRaises(ValueError):
         functions.average(x, axis=(1, -2))
Example #35
 def f(x):
     return functions.average(x, axis=axis, keepdims=self.keepdims)
Example #36
 def f(x, w):
     return functions.average(
         x, axis=axis, weights=w, keepdims=self.keepdims)
Example #37
 def f(x, w):
     return functions.average(x, axis=axis, weights=w)
Example #38
 def test_weights_and_axis(self):
     x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
     w = numpy.random.uniform(-1, 1, 6).reshape(2, 3).astype(self.dtype)
     with self.assertRaises(ValueError):
         functions.average(x, axis=(0, 1), weights=w)
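
For contrast, a minimal valid call: when axis is a single integer, weights must be a 1-D array whose length matches the size of that axis.

import numpy as np
import chainer.functions as F

x = np.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(np.float32)
w = np.asarray([0.2, 0.3, 0.5], dtype=np.float32)
y = F.average(x, axis=1, weights=w)
print(y.shape)  # (2, 4)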