Example #1
  def _encode(self, x_list):
    batch_size = len(x_list[0])
    source_length = len(x_list)

    # Encoding
    fc = bc = f = b = _zeros((batch_size, self.hidden_size))
    i_list = [self.x_i(_mkivar(x)) for x in x_list]
    f_list = []
    b_list = []
    for i in i_list:
      fc, f = F.lstm(fc, self.i_f(i) + self.f_f(f))
      f_list.append(f)
    for i in reversed(i_list):
      bc, b = F.lstm(bc, self.i_b(i) + self.b_b(b))
      b_list.append(b)
    b_list.reverse()

    # Making concatenated matrix
    # {f,b}_mat: shape = [batch, srclen, hidden]
    f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
    b_mat = F.concat([F.expand_dims(b, 1) for b in b_list], 1)
    # fb_mat: shape = [batch, srclen, 2 * hidden]
    fb_mat = F.concat([f_mat, b_mat], 2)
    # fbe_mat: shape = [batch * srclen, atten]
    fbe_mat = self.fb_e(
        F.reshape(fb_mat, [batch_size * source_length, 2 * self.hidden_size]))

    return fb_mat, fbe_mat, fc, bc, f_list[-1], b_list[0]
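A quick way to check the stacking pattern used above, where each per-step state of shape (batch, hidden) is expanded to (batch, 1, hidden) and concatenated along axis 1. This is a standalone sketch with arbitrary sizes, not code from the original project:

import numpy as np
import chainer.functions as F

batch, hidden, srclen = 4, 8, 5
# one (batch, hidden) state per source position, as produced by the LSTM loops
f_list = [np.random.randn(batch, hidden).astype(np.float32) for _ in range(srclen)]

f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
assert f_mat.shape == (batch, srclen, hidden)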
Example #2
def clipped_loss(x, t):
    diff = x - t
    abs_loss = abs(diff)
    squared_loss = diff ** 2
    abs_loss = F.expand_dims(abs_loss, 1)
    squared_loss = F.expand_dims(squared_loss, 1)
    return F.sum(F.min(F.concat((abs_loss, squared_loss), axis=1), axis=1))
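A minimal usage sketch for clipped_loss with dummy arrays; the values are chosen so both branches of the element-wise minimum are hit, and chainer.functions accepts plain NumPy inputs here:

import numpy as np
import chainer.functions as F  # required by clipped_loss

x = np.array([0.2, 1.5, -3.0], dtype=np.float32)  # predictions
t = np.zeros(3, dtype=np.float32)                 # targets
loss = clipped_loss(x, t)
# element-wise min of |diff| and diff**2: [0.04, 1.5, 3.0] -> sum = 4.54
print(loss.array)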
Example #3
 def __call__(self, x):
     input_shape = x.shape
     x = F.average_pooling_2d(x, ksize=x.shape[2:])
     x = F.reshape(x, shape=(x.shape[0], -1))
     x = self.init_fc(x)
     x = self.main_fcs(x)
     x = self.final_fc(x)
     x = F.broadcast_to(F.expand_dims(F.expand_dims(x, axis=2), axis=3),
                        input_shape)
     return x
Example #4
 def attention_layer(self, features, features_proj, Xp):
     h = F.expand_dims(self.w_att(Xp), 1)
     features_proj = F.normalize(features_proj, axis=-1)
     h = F.normalize(h, axis=-1)
     h_att = F.relu(features_proj + F.broadcast_to(h, features_proj.shape)) # (N, self.D, self.C) + (N, 1, self.C)
     out_att = self.w(F.reshape(h_att, (-1, self.C))) # (Nxself.D, self.C) -> (Nxself.D, 1)
     out_att = F.reshape(out_att, (-1, self.D)) # (N, self.D)
     alpha = F.softmax(out_att) # (N, self.D)
     context = F.sum(features * F.broadcast_to(F.expand_dims(alpha, 1), features.shape), axis=2) # (N, self.C, self.D) * (N, 1, self.D)
     return context, alpha
Example #5
    def __call__(self, x, t, index):
        h = self.predict(x)
        self.history = np.append(self.history, np.array([np.mean(h.data, axis=0)]), axis=0)

        h = F.select_item(h, index)             # choose action[index] in each row
        error_abs = abs(h - t)
        error = F.concat((F.expand_dims(error_abs ** 2, 1), F.expand_dims(error_abs, 1)), axis=1)
        # 1 < error_abs <=> error ** 2 > error,  error < 1 <=> error ** 2 < error
        self.loss = F.sum(F.min(error, axis=1)) / np.float32(len(error_abs))
        return self.loss
Example #6
 def __call__(self, x):
     w = F.average_pooling_2d(x, ksize=x.shape[2:])
     w = self.fc1(w)
     if self.use_conv2:
         w = self.activ(w)
         w = self.fc2(w)
     w = self.sigmoid(w)
     w = F.broadcast_to(F.expand_dims(F.expand_dims(w, axis=2), axis=3), x.shape)
     x = x * w
     return x
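The closing expand_dims/broadcast_to pair turns the per-channel gate of shape (N, C) into (N, C, 1, 1) and stretches it over the spatial dimensions, squeeze-and-excitation style. Checked in isolation with made-up shapes:

import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3, 5, 5).astype(np.float32)  # (N, C, H, W) feature map
w = np.random.rand(2, 3).astype(np.float32)          # per-channel gate in [0, 1]

w4 = F.broadcast_to(F.expand_dims(F.expand_dims(w, axis=2), axis=3), x.shape)
y = x * w4  # every spatial position of channel c in sample n is scaled by w[n, c]
assert y.shape == x.shape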
Example #7
    def pool(self, WX, skip_mask=None):
        Z, F, O, I = None, None, None, None

        # f-pooling
        if len(self._pooling) == 1:
            assert len(WX) == 2
            Z, F = WX
            Z = functions.tanh(Z)
            F = self.zoneout(F)

        # fo-pooling
        if len(self._pooling) == 2:
            assert len(WX) == 3
            Z, F, O = WX
            Z = functions.tanh(Z)
            F = self.zoneout(F)
            O = functions.sigmoid(O)

        # ifo-pooling
        if len(self._pooling) == 3:
            assert len(WX) == 4
            Z, F, O, I = WX
            Z = functions.tanh(Z)
            F = self.zoneout(F)
            O = functions.sigmoid(O)
            I = functions.sigmoid(I)

        assert Z is not None
        assert F is not None

        T = Z.shape[2]
        for t in xrange(T):
            zt = Z[:, :, t]
            ft = F[:, :, t]
            ot = 1 if O is None else O[:, :, t]
            it = 1 - ft if I is None else I[:, :, t]
            xt = 1 if skip_mask is None else skip_mask[:, t,
                                                       None]  # will be used for seq2seq to skip PAD

            if self.ct is None:
                self.ct = (1 - ft) * zt * xt
            else:
                self.ct = ft * self.ct + it * zt * xt
            self.ht = self.ct if O is None else ot * self.ct

            if self.H is None:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat(
                    (self.H, functions.expand_dims(self.ht, 2)), axis=2)

            if self._test:
                self.H.unchain_backward()

        return self.H
Example #8
    def __call__(self, x, enc_out=None, mask=None):
        """
            args
                x: paralleled main features in the model
                   Variable in (batch, hidden_dim, length)
                enc_out: hidden features from Encoder
                   Variable in (batch, hidden_dim, length)
                mask: padding-mask or future-mask
                   xp-array in (batch, length, length)
                   an element takes 'False' when pad/future, otherwise 'True'
            returns
        """
        # ksize-1-convolution results in parallel linear projections
        if self.self_attention:
            qkv = F.squeeze(self.W(F.expand_dims(x, axis=3)), axis=3)
            query, key, value = F.split_axis(qkv, 3, axis=1)
        else:
            query = F.squeeze(self.W_Q(F.expand_dims(x, axis=3)), axis=3)
            kv = F.squeeze(self.W_KV(F.expand_dims(enc_out, axis=3)), axis=3)
            key, value = F.split_axis(kv, 2, axis=1)

        # make q, k, v into (batch*parallel, dim/parallel, length) shape
        query = F.concat(F.split_axis(query, self.parallel_num, axis=1),
                         axis=0)
        key = F.concat(F.split_axis(key, self.parallel_num, axis=1), axis=0)
        value = F.concat(F.split_axis(value, self.parallel_num, axis=1),
                         axis=0)
        mask = self.xp.concatenate([mask] * self.parallel_num, axis=0)

        attention_weight = F.batch_matmul(query, key, transa=True) * self.scale
        attention_weight = F.where(
            mask, attention_weight,
            self.xp.full(attention_weight.shape, -np.inf, dtype=np.float32))
        attention_weight = F.softmax(attention_weight, axis=2)
        attention_weight = F.dropout(attention_weight, self.dropout_rate)
        attention_weight = F.where(
            self.xp.isnan(attention_weight.data),
            self.xp.full(attention_weight.shape, 0, dtype=np.float32),
            attention_weight)
        self.attention_weight = copy.deepcopy(attention_weight.data)

        # attention: (batch, q-length, k-length) -> (batch, 1, q-length, k-length)
        # value: (batch, dim/parallel, k-length) -> (batch, dim/parallel, 1, k-length)
        attention_weight, value = F.broadcast(attention_weight[:, None],
                                              value[:, :, None])
        weighted_sum = F.sum(attention_weight * value, axis=3)
        weighted_sum = F.concat(F.split_axis(weighted_sum,
                                             self.parallel_num,
                                             axis=0),
                                axis=1)

        weighted_sum = F.squeeze(self.linear(
            F.expand_dims(weighted_sum, axis=3)),
                                 axis=3)
        return weighted_sum
Example #9
def kl_div(mu1, lv1, lv2):
    # KL divergence between the given normal and a prior N(0, s2^2);
    # the prior mean is assumed to be zero:
    # ln(s2) - ln(s1) + (s1^2 + (u1 - u2)^2) / (2 * s2^2) - 0.5
    if len(lv1.shape) == 2:
        lv1 = F.expand_dims(lv1, 0)
        mu1 = F.expand_dims(mu1, 0)
    lv2 = F.broadcast_to(lv2, lv1.shape)
    v12 = F.exp(lv1)**2.0
    v22 = F.exp(lv2)**2.0
    return lv2 - lv1 + .5 * v12 / v22 + .5 * mu1**2. / v22 - .5
Example #10
	def get_gcam(self, end_output, activations, shape, label):
		self.cleargrads()
		class_id = self.set_init_grad(end_output, label)
		end_output.backward(retain_grad=True)
		grad = activations.grad_var
		grad = F.average_pooling_2d(grad, (grad.shape[-2], grad.shape[-1]), 1)
		grad = F.expand_dims(F.reshape(grad, (grad.shape[0]*grad.shape[1], grad.shape[2], grad.shape[3])), 0)
		weights = activations
		weights = F.expand_dims(F.reshape(weights, (weights.shape[0]*weights.shape[1], weights.shape[2], weights.shape[3])), 0)
		gcam = F.resize_images(F.relu(F.convolution_2d(weights, grad, None, 1, 0)), shape)
		return gcam, class_id
Example #11
 def calcLoss(G, X, S):
     GXr = G*xp.real(X).astype(np.float32)
     GXi = G*xp.imag(X).astype(np.float32)
     Sr = xp.real(S).astype(np.float32)
     Si = xp.imag(S).astype(np.float32)
     gxL = [F.expand_dims(iDGTcf.iDGT(GXr[ii],GXi[ii],windowDG,shiftLenG,fftLenG), axis=0) for ii in range(len(G))]
     sL = [F.expand_dims(iDGTcf.iDGT(Sr[ii],Si[ii],windowDG,shiftLenG,fftLenG), axis=0) for ii in range(len(G))]
     gx = F.vstack(gxL)
     s = F.vstack(sL)
     loss = F.mean_absolute_error(gx,s)
     return loss
Example #12
 def __call__(self, x):
     att1 = F.average_pooling_2d(x, ksize=x.shape[2:])
     att1 = self.mlp(att1)
     att2 = F.max_pooling_2d(x, ksize=x.shape[2:])
     att2 = self.mlp(att2)
     att = att1 + att2
     att = F.sigmoid(att)
     att = F.broadcast_to(F.expand_dims(F.expand_dims(att, axis=2), axis=3),
                          x.shape)
     x = x * att
     return x
Example #13
    def __call__(self, X, ht_enc, H_enc, skip_mask=None, test=False):
        self._test = test
        WX = self.W(X)
        Vh = self.V(ht_enc)
        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        self.contexts = []
        for t in xrange(T):
            z = Z[:, :, t]
            f = F[:, :, t]
            if t == 0:
                ct = (1 - f) * z
                self.contexts.append(ct)
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_getas = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t]
            geta = 0 if skip_mask is None else softmax_getas[
                ..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[...,
                                                         None]  # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + geta
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[:, :, t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if test:
                self.ht.unchain_backward()

            if t == 0:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat(
                    (self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Example #14
    def __call__(self, xs, ys):
        eos = self.xp.array([EOS], 'i')

        xs = [self.denoiseInput(x[::-1], self.denoising_rate)
              for x in xs]  # denoising

        #ys_d = [self.wordDropout(y, self.word_dropout) for y in ys] # word dropout
        ys_d = [self.denoiseInput(y, self.word_dropout)
                for y in ys]  # word dropout
        ys_in = [F.concat([eos, y], axis=0) for y in ys_d]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # Both xs and ys_in are lists of arrays.
        exs = sequence_embed(self.embed_x, xs)
        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        hx, at = self.encoder(None, exs)  # layer x batch x n_units
        hx_t = F.transpose(hx, (1, 0, 2))  # batch x layer x n_units
        mu = self.W_mu(hx_t)  # batch x n_latent
        ln_var = self.W_ln_var(hx_t)
        #print(mu.shape)
        #print(hx_t.shape)

        rec_loss = 0
        concat_ys_out = F.concat(ys_out, axis=0)
        for _ in range(self.k):
            z = F.gaussian(mu, ln_var)
            z_e = F.expand_dims(z, 2)  # batch x n_latent x 1
            Wz = self.W_h(z_e)  # batch x (layer x unit)
            #print('Wz: {}, {}'.format(Wz.shape, type(Wz)))
            hys = F.split_axis(Wz, self.n_layers, 1)  # layer x batch x unit
            #print('hys, {}'.format([x.shape for x in hys]))
            c_hy = F.concat([F.expand_dims(hy, 0) for hy in hys],
                            0)  # layer x batch x unit
            #print('c_hy: {}'.format(c_hy.shape))
            _, os = self.decoder(c_hy, eys)
            #print(len(os))
            concat_os = F.concat(os, axis=0)
            rec_loss += F.sum(
                F.softmax_cross_entropy(self.W(concat_os),
                                        concat_ys_out,
                                        reduce='no')) / (self.k * batch)
        latent_loss = self.C * F.gaussian_kl_divergence(mu, ln_var) / batch

        loss = rec_loss + latent_loss

        chainer.report({'loss': loss.data}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.data * batch / n_words)
        chainer.report({'perp': perp}, self)

        return loss
Example #15
    def translate(self, xs, max_length=100):
        xs = numpy.insert(xs, 0, 2)
        xs = numpy.append(xs, 0)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            exs = self.embed_x(Variable(self.xp.array(xs,
                                                      dtype=self.xp.int32)))

            h = F.expand_dims(exs, axis=0)
            h = F.expand_dims(h, axis=0)
            h = F.transpose(h, (0, 1, 3, 2))
            for i in range(self.stack):
                h = self.gcnn[i](h)
            h = F.squeeze(h, axis=1)
            h = F.squeeze(h, axis=0)
            h = F.transpose(h, (1, 0))

            ys = self.xp.full(1, 2, self.xp.int32)
            result = []
            hx = None
            cx = None
            hx2 = None
            cx2 = None

            for i in range(max_length):
                eys = self.embed_y(ys)
                eyys = self.embed_yy(ys)
                eys2 = [eys]
                eyys2 = [eyys]
                hx, cx, ss = self.decoder(hx, cx, eys2)
                hx2, cx2, ss2 = self.decoder2(hx2, cx2, eyys2)

                batch_A = F.matmul(h, ss[0], transb=True) * self.scale_score
                batch_A = F.softmax(batch_A, axis=0)
                if self.weight:
                    with open("weight/wei.txt", "a", encoding="utf-8") as f:
                        for j in range(len(batch_A)):
                            f.write(str(batch_A[j][0].data) + "\n")
                        f.write("--------------\n")
                s = F.matmul(batch_A, h, transa=True)
                t = (self.We(s) + self.Ws(ss2[0]))
                ys = self.xp.argmax(t.data, axis=1).astype(self.xp.int32)
                if ys[0] == 0:
                    break
                result.append(ys)
        result = cuda.to_cpu(
            self.xp.concatenate([self.xp.expand_dims(x, 0) for x in result]).T)
        # Remove EOS tags
        outs = []
        for y in result:
            inds = numpy.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Example #16
 def __call__(self, x, hc=None):
     w = F.average_pooling_2d(x, ksize=x.shape[2:])
     w = w.reshape((w.shape[0], -1))
     if hc is None:
         h = [self.xp.zeros_like(w.array, dtype=w.dtype)] * self.num_layers
         c = [self.xp.zeros_like(w.array, dtype=w.dtype)] * self.num_layers
     else:
         h, c = hc
     h, c = self.lstm(w, h, c)
     w = F.expand_dims(F.expand_dims(h[-1], axis=-1), axis=-1)
     x = x * w
     return x, (h, c)
Example #17
def loss_information(enc, x):
    p_logit = enc(x)
    p = F.sigmoid(p_logit)
    p_ave = F.sum(p, axis=0) / x.data.shape[0]

    cond_ent = F.sum(-p * F.log(p + 1e-8) -
                     (1 - p) * F.log(1 - p + 1e-8)) / p.data.shape[0]
    marg_ent = F.sum(-p_ave * F.log(p_ave + 1e-8) -
                     (1 - p_ave) * F.log(1 - p_ave + 1e-8))

    p_ave = F.reshape(p_ave, (1, len(p_ave.data)))

    p_ave_separated = F.separate(p_ave, axis=1)
    p_separated = F.separate(F.expand_dims(p, axis=2), axis=1)

    p_ave_list_i = []
    p_ave_list_j = []

    p_list_i = []
    p_list_j = []

    for i in range(n_bit - 1):
        p_ave_list_i.extend(list(p_ave_separated[i + 1:]))
        p_list_i.extend(list(p_separated[i + 1:]))

        p_ave_list_j.extend([p_ave_separated[i] for n in range(n_bit - i - 1)])
        p_list_j.extend([p_separated[i] for n in range(n_bit - i - 1)])

    p_ave_pair_i = F.expand_dims(F.concat(tuple(p_ave_list_i), axis=0), axis=1)
    p_ave_pair_j = F.expand_dims(F.concat(tuple(p_ave_list_j), axis=0), axis=1)

    p_pair_i = F.expand_dims(F.concat(tuple(p_list_i), axis=1), axis=2)
    p_pair_j = F.expand_dims(F.concat(tuple(p_list_j), axis=1), axis=2)

    p_pair_stacked_i = F.concat(
        (p_pair_i, 1 - p_pair_i, p_pair_i, 1 - p_pair_i), axis=2)
    p_pair_stacked_j = F.concat(
        (p_pair_j, p_pair_j, 1 - p_pair_j, 1 - p_pair_j), axis=2)

    p_ave_pair_stacked_i = F.concat(
        (p_ave_pair_i, 1 - p_ave_pair_i, p_ave_pair_i, 1 - p_ave_pair_i),
        axis=1)
    p_ave_pair_stacked_j = F.concat(
        (p_ave_pair_j, p_ave_pair_j, 1 - p_ave_pair_j, 1 - p_ave_pair_j),
        axis=1)

    p_product = F.sum(p_pair_stacked_i * p_pair_stacked_j, axis=0) / len(
        p.data)
    p_ave_product = p_ave_pair_stacked_i * p_ave_pair_stacked_j
    pairwise_mi = 2 * F.sum(p_product * F.log(
        (p_product + 1e-8) / (p_ave_product + 1e-8)))

    return cond_ent, marg_ent, pairwise_mi
Example #18
    def __call__(self, h):
        shape = h.shape
        h = F.reshape(h, (shape[0], np.prod(shape[1:])))
        h_ns = F.batch_l2_norm_squared(h)
        bs = shape[0]
        h0 = F.broadcast_to(F.expand_dims(h_ns, 0), (bs, bs))
        h1 = F.broadcast_to(F.expand_dims(h_ns, 1), (bs, bs))
        hh = F.linear(h, h)
        D = h0 + h1 - 2 * hh
        D = F.sum(D) / np.prod(h.shape)

        return D
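This block uses the identity ||h_i - h_j||^2 = ||h_i||^2 + ||h_j||^2 - 2 h_i . h_j, with F.linear(h, h) supplying the Gram matrix h h^T. A standalone NumPy check of the same identity (not the original chain):

import numpy as np

h = np.random.randn(4, 6).astype(np.float32)
h_ns = (h ** 2).sum(axis=1)                        # ||h_i||^2
D = h_ns[None, :] + h_ns[:, None] - 2 * h @ h.T    # as built above via broadcasting
D_direct = ((h[:, None, :] - h[None, :, :]) ** 2).sum(axis=2)
assert np.allclose(D, D_direct, atol=1e-4)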
Example #19
 def __call__(self, x):
     w = F.average_pooling_2d(x, ksize=x.shape[2:])
     if not self.use_conv:
         w = F.reshape(w, shape=(w.shape[0], -1))
     w = self.conv1(w) if self.use_conv else self.fc1(w)
     w = self.activ(w)
     w = self.conv2(w) if self.use_conv else self.fc2(w)
     w = self.sigmoid(w)
     if not self.use_conv:
         w = F.expand_dims(F.expand_dims(w, axis=2), axis=3)
     x = x * w
     return x
Example #20
    def compute_attention(self, query, key):
        """
        :param query: with shape of (mb, N_1, hidden_dim)
        :param key: with shape of (mb, N_2, hidden_dim)
        :return: attn: attention weights (mb, N_1, N_2)
        """

        energy_layer = self.energy_layer
        mb, N_1, hidden_dim = query.shape
        N_2 = key.shape[1]
        # # query: (mb, N_1, 1, hidden_dim)
        # query = functions.expand_dims(query, axis=2)
        # # query: (mb, N_1, N_2, hidden_dim)
        # query = functions.tile(query, reps=(1, 1, N_2, 1))
        # # query: (mb * N_1 * N_2, hidden_dim)
        # query = functions.reshape(query, (mb * N_1 * N_2, hidden_dim))
        # query: (mb * N_1 hidden_dim)
        # # key: (mb, 1, N_2, hidden_dim)
        # key = functions.expand_dims(key, axis=1)
        # # key: (mb, N_1, N_2, hidden_dim)
        # key = functions.tile(key, reps=(1, N_1, 1, 1))
        # # key: (mb * N_1 * N_2, hidden_dim)
        # key = functions.reshape(key, (mb * N_1 * N_2, hidden_dim))
        # key: (mb * N_2, hidden_dim)
        # energy: (mb * N_1 * N_2, 1)
        # energy = self.activation(energy_layer(key, query))

        query_real, query_imag = self.fourier_transform(query)
        key_real, key_imag = self.fourier_transform(key)
        query_real = functions.reshape(functions.tile(functions.expand_dims(
            query_real, axis=2),
                                                      reps=(1, 1, N_2, 1)),
                                       shape=(mb * N_1 * N_2, hidden_dim))
        query_imag = functions.reshape(functions.tile(functions.expand_dims(
            query_imag, axis=2),
                                                      reps=(1, 1, N_2, 1)),
                                       shape=(mb * N_1 * N_2, hidden_dim))
        key_real = functions.reshape(functions.tile(functions.expand_dims(
            key_real, axis=1),
                                                    reps=(1, N_1, 1, 1)),
                                     shape=(mb * N_1 * N_2, hidden_dim))
        key_imag = functions.reshape(functions.tile(functions.expand_dims(
            key_imag, axis=1),
                                                    reps=(1, N_1, 1, 1)),
                                     shape=(mb * N_1 * N_2, hidden_dim))

        energy = self.activation(
            energy_layer(key_real, query_real) +
            energy_layer(key_imag, query_imag))

        energy = functions.reshape(energy, (mb, N_1, N_2))
        return energy
Example #21
    def __call__(self, x0, x1, l0, l1, train=True):
        """ Forward computation.

        Args:
            x0: Chainer variable in shape (B, T0) where B is the batch size,
                T is the number of tokens in each data. Each element should be
                given as the index of embedding.
            x1: Chainer variable in shape (B, T1)

        Returns:

        """
        t0 = x0.shape[1]
        t1 = x1.shape[1]
        # a: (B, T0, M)
        a = self.emb(x0)
        # b: (B, T1, M)
        b = self.emb(x1)

        if not self._train_embedding:
            a.unchain_backward()
            b.unchain_backward()
            a = self._token_wise_linear(a, self.emb_proj, l0, train, self.xp)
            b = self._token_wise_linear(b, self.emb_proj, l1, train, self.xp)
        # Apply perceptron layer to each feature vectors ... eq. 1
        # (B, Ti, M) -> (B * Ti, M) -> (B * Ti, F) -> (B, Ti, F)
        a_f = self._token_wise_linear(a, self.f, l0, train, self.xp)
        b_f = self._token_wise_linear(b, self.f, l1, train, self.xp)
        # for each batch, calculate a_f[b]
        # e: (B, T0, T1)
        e = F.batch_matmul(a_f, b_f, transb=True)

        # att_*: (B, T0, T1)
        att_b, att_a = self._length_aware_softmax(e, l0, l1, self.xp)
        # sum((B, T0, T1).(B, T0, T1, M)) -> beta: (B, T0, M) ... eq. 2
        b_tiled = F.tile(F.expand_dims(b, 1), (1, t0, 1, 1))
        att_b = F.expand_dims(att_b, 3)
        beta = F.sum(F.broadcast_to(att_b, b_tiled.shape) * b_tiled, axis=2)
        # sum((B, T0, T1).(N, T0, T1, M)) -> beta: (B, T1, M) ... eq. 2
        a_tiled = F.tile(F.expand_dims(a, 2), (1, 1, t1, 1))
        att_a = F.expand_dims(att_a, 3)
        alpha = F.sum(F.broadcast_to(att_a, a_tiled.shape) * a_tiled, axis=1)

        # Make comparison, [(B, Ti, M), (B, Ti, M)] -> (B, M')
        v1 = self._compare(a, beta, l0, train, self.xp)
        v2 = self._compare(b, alpha, l1, train, self.xp)

        # (B, M' + M') -> (B, n_class)  ... eq. 4 & 5
        v = F.concat((v1, v2), axis=1)
        y = self.h(v)

        return y
Example #22
def mixture_of_discretized_logistics_nll(x, y):
    """
    Args:
        x: (b, c, n, n)
        y: (b, 10*n_mix, n, n)
    """
    xp = get_array_module(x)
    n_mix = y.shape[1] // 10
    logit_prob = y[:, :n_mix, :, :]
    y = F.reshape(y[:, n_mix:, :, :], x.shape + (n_mix * 3, ))
    mean = y[:, :, :, :, 0:n_mix]
    log_scale = y[:, :, :, :, n_mix:2 * n_mix]
    log_scale = F.maximum(log_scale, -7 * xp.ones(log_scale.shape, dtype='f'))
    coeff = F.tanh(y[:, :, :, :, 2 * n_mix:3 * n_mix])

    x = xp.repeat(xp.expand_dims(x, 4), n_mix, 4)
    m1 = F.expand_dims(mean[:, 0, :, :, :], 1)
    m2 = F.expand_dims(
        mean[:, 1, :, :, :] + coeff[:, 0, :, :, :] * x[:, 0, :, :, :], 1)
    m3 = F.expand_dims(
        (mean[:, 2, :, :, :] + coeff[:, 1, :, :, :] * x[:, 0, :, :, :] +
         coeff[:, 2, :, :, :] * x[:, 1, :, :, :]), 1)
    mean = F.concat([m1, m2, m3])
    centered_x = x - mean
    inv_std = F.exp(-log_scale)
    max_in = inv_std * (centered_x + 1. / 255.)
    cdf_max = F.sigmoid(max_in)
    min_in = inv_std * (centered_x - 1. / 255.)
    cdf_min = F.sigmoid(min_in)
    log_cdf_max = max_in - F.softplus(max_in)  # 0
    log_one_minus_cdf_min = -F.softplus(min_in)  # 255
    cdf_delta = cdf_max - cdf_min  # 0 ~ 255
    mid_in = inv_std * centered_x
    log_pdf_mid = mid_in - log_scale - 2. * F.softplus(mid_in)  # mid

    log_prob = F.where(
        x < -0.999, log_cdf_max,
        F.where(
            x > 0.999, log_one_minus_cdf_min,
            F.where(
                cdf_delta.array > 1e-5,
                F.log(
                    F.maximum(cdf_delta,
                              xp.ones(cdf_delta.shape, dtype='f') * 1e-12)),
                log_pdf_mid - xp.log(127.5))))

    log_prob = F.transpose(F.sum(log_prob, 1), (0, 3, 1, 2))
    log_prob = log_prob + log_prob_from_logit(logit_prob)

    loss = F.logsumexp(log_prob, 1)
    loss = F.sum(loss, axis=(1, 2))
    return -F.mean(loss)
Example #23
 def __call__(self):
     """Applies the linear layer.
     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     norm = F.batch_l2_norm_squared(self.W)**0.5
     norm_broadcasted = F.broadcast_to(F.expand_dims(norm, 1),
                                       self.W.data.shape)
     g_broadcasted = F.broadcast_to(F.expand_dims(self.g, 1),
                                    self.W.data.shape)
     return g_broadcasted * self.W / norm_broadcasted
Example #24
    def __call__(self, edge, node, triplet):
        num_atom = edge.shape[1]

        hn1 = F.tile(F.expand_dims(self.Wn1(node), 1), (1, num_atom, 1, 1))
        hn2 = F.tile(F.expand_dims(self.Wn1(node), 2), (1, 1, num_atom, 1))

        ht1 = self.Wt2(F.sum(zero_plus(self.Wt1(triplet)), axis=1))

        concat = F.concat([hn1, hn2, ht1, edge], axis=3)

        add = zero_plus(self.We2(zero_plus(self.We1(concat))))

        return edge + self.bn(add)
Example #25
def bger(x, y):
    """ Batch outer product

    :param x:
    :param y:
    :return:
    """
    if x.dtype == 'int' and y.dtype == 'int':
        x_float = F.cast(x, 'float32')
        y_float = F.cast(y, 'float32')
        res_float = F.expand_dims(x_float, 2) @ F.expand_dims(y_float, 1)
        return F.cast(res_float, 'int')
    return F.expand_dims(x, 2) @ F.expand_dims(y, 1)
Example #26
 def scoreDot(self, atts, ys):
     xs = [self.W3(att) for att in atts]
     xs_T = [F.transpose(x, (1, 0))for x in xs]
     dots = [F.matmul(y, x)for x, y in zip(xs_T, ys)]
     aws = [F.softmax(dot, 1) for dot in dots]
     cts = []
     for x, aw in zip(xs, aws):  # split batch
         aw = F.expand_dims(aw, 1)
         x = F.tile(F.expand_dims(x, 0), (aw.shape[0], 1, 1))
         ct = F.batch_matmul(aw, x)
         cts.append(F.reshape(ct, (ct.shape[0], ct.shape[2])))
     ds = [F.tanh(self.Wc1(ct) + self.Wc2(y)) for y, ct in zip(ys, cts)]
     return ds
Example #27
 def __call__(self, xs):
     xs = chainer.dataset.convert.concat_examples(xs, padding=0)
     xs = F.transpose(xs,(1,0,2))
     for i in range(len(xs)):
         if i==0:
             hs = self.l1(xs[0])
             hs = F.expand_dims(hs,0)
         else:
             hw = self.l1(xs[i])
             hw = F.expand_dims(hw,0)
             hs = F.concat((hs,hw), axis=0)
     h=F.mean(hs,axis=0)
     return h
Example #28
    def __call__(self,input_data,hx=None):
        if np.any(hx):
            hx = hx.reshape(1,-1,self.h1.out_size)
        input_x = [Variable(x) for x in input_data]
        hx,cx,y = self.h1(hx,None,input_x)
        y2 = [F.concat(x, axis=0) for x in F.pad_sequence(y,length=17, padding=0.)]
        y2 = F.concat([F.expand_dims(x,axis=0) for x in y2],axis=0)
        
        out = self.hy(F.concat([F.expand_dims(item[-1],axis=0) for item in y],axis=0))

        atn = self.atn(y2)

        return F.concat([F.expand_dims(a*o,axis=0) for a,o in zip(atn,out)],axis=0)
Example #29
 def __call__(self, x, pid):
     x = self.bn(x)
     x = F.swapaxes(x, axis1=1, axis2=3)
     y = F.expand_dims(F.expand_dims(pid, axis=-1), axis=-1)
     y = F.tile(y, reps=(1, 1, self.audio_window_size, 1))
     x = F.concat((x, y), axis=1)
     x = self.branch(x)
     x = F.reshape(x, shape=(x.shape[0], -1))
     x = F.concat((x, pid), axis=1)
     x = self.fc1(x)
     x = F.tanh(x)
     x = self.fc2(x)
     return x
Example #30
 def __call__(self, x):
     """Applies the linear layer.
     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     norm = F.batch_l2_norm_squared(self.W) ** 0.5
     norm_broadcasted = F.broadcast_to(
         F.expand_dims(norm, 1), self.W.data.shape)
     g_broadcasted = F.broadcast_to(
         F.expand_dims(self.g, 1), self.W.data.shape)
     return F.linear(x, g_broadcasted * self.W / norm_broadcasted, self.b)
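Both weight-normalization snippets (this one and the parameter-only variant further up) assume a link that owns W, g and b. Below is a minimal sketch of such a link; the class name, parameter shapes and initializers are assumptions, not taken from the original projects:

import numpy as np
import chainer
import chainer.functions as F
from chainer import initializers

class WeightNormLinear(chainer.Link):
    def __init__(self, in_size, out_size):
        super(WeightNormLinear, self).__init__()
        with self.init_scope():
            self.W = chainer.Parameter(initializers.HeNormal(), (out_size, in_size))
            self.g = chainer.Parameter(initializers.Constant(1.0), (out_size,))  # per-row scale
            self.b = chainer.Parameter(initializers.Constant(0.0), (out_size,))  # bias

    def __call__(self, x):
        norm = F.batch_l2_norm_squared(self.W) ** 0.5           # row-wise ||W_i||
        norm = F.broadcast_to(F.expand_dims(norm, 1), self.W.shape)
        g = F.broadcast_to(F.expand_dims(self.g, 1), self.W.shape)
        return F.linear(x, g * self.W / norm, self.b)

y = WeightNormLinear(6, 4)(np.random.randn(2, 6).astype(np.float32))  # shape (2, 4)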
Example #31
    def __call__(self, imgs, questions):
        feat = self.feat_extractor(imgs)

        # Append relative coordinates to each location in the feature maps.
        n, c, h, w = feat.shape
        spatial_area = h * w

        xp = self.xp
        coords_h = xp.linspace(-1, 1, h, dtype=feat.dtype)
        coords_w = xp.linspace(-1, 1, w, dtype=feat.dtype)
        coords_hh, coords_ww = xp.meshgrid(coords_h, coords_w)
        coords_hh = coords_hh[None]
        coords_ww = coords_ww[None]
        coords = xp.concatenate((coords_hh, coords_ww), axis=0)
        coords = coords.reshape(2, -1)
        coords = coords[None]  # (1, 2, spatial_area * spatial_area)
        coords = xp.repeat(coords, n, axis=0)

        # Coordinates may be cached here but the performance gain is not
        # significant so it is skipped in favor of readability.

        feat = feat.reshape(n, c, spatial_area)
        h = F.concat((feat, coords), axis=1)  # (n, c + 2, spatial_area)

        # Create coordinate pairs (differentiable meshgrid).
        h_hh = F.expand_dims(h, 2)
        h_ww = F.expand_dims(h, 3)
        h_hh = F.repeat(h_hh, spatial_area, axis=2)
        h_ww = F.repeat(h_ww, spatial_area, axis=3)
        h = F.concat((h_hh, h_ww), axis=1)

        # Append questions to each coordinate pair.
        questions = questions.astype(imgs.dtype)
        questions = questions[:, :, None, None]
        questions = F.tile(questions, (1, 1, spatial_area, spatial_area))
        h = F.concat((h, questions), axis=1)
        # (n, (c + 2) * 2 + questions_length, spatial_area, spatial_area)

        # g.
        h = F.transpose(h, (0, 2, 3, 1))
        h = F.reshape(h, (n * spatial_area * spatial_area, -1))
        h = self.g(h)
        h = F.reshape(h, (n, spatial_area * spatial_area, -1))
        h = F.sum(h, axis=1)

        h = self.f(h)

        # Logits.
        h = self.fc(h)

        return h
Example #32
    def __call__(self, imgs, questions):
        feat = self.feat_extractor(imgs)

        # Append relative coordinates to each location in the feature maps.
        n, c, h, w = feat.shape
        spatial_area = h * w

        xp = self.xp
        coords_h = xp.linspace(-1, 1, h, dtype=feat.dtype)
        coords_w = xp.linspace(-1, 1, w, dtype=feat.dtype)
        coords_hh, coords_ww = xp.meshgrid(coords_h, coords_w)
        coords_hh = coords_hh[None]
        coords_ww = coords_ww[None]
        coords = xp.concatenate((coords_hh, coords_ww), axis=0)
        coords = coords.reshape(2, -1)
        coords = coords[None]  # (1, 2, spatial_area * spatial_area)
        coords = xp.repeat(coords, n, axis=0)

        # Coordinates may be cached here but the performance gain is not
        # significant so it is skipped in favor of readability.

        feat = feat.reshape(n, c, spatial_area)
        h = F.concat((feat, coords), axis=1)  # (n, c + 2, spatial_area)

        # Create coordinate pairs (differentiable meshgrid).
        h_hh = F.expand_dims(h, 2)
        h_ww = F.expand_dims(h, 3)
        h_hh = F.repeat(h_hh, spatial_area, axis=2)
        h_ww = F.repeat(h_ww, spatial_area, axis=3)
        h = F.concat((h_hh, h_ww), axis=1)

        # Append questions to each coordinate pair.
        questions = questions.astype(imgs.dtype)
        questions = questions[:, :, None, None]
        questions = F.tile(questions, (1, 1, spatial_area, spatial_area))
        h = F.concat((h, questions), axis=1)
        # (n, (c + 2) * 2 + questions_length, spatial_area, spatial_area)

        # g.
        h = F.transpose(h, (0, 2, 3, 1))
        h = F.reshape(h, (n * spatial_area * spatial_area, -1))
        h = self.g(h)
        h = F.reshape(h, (n, spatial_area * spatial_area, -1))
        h = F.sum(h, axis=1)

        h = self.f(h)

        # Logits.
        h = self.fc(h)

        return h
Example #33
    def forward_rnn_encode_proj(self, X):
        # Reset rnn state
        self.reset_rnn_state()
        # Get input shape
        in_size, batch_size, in_dim = X.shape
        enc_states = X
        for currL in range(len(self.rnn_enc)):
            for i in range(in_size):
                temp_f = F.expand_dims(
                    F.dropout(self[self.rnn_enc[currL]](enc_states[i]),
                              ratio=self.cfg["dropout"]["rnn"]), 0)
                # if bi-directional
                if self.bi_rnn:
                    temp_r = F.expand_dims(
                        F.dropout(self[self.rnn_rev_enc[currL]](
                            enc_states[-1]),
                                  ratio=self.cfg["dropout"]["rnn"]), 0)

                if i > 0:
                    h_fwd = F.concat((h_fwd, temp_f), axis=0)
                    if self.bi_rnn:
                        h_rev = F.concat((h_rev, temp_r), axis=0)
                else:
                    h_fwd = temp_f
                    if self.bi_rnn:
                        h_rev = temp_r
            # end current rnn layer
            if self.bi_rnn:
                h_rev = F.flipud(h_rev)
                rnn_states = F.concat((h_fwd, h_rev), axis=2)
            else:
                rnn_states = h_fwd
            """
            Apply linear projection
            """
            # print(f"Applying rnn {currL}")
            if currL < (len(self.rnn_enc) - 1):
                # print(f"Applying linear linear_proj {currL}")
                for i in range(0, in_size):
                    currH = F.relu(self[f"enc_proj{currL}_bn"](
                        self[f"enc_proj{currL}"](rnn_states[i])))
                    if i > 0:
                        enc_states = F.concat(
                            (enc_states, F.expand_dims(currH, 0)), axis=0)
                    else:
                        enc_states = F.expand_dims(currH, 0)
                # end for all hidden states
        # end all layers

        # Make the batch size as the first dimension
        self.enc_states = F.swapaxes(enc_states, 0, 1)
Example #34
    def __call__(self, xi):
        hc0 = F.leaky_relu(self.c0(xi))
        hc1 = F.leaky_relu(self.bnc1(self.c1(hc0), test=not self.train))
        hc2 = F.leaky_relu(self.bnc2(self.c2(hc1), test=not self.train))
        hc3 = F.leaky_relu(self.bnc3(self.c3(hc2), test=not self.train))
        hc4 = F.leaky_relu(self.bnc4(self.c4(hc3), test=not self.train))
        hc5 = F.leaky_relu(self.bnc5(self.c5(hc4), test=not self.train))
        hc6 = F.leaky_relu(self.bnc6(self.c6(hc5), test=not self.train))
        hc7 = F.leaky_relu(self.bnc7(self.c7(hc6), test=not self.train))
        hc8 = F.leaky_relu(self.bnc8(self.c8(hc7), test=not self.train))

        h = F.expand_dims(hc8,2)
        h = F.relu(F.dropout(self.bndc00(self.dc00(h), test=not self.train), 0.5, train=self.train_dropout))
        hc7 = F.expand_dims(hc7,2)
        hc7 = F.broadcast_to(hc7, hc7.data.shape[:2]+(h.data.shape[2],)+hc7.data.shape[3:])
        h = F.concat((h,hc7),1)
        h = F.relu(F.dropout(self.bndc0(self.dc0(h), test=not self.train), 0.5, train=self.train_dropout))
        hc6 = F.expand_dims(hc6,2)
        hc6 = F.broadcast_to(hc6, hc6.data.shape[:2]+(h.data.shape[2],)+hc6.data.shape[3:])
        h = F.concat((h,hc6),1)
        h = F.relu(F.dropout(self.bndc1(self.dc1(h), test=not self.train), 0.5, train=self.train_dropout))
        hc5 = F.expand_dims(hc5,2)
        hc5 = F.broadcast_to(hc5, hc5.data.shape[:2]+(h.data.shape[2],)+hc5.data.shape[3:])
        h = F.concat((h,hc5),1)
        h = F.relu(self.bndc2(self.dc2(h), test=not self.train))
        hc4 = F.expand_dims(hc4,2)
        hc4 = F.broadcast_to(hc4, hc4.data.shape[:2]+(h.data.shape[2],)+hc4.data.shape[3:])
        h = F.concat((h,hc4),1)
        h = F.relu(self.bndc3(self.dc3(h), test=not self.train))
        hc3 = F.expand_dims(hc3,2)
        hc3 = F.broadcast_to(hc3, hc3.data.shape[:2]+(h.data.shape[2],)+hc3.data.shape[3:])
        h = F.concat((h,hc3),1)
        h = F.relu(self.bndc4(self.dc4(h), test=not self.train))
        hc2 = F.expand_dims(hc2,2)
        hc2 = F.broadcast_to(hc2, hc2.data.shape[:2]+(h.data.shape[2],)+hc2.data.shape[3:])
        h = F.concat((h,hc2),1)
        h = F.relu(self.bndc5(self.dc5(h), test=not self.train))
        hc1 = F.expand_dims(hc1,2)
        hc1 = F.broadcast_to(hc1, hc1.data.shape[:2]+(h.data.shape[2],)+hc1.data.shape[3:])
        h = F.concat((h,hc1),1)
        h = F.relu(self.bndc6(self.dc6(h), test=not self.train))
        hc0 = F.expand_dims(hc0,2)
        hc0 = F.broadcast_to(hc0, hc0.data.shape[:2]+(h.data.shape[2],)+hc0.data.shape[3:])
        h = F.concat((h,hc0),1)
        h = self.dc7(h)

        xi_ = F.expand_dims(xi,2)
        xi_ = F.broadcast_to(xi_, h.data.shape)

        h = F.sigmoid(h+xi_)
        return h
Example #35
    def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
        pad = self._kernel_size - 1
        WX = self.W(X)[:, :, -pad - 1, None]
        Vh = self.V(ht_enc)

        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if self.contexts is None:
                ct = (1 - f) * z
                self.contexts = [ct]
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t - T]
            bias = 0 if skip_mask is None else softmax_bias[
                ..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[...,
                                                         None]  # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if self.H is None:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat(
                    (self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Example #36
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """
        
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = \
                                      F.broadcast(
                                          *[x_g_norm,
                                            x_g_y_g,
                                            F.expand_dims(y_g_norm, 1)])
        #F.exp(- (x_g_norm - 2 * x_g_y_g+ y_g_norm))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #37
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """
        
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = \
                                      F.broadcast(
                                          *[x_g_norm,
                                            x_g_y_g,
                                            F.expand_dims(y_g_norm, 1)])
        #F.exp(- (x_g_norm - 2 * x_g_y_g+ y_g_norm))
        u = x_g_norm - 2 * x_g_y_g+ y_g_norm
        print(np.min(u.data))
        print(len((np.where(u.data < 0)[0])), np.prod(u.data.shape))
        time.sleep(0.5)
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #38
 def create_encoder_states_matrix(self, hs):
     batch_size, dim = hs[0].data.shape
     hs_3d = list(map(lambda h: F.expand_dims(h, 1), hs))  # [(batch_size, 1, dim)]
     hs_3d_concat = F.concat(hs_3d, axis=1)  # (batch_size, input_length, dim)
     hs_3d_concat_linear = self.decoder.phi2_linear(F.reshape(hs_3d_concat, (-1, dim)))  # (batch_size * input_length, dim)
     hs_3d_concat_linear_tanh = F.tanh(F.reshape(hs_3d_concat_linear, (batch_size, -1, dim)))     # (batch_size, input_length, dim)
     return hs_3d_concat_linear_tanh
Example #39
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.expand_dims(x, self.axis)
     self.assertEqual(y.data.shape, self.out_shape)
     y_expect = numpy.expand_dims(cuda.to_cpu(x_data), self.axis)
     self.assertEqual(y.data.dtype, self.dtype)
     numpy.testing.assert_array_equal(cuda.to_cpu(y.data), y_expect)
Example #40
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """

        g = F.broadcast_to(
            F.gaussian(
                np.array([0], dtype=np.float32),
                np.array([np.exp(1)], dtype=np.float32)), x.shape)
            
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = \
                                      F.broadcast(
                                          *[x_g_norm,
                                            x_g_y_g,
                                            F.expand_dims(y_g_norm, 1)])
        #F.exp(- (x_g_norm - 2 * x_g_y_g+ y_g_norm))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #41
def ordinal_loss(y, mask):
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape
    max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    y = y - max_y
    sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)
    down_tri = np.tri(c, dtype=np.float32)
    up_tri = down_tri.T
    w1 = Variable(xp.asarray(down_tri.reshape(c, c, 1, 1)), volatile=volatile)
    w2 = Variable(xp.asarray(up_tri.reshape(c, c, 1, 1)), volatile=volatile)
    h = F.exp(F.expand_dims(y, -1))
    h1 = F.convolution_2d(h, w1)
    h1 = F.convolution_2d(F.log(h1), w1)
    h2 = F.convolution_2d(h, w2)
    h2 = F.convolution_2d(F.log(h2), w2)
    h = F.reshape(h1 + h2, (b, c, n))
    return F.sum((h - sum_y - y) * mask) / b
Example #42
 def __call__(self, embeded_x, m_prev, h_prev, x):
     batch_size = embeded_x.shape[0]
     lstm_in = self.W(embeded_x) + self.U(h_prev)
     m_tmp, h_tmp = F.lstm(m_prev, lstm_in)
     # flags if feeding previous output
     feed_prev = F.broadcast_to(F.expand_dims(x.data != IGNORE_LABEL, -1),
                                (batch_size, self.hidden_size))
     m = F.where(feed_prev, m_tmp, m_prev)
     h = F.where(feed_prev, h_tmp, h_prev)
     return m, h
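The feed_prev mask keeps the previous LSTM state wherever the current token equals IGNORE_LABEL. The masking step can be exercised on its own; IGNORE_LABEL and the shapes below are placeholders:

import numpy as np
import chainer.functions as F

IGNORE_LABEL = -1
hidden_size = 4
x = np.array([3, -1, 7], dtype=np.int32)               # token ids, -1 = padding
h_prev = np.zeros((3, hidden_size), dtype=np.float32)  # carried-over states
h_tmp = np.ones((3, hidden_size), dtype=np.float32)    # freshly computed states

feed_prev = F.broadcast_to(F.expand_dims(x != IGNORE_LABEL, -1),
                           (len(x), hidden_size))
h = F.where(feed_prev, h_tmp, h_prev)  # row 1 (the padded token) keeps h_prev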
Example #43
    def check_backward(self, x_data, y_grad):
        x = chainer.Variable(x_data)
        y = functions.expand_dims(x, self.axis)
        y.grad = y_grad
        y.backward()

        func = y.creator
        f = lambda: func.forward((x_data,))
        gx, = gradient_check.numerical_grad(f, (x_data,), (y_grad,))
        gradient_check.assert_allclose(cuda.to_cpu(x.grad),
                                       cuda.to_cpu(gx))
Example #44
 def __call__(self, x):
     """Applies the linear layer.
     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     batch_size = x.data.shape[0]
     #print batch_size
     batch_W = F.concat([F.expand_dims(self.W,0)] * batch_size,0)
     #print batch_W.data.shape
     return F.reshape(F.batch_matmul(x, batch_W),x.data.shape[:-1])
Example #45
    def __call__(self, h, h_gen=None, test=False):
        # Concat
        if h_gen is not None:
            # Restrict Decoder with input image
            h_stacked = ()
            for i in range(h_gen.shape[1]):
                if np.random.randint(2) == 0:
                    h_stacked += (F.expand_dims(h[:, i, :, :], axis=1), )
                else:
                    h_stacked += (F.expand_dims(h_gen[:, i, :, :], axis=1), )
            h = F.concat(h_stacked)
                    
        h = self.deconv0(h)  # 7x7 -> 14x14
        h = self.bn0(h, test)
        h = self.act(h)

        h = self.deconv1(h)  # 14x14 -> 28x28
        h = F.tanh(h)
        return h
Example #46
 def setUp(self):
     self.x1 = numpy.random.uniform(
         .5, 1, (batch_size, m, k)).astype(numpy.float32)
     self.x2 = numpy.random.uniform(
         .5, 1, (k, n)).astype(numpy.float32)
     self.gy = numpy.random.uniform(
         -1, 1, (batch_size, m, n)).astype(numpy.float32)
     self.op = lambda x, y: F.batch_matmul(
         x, F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n)))
     self.forward_answer = numpy.array([
         numpy.dot(self.x1[i], self.x2)
         for i in six.moves.range(batch_size)])
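The op defined in setUp broadcasts a single (k, n) matrix across the batch before batch_matmul, so the expected output is an independent dot product per batch element, which is exactly what forward_answer stores. The same pattern as a standalone sketch with small, arbitrary sizes:

import numpy as np
import chainer.functions as F

batch_size, m, k, n = 2, 3, 4, 5
x1 = np.random.rand(batch_size, m, k).astype(np.float32)
x2 = np.random.rand(k, n).astype(np.float32)

y = F.batch_matmul(x1, F.broadcast_to(F.expand_dims(x2, 0), (batch_size, k, n)))
expected = np.array([x1[i].dot(x2) for i in range(batch_size)])
assert np.allclose(y.array, expected, atol=1e-5)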
Example #47
def batch_rodrigues(theta):
    """
    Theta is N x 3
    """
    batch_size = theta.shape[0]
    xp = theta.xp

    angle = F.expand_dims(F.sqrt(F.batch_l2_norm_squared(theta + 1e-8)), -1)
    r = F.expand_dims(theta / F.tile(angle, 3), -1)

    angle = F.expand_dims(angle, -1)
    cos = F.cos(angle)
    sin = F.sin(angle)
    cos = F.tile(cos, (3, 3))
    sin = F.tile(sin, (3, 3))

    outer = F.matmul(r, r, transb=True)

    eyes = F.tile(F.expand_dims(
        Variable(xp.array(xp.eye(3), 'f')), 0), (batch_size, 1, 1))
    R = cos * eyes + (1 - cos) * outer + sin * batch_skew(r, batch_size)
    return R
Example #48
 def __call__(self, x, y):
     """
     Parameters
     -----------------
     x: Variable
         Feature of unlabeled samples.
     y: Variable
         Feature of unlabeled samples.
     """
     g = self.gamma ** 2
     z = F.expand_dims((x - y) ** 2, axis=0)
     o = F.exp(- F.linear(z, g))
     return o
Example #49
 def __call__(self, y, m_prev, s_prev, h_forward, h_backword, enable, disable_value):
     # m is memory cell of lstm, s is previous hidden output
     # calculate attention
     c = self._attention(h_forward, h_backword, s_prev, enable, disable_value)
     # decode once
     embeded_y = self.E(y)
     batch_size = y.shape[0]
     lstm_in = self.W(embeded_y) + self.U(s_prev) + self.C(c)
     m_tmp, s_tmp = F.lstm(m_prev, lstm_in)
     feed_prev = F.broadcast_to(F.expand_dims(y.data != IGNORE_LABEL, -1),
                                (batch_size, self.hidden_size))
     m = F.where(feed_prev, m_tmp, m_prev)
     s = F.where(feed_prev, s_tmp, s_prev)
     t = self.U_o(s) + self.V_o(embeded_y) + self.C_o(c)
     return self.W_o(t), m, s
Example #50
    def __call__(self, x_u_0, x_u_1):
        """
        Parameters
        -----------------
        x_u_0: Variable
            Feature of unlabeled samples.
        x_u_1: Variable
            Feature of unlabeled samples.

        """
        ffnn_u_0 = self.layers["ffnn_u_0"]
        ffnn_u_1 = self.layers["ffnn_u_1"]
        
        f_0 = F.softmax(ffnn_u_0(x_u_0))
        f_1 = F.softmax(ffnn_u_1(x_u_1))

        mid_outputs_0 = ffnn_u_0.mid_outputs
        mid_outputs_1 = ffnn_u_1.mid_outputs
        
        L = len(self.dims[1:])
        similarities = self.similarities.values()

        # Efficient computation
        ## sample similarity W^l summed over l
        W = 0
        for l in range(L):
            W += similarities[l](mid_outputs_0[l], mid_outputs_1[l])

        ## class similarity 
        f_0_norm = F.sum(f_0**2, axis=1)
        f_1_norm = F.sum(f_1**2, axis=1)
        f_0_f_1 = F.linear(f_0, f_1)
        f_0_norm, f_0_f_1, f_1_norm= \
                                      F.broadcast(
                                          *[f_0_norm,
                                            f_0_f_1,
                                            F.expand_dims(f_1_norm, 1)])
        F_ = f_0_norm - 2 * f_0_f_1 + f_1_norm
        print(np.max(F_.data))
        print(np.min(F_.data))
        print(len((np.where(F_.data < 0)[0])), np.prod(F_.data.shape))

        loss = F.sum(W * F_) / (self.batch_size * 2)

        self.loss = loss
        return loss
Example #51
  def _context(self, p, fb_mat, fbe_mat):
    batch_size, source_length, _ = fb_mat.data.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])

    return q
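The closing batch_matmul with transa=True treats the (batch, srclen) attention weights as per-sample row vectors and yields the attention-weighted sum of the encoder states. That step in isolation, with assumed shapes:

import numpy as np
import chainer.functions as F

batch, srclen, hidden2 = 2, 4, 6
a_mat = np.random.rand(batch, srclen).astype(np.float32)            # attention weights
fb_mat = np.random.rand(batch, srclen, hidden2).astype(np.float32)  # encoder states

q = F.reshape(F.batch_matmul(a_mat, fb_mat, transa=True), (batch, hidden2))
expected = np.einsum('bs,bsh->bh', a_mat, fb_mat)
assert np.allclose(q.array, expected, atol=1e-5)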
Example #52
    def _attention(self, h_forward, h_backword, s, enable, disable_value):
        batch_size = s.shape[0]
        sentence_size = len(h_forward)
        hidden_size = self.hidden_size
        xp = self.xp

        weighted_s = F.broadcast_to(F.expand_dims(self.W_a(s), axis=1),
                                    (batch_size, sentence_size, hidden_size))
        h = F.concat((F.concat(h_forward, axis=0), F.concat(h_backword, axis=0)))
        weighted_h = F.reshape(self.U_a(h), (batch_size, sentence_size, hidden_size))

        e = self.v_a(F.reshape(F.tanh(weighted_s + weighted_h),
                               (batch_size * sentence_size, hidden_size)))
        e = F.where(enable, F.reshape(e, (batch_size, sentence_size)), disable_value)
        alpha = F.softmax(e)
        c = F.batch_matmul(F.reshape(h, (batch_size, 2 * hidden_size, sentence_size)), alpha)
        return F.reshape(c, (batch_size, 2 * hidden_size))
Example #53
    def proportions(self, doc_ids, softmax=False):
        """ Given an array of document indices, return a vector
        for each document of just the unnormalized topic weights.

        Returns:
            doc_weights : chainer.Variable
                Two dimensional topic weights of each document.
        """
        w = self.weights(doc_ids)
        if softmax:
            size = w.data.shape
            mask = self.xp.random.random_integers(0, 1, size=size)
            y = (F.softmax(w * self.temperature) *
                 Variable(mask.astype('float32')))
            norm, y = F.broadcast(F.expand_dims(F.sum(y, axis=1), 1), y)
            return y / (norm + 1e-7)
        else:
            return w
Example #54
	def __call__(self, x):
		xp = chainer.cuda.get_array_module(x.data)
		batchsize = x.shape[0]
		if self.train_weights == False and self.initial_T is not None:
			self.T.W.data = self.initial_T

		M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
		M = F.expand_dims(M, 3)
		M_T = F.transpose(M, (3, 1, 2, 0))
		M, M_T = F.broadcast(M, M_T)

		norm = F.sum(abs(M - M_T), axis=2)
		eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
		c_b = F.exp(-(norm + 1e6 * eraser))
		o_b = F.sum(c_b, axis=2)

		if self.train_weights == False:
			self.initial_T = self.T.W.data

		return F.concat((x, o_b), axis=1)
Example #55
 def _calc_rpn_loss_bbox(self, rpn_bbox_pred, bbox_reg_targets, inds_inside):
     # rpn_bbox_pred has the shape of (1, 4 x n_anchors, feat_h, feat_w)
     n_anchors = self.proposal_layer._num_anchors
     # Reshape it into (4, A, K)
     rpn_bbox_pred = rpn_bbox_pred.reshape(4, n_anchors, -1)
     # Transpose it into (K, A, 4)
     rpn_bbox_pred = rpn_bbox_pred.transpose(2, 1, 0)
     # Reshape it into (K x A, 4)
     rpn_bbox_pred = rpn_bbox_pred.reshape(-1, 4)
     # Keep the number of bbox
     n_bbox = rpn_bbox_pred.shape[0]
     # Select bbox and ravel it
     rpn_bbox_pred = F.flatten(rpn_bbox_pred[inds_inside])
     # Create batch dimension
     rpn_bbox_pred = F.expand_dims(rpn_bbox_pred, 0)
     # Ravel the targets and create batch dimension
     bbox_reg_targets = bbox_reg_targets.ravel()[None, :]
     # Calc Smooth L1 Loss (When delta=1, huber loss is SmoothL1Loss)
     rpn_loss_bbox = F.huber_loss(rpn_bbox_pred, bbox_reg_targets,
                                  self._delta)
     rpn_loss_bbox /= n_bbox
     return rpn_loss_bbox.reshape(())
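The core of the loss is F.huber_loss applied to a single "batch" row created by expand_dims, then averaged over the number of boxes. The call itself on dummy regression targets, with delta=1 as the comment above suggests:

import numpy as np
import chainer.functions as F

pred = np.array([0.1, 0.9, -2.0, 0.3], dtype=np.float32)    # flattened box deltas
target = np.array([0.0, 1.0, 0.0, 0.0], dtype=np.float32)

loss = F.huber_loss(F.expand_dims(pred, 0), target[None, :], delta=1.0)
print(loss.shape)  # (1,): the default reduce mode sums along the second axis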
Example #56
    def __call__(self, beta, theta, get_skin=False, with_a=False):
        batch_size = beta.shape[0]

        # 1. Add shape blend shapes
        # (N x 10) x (10 x 6890*3) = N x 6890 x 3
        self.beta_shapedirs = F.matmul(beta, self.shapedirs)
        v_shaped = F.reshape(
            F.matmul(beta, self.shapedirs),
            [-1, self.size[0], self.size[1]]) + \
            F.repeat(self.v_template[None, ], batch_size, axis=0)
        self.v_shaped = v_shaped

        # 2. Infer shape-dependent joint locations.
        Jx = F.matmul(v_shaped[:, :, 0], self.J_regressor)
        Jy = F.matmul(v_shaped[:, :, 1], self.J_regressor)
        Jz = F.matmul(v_shaped[:, :, 2], self.J_regressor)
        J = F.stack([Jx, Jy, Jz], axis=2)

        self.J = J

        # 3. Add pose blend shapes
        # N x 24 x 3 x 3
        Rs = F.reshape(
            batch_rodrigues(F.reshape(theta, [-1, 3])), [-1, 24, 3, 3])
        self.Rs = Rs
        # Ignore global rotation.
        pose_feature = F.reshape(Rs[:, 1:, :, :] -
                                 F.repeat(F.repeat(Variable(self.xp.array(self.xp.eye(3), 'f'))[
                                          None, ], 23, axis=0)[None, ], batch_size, axis=0),
                                 [-1, 207])
        self.pose_feature = pose_feature

        # (N x 207) x (207, 20670) -> N x 6890 x 3
        v_posed = F.reshape(
            F.matmul(pose_feature, self.posedirs),
            [-1, self.size[0], self.size[1]]) + v_shaped

        # 4. Get the global joint location
        self.J_transformed, A = batch_global_rigid_transformation(
            Rs, J, self.parents)

        # 5. Do skinning:
        # W is N x 6890 x 24
        W = F.reshape(
            F.tile(self.weights, (batch_size, 1)), [batch_size, -1, 24])
        # (N x 6890 x 24) x (N x 24 x 16)
        T = F.reshape(
            F.matmul(W, F.reshape(A, [batch_size, 24, 16])),
            [batch_size, -1, 4, 4])
        v_posed_homo = F.concat(
            [v_posed, self.xp.ones([batch_size, v_posed.shape[1], 1], 'f')], 2)
        v_homo = F.matmul(T, F.expand_dims(v_posed_homo, -1))

        verts = v_homo[:, :, :3, 0]

        # Get cocoplus or lsp joints:
        joint_x = F.matmul(verts[:, :, 0], self.joint_regressor)
        joint_y = F.matmul(verts[:, :, 1], self.joint_regressor)
        joint_z = F.matmul(verts[:, :, 2], self.joint_regressor)
        joints = F.stack([joint_x, joint_y, joint_z], axis=2)

        return verts, joints, Rs, A
Example #57
def logsoftmax_no_mask(x, mask, zero_pad, axis):
    x_logsumexp = logsumexp(x, mask, zero_pad, axis)

    return x - F.broadcast_to(F.expand_dims(x_logsumexp, 1), x.shape)
Example #58
 def test_invalid_dim(self):
     x = chainer.Variable(self.x)
     with self.assertRaises(chainer.utils.type_check.InvalidType):
         functions.expand_dims(x, self.x.ndim + 1)
     with self.assertRaises(chainer.utils.type_check.InvalidType):
         functions.expand_dims(x, -self.x.ndim - 2)
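The test feeds the first invalid axis on each side: for an input with ndim dimensions, expand_dims accepts axes from -(ndim + 1) up to ndim, and anything outside that range fails the type check. A quick illustration:

import numpy as np
import chainer.functions as F

x = np.zeros((2, 3), dtype=np.float32)  # ndim == 2
print(F.expand_dims(x, 2).shape)        # (2, 3, 1) -- largest valid positive axis
print(F.expand_dims(x, -3).shape)       # (1, 2, 3) -- smallest valid negative axis
# F.expand_dims(x, 3) and F.expand_dims(x, -4) raise chainer.utils.type_check.InvalidType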
Example #59
    def __call__(self, h, h_new, g, step=0):
        """
        Describes the module for a single layer update.
        Do not forget to reset the GRU for each batch...

        :param h: minibatch by num_nodes by hidden_dim numpy array.
                current local node hidden states as input of the vanilla GNN
        :param h_new: minibatch by num_nodes by hidden_dim numpy array.
                updated local node hidden states as output from the vanilla GNN
        :param adj: minibatch by bond_types by num_nodes by num_nodes 1/0 array.
                Adjacency matrices over several bond types
        :param g: minibatch by hidden_dim_super numpy array.
                current super node hidden state
        :param step: integer, the layer index
        :return: updated h and g
        """

        xp = self.xp

        # (minibatch, atom, ch)
        mb, atom, ch = h.shape
        out_ch = ch

        #
        # Transmitter unit: inter-module message passing
        #
        
        # non linear update of the super node
        g_new = functions.relu(self.F_super[step](g))

        # original --> super transmission

        h1 = functions.expand_dims(h, 2)
        #assert h1.shape == (mb, atom, 1, ch)
        h1 = functions.broadcast_to(h1, [mb, atom, self.n_heads, ch])
        h1 = functions.reshape(h1, [mb, atom, self.n_heads* ch])
        #assert h1.shape==(mb, atom, self.n_heads * ch)
        h_j = functions.expand_dims(h, 1)
        h_j = functions.broadcast_to(h_j, (mb, self.n_heads, atom, ch))
        #assert h_j.shape==(mb, self.n_heads, atom, ch)

        # expand h_super
        g_extend = functions.expand_dims(g, 1)
        # assert g_extend.shape==(mb, 1, self.hidden_dim_super)
        g_extend = functions.broadcast_to(g_extend, (mb, self.n_heads, self.hidden_dim_super))
        # assert g_extend.shape==(mb, self.n_heads, self.hidden_dim_super)
        g_extend = functions.expand_dims(g_extend, 2)
        # assert g_extend.shape==(mb, self.n_heads, 1, self.hidden_dim_super)

        # update for attention-message B h_i
        # mb, atom, n_heads * ch
        Bh_i = self.B[step](h1)
        # assert Bh_i.shape==(mb, atom, self.n_heads * self.hidden_dim_super)
        # mb, atom, num_head, ch
        Bh_i = functions.reshape(Bh_i, [mb, atom, self.n_heads, self.hidden_dim_super])
        # mb, num_head, atom, ch
        Bh_i = functions.transpose(Bh_i, [0, 2, 1, 3])
        # assert Bh_i.shape==(mb, self.n_heads, atom, self.hidden_dim_super)

        # take g^{T} * B * h_i
        # indexed by i
        # mb, self.n_heads, atom (i)
        b_hi = functions.matmul(g_extend, Bh_i, transb=True)  # This will reduce the last hidden_dim_super axis
        # assert b_hi.shape==(mb, self.n_heads, 1, atom)

        # softmax. sum/normalize over the last axis.
        # mb, self.n_heads, atom (i, normalized)
        attention_i = functions.softmax(b_hi, axis=3)
        if self.dropout_ratio > 0.0:
            attention_i = functions.dropout(attention_i,
                                            ratio=self.dropout_ratio)
        # assert attention_i.shape==(mb, self.n_heads, 1, atom)

        # element-wise product --> sum over i
        # mb, num_head, hidden_dim_super
        attention_sum = functions.matmul(attention_i, h_j)
        # assert attention_sum.shape==(mb, self.n_heads, 1, ch)
        attention_sum = functions.reshape(attention_sum, (mb, self.n_heads * ch))
        # assert attention_sum.shape==(mb, self.n_heads * ch)

        # weighting h for different heads
        h_trans = self.V_super[step](attention_sum)
        # assert intermediate_h.shape==(mb, self.n_heads * ch)
        # compress heads
        h_trans = self.W_super[step](h_trans)
        h_trans = functions.tanh(h_trans)
        # assert intermediate_h.shape==(mb, self.hidden_dim_super)


        # g_trans: super --> original transmission

        # for local updates
        g_trans = self.F_super[step](g)
        g_trans = functions.tanh(g_trans)
        # assert intermediate_h_super.shape==(mb, self.hidden_dim)
        g_trans = functions.expand_dims(g_trans, 1)
        # assert intermediate_h_super.shape==(mb, 1, self.hidden_dim)
        g_trans = functions.broadcast_to(g_trans, (mb, atom, self.hidden_dim))
        # assert intermediate_h_super.shape==(mb, atom, self.hidden_dim)


        #
        # Warp Gate unit
        #
        z_local = self.H_local[step](h_new) + self.G_local[step](g_trans)
        z_local = functions.broadcast_to(z_local, (mb, atom, self.hidden_dim))
        if self.dropout_ratio > 0.0:
            z_local = functions.dropout(z_local,ratio=self.dropout_ratio)
        z_local = functions.sigmoid(z_local)
        merged_h = (1.0-z_local) * h_new + z_local * g_trans
        # assert new_h.shape==(mb, atom, ch)

        z_super = self.H_super[step](h_trans) + self.G_super[step](g_new)
        z_super = functions.broadcast_to(z_super, (mb, self.hidden_dim_super))
        if self.dropout_ratio > 0.0:
            z_super = functions.dropout(z_super,ratio=self.dropout_ratio)
        z_super = functions.sigmoid(z_super)
        merged_g = (1.0-z_super) * h_trans + z_super * g_new
        # assert out_h_super.shape==(mb, self.hidden_dim_super)

        #
        # Self recurrent
        #
        out_h = functions.reshape(merged_h, (mb * atom, self.hidden_dim))
        out_h = self.GRU_local(out_h)
        out_h = functions.reshape(out_h, (mb, atom, self.hidden_dim))

        out_g = self.GRU_super(merged_g)

        return out_h, out_g
Example #60
 def check_backward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.expand_dims(x, self.axis)
     y.grad = y.data
     y.backward()
     testing.assert_allclose(x.data, x.grad, atol=0, rtol=0)