Example #1
    def encode(self, x_input, x_query, answer):
        m = self.encode_input(x_input)
        u = self.encode_query(x_query)

#        print "m.data.shape", m.data.shape
#        print "u.data.shape", u.data.shape
        mu = functions.matmul(m, u, transb=True)
        # print "mu.data.shape", mu.data.shape
        # print "mu.data",  mu.data
        p = functions.softmax(mu)
#        print p.data
        c = self.encode_output(x_input)
        # print "p.data.shape:", p.data.shape
        # print "c.data.shape:", c.data.shape
#        print c.data.shape		#(3,50)
#        print "functions.swapaxes(c ,1, 1):", functions.swapaxes(c ,1, 1).data.shape
        o = functions.matmul(functions.swapaxes(c, 1, 0), p)   # transpose c and take the inner product with p   (2, 50, 1)
        o = functions.swapaxes(o, 1, 0)  # (2, 50)
#        print "u.data.shape:", u.data.shape
#        print "o.data.shape:", o.data.shape
#        print "u.data:", u.data
#        print "o.data:", o.data
#        print "(u+o).data.shape:", (u+o).data.shape
        predict = self.W(u + o)
        loss = functions.softmax_cross_entropy(predict, answer)
        return loss
Example #2
def forward(x, p, a, A=None,P=None):

    conv1_1, conv2_1, conv3_1, conv4_1,conv5_1, = func(inputs={'data': x}, outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    conv1_1F,conv2_1F, conv3_1F, conv4_1F,conv5_1F, = [ reshape2(x) for x in [conv1_1,conv2_1, conv3_1, conv4_1,conv5_1]]
    conv1_1G,conv2_1G, conv3_1G, conv4_1G,conv5_1G, = [ Fu.matmul(x, x, transa=False, transb=True) for x in [conv1_1F,conv2_1F, conv3_1F, conv4_1F,conv5_1F]]
    
    # Because P and A do not change across iterations, it's better to calculate them only once.
    if A is None and P is None:
        #compute matrix P 
        conv1_1,conv2_1, conv3_1, conv4_1,conv5_1, = func(inputs={'data': p}, outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
        conv1_1P,conv2_1P, conv3_1P, conv4_1P,conv5_1P, = [ reshape2(x) for x in [conv1_1,conv2_1, conv3_1, conv4_1,conv5_1]]
        #compute matrix A
        conv1_1,conv2_1, conv3_1, conv4_1,conv5_1, = func(inputs={'data': a}, outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
        conv1_1A0,conv2_1A0, conv3_1A0, conv4_1A0,conv5_1A0, = [ reshape2(x) for x in [conv1_1,conv2_1, conv3_1, conv4_1,conv5_1]]
        conv1_1A,conv2_1A, conv3_1A, conv4_1A,conv5_1A, = [ Fu.matmul(x, x, transa=False, transb=True) for x in [conv1_1A0,conv2_1A0, conv3_1A0, conv4_1A0,conv5_1A0]]
    else:
        conv1_1P,conv2_1P, conv3_1P, conv4_1P,conv5_1P,=P
        conv1_1A,conv2_1A, conv3_1A, conv4_1A,conv5_1A,=A

    L_content = Fu.mean_squared_error(conv4_1F,conv4_1P)/2

    # caution! the dividing numbers are hard-coded!
    # this part corresponds to equation (4) in the original paper
    # to check the current N and M, run the following:
    # [x.data.shape for x in [conv1_1F, conv2_1F, conv3_1F, conv4_1F, conv5_1F]]
    L_style = (Fu.mean_squared_error(conv1_1G, conv1_1A)/(4*64*64*50176*50176)
    + Fu.mean_squared_error(conv2_1G, conv2_1A)/(4*128*128*12544*12544)
    + Fu.mean_squared_error(conv3_1G, conv3_1A)/(4*256*256*3136*3136)
    + Fu.mean_squared_error(conv4_1G, conv4_1A)/(4*512*512*784*784)
    )/4  # this is equal weighting of E_l

    loss = a_p_ratio*L_content + L_style
    return loss 
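A note on the hard-coded divisors in L_style above: they follow the per-layer style loss of Gatys et al., the "equation (4) in the original paper" referenced in the comments, assuming N_l feature maps of M_l pixels each at layer l:

    E_l = (1 / (4 * N_l^2 * M_l^2)) * sum_ij (G^l_ij - A^l_ij)^2

so conv1_1 with N_1 = 64 channels and M_1 = 224*224 = 50176 pixels gives the 4*64*64*50176*50176 divisor, and likewise for the other layers.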
Example #3
    def calc_loss(self, sys_ys, ref_ys, dists):
        loss = wrapper.make_var([[0.0]])
        sys_Tscore = wrapper.make_var([[0.0]])
        ref_Tscore = wrapper.make_var([[0.0]])
        
        sys_Tscore, sys_vecs = self.calc_trans_score(sys_ys) # chainer.Variable, concatenation of one-hot vectors
        sys_matrix = wrapper.make_var([sys_vecs])

        ref_Tscore, ref_vecs = self.calc_trans_score(ref_ys) # chainer.Variable, one-hot vectors
        ref_matrix = wrapper.make_var([ref_vecs])

        dists_matrix = functions.concat(tuple(dists))

        # count how many labels differ
        diff_cnt = wrapper.make_var([[0.0]])
        for sys_y, ref_y in zip(sys_ys, ref_ys):
            if sys_y != ref_y:
                diff_cnt += wrapper.make_var([[1.0]])

        #max 0
        loss = functions.matmul(sys_matrix, dists_matrix, transb=True) + sys_Tscore\
               - functions.matmul(ref_matrix, dists_matrix, transb=True) - ref_Tscore\
               + self.__eta * diff_cnt

        """
        debug
        print("sys_score trans : ", wrapper.get_data(functions.matmul(sys_matrix, dists_matrix, transb=True)), wrapper.get_data(sys_Tscore))
        print("ref_score trans : ", wrapper.get_data(functions.matmul(ref_matrix, dists_matrix, transb=True)), wrapper.get_data(ref_Tscore))
        print("diff_cnt penal : ",wrapper.get_data(diff_cnt), wrapper.get_data(self.__eta * diff_cnt))
        """

        return loss
Example #4
    def __call__(self, hx, cx, xs, enc_hs):
        xs_embed = [self.embed(x) for x in xs]
        hy, cy, ys = self.Nlstm(hx, cx, xs_embed)

        ys_pad = F.pad_sequence(ys, length=None, padding=0.0)
        enc_hs = F.pad_sequence(enc_hs, length=None, padding=0.0)

        mask = self.xp.all(enc_hs.data == 0, axis=2, keepdims=True)
        mask_num = self.xp.full(mask.shape, -1024.0, dtype=self.xp.float32)

        alignment = []
        decode = []

        ys_pad = F.transpose(ys_pad, (1, 0, 2))
        for y in ys_pad:
            y = F.reshape(y, (*y.shape, 1))
            score = F.matmul(enc_hs, y)
            score = F.where(mask, mask_num, score)
            align = F.softmax(score, axis=1)
            context_vector = F.matmul(enc_hs, align, True, False)
            t = self.W_c(
                F.dropout(F.concat((y, context_vector), axis=1), self.dropout))
            ys_proj = self.proj(F.dropout(t, self.dropout))
            alignment.append(F.reshape(align, (len(xs), -1)))
            decode.append(ys_proj)

        decode = F.stack(decode, axis=1)
        alignment = F.stack(alignment, axis=1)
        return hy, cy, decode, alignment.data
Example #5
    def __call__(self, x1, x2):
        # inputs: x1 = [x1_1 ... x1_i ... x1_n1]; dim(x1_i)=d1=left_size
        #         x2 = [x2_1 ... x2_j ... x2_n2]; dim(x2_j)=d2=right_size
        # output: o_ij = x1_i * W * x2_j + x1_i * U + V * x2_j + b

        n1 = x1.shape[0]
        n2 = x2.shape[0]
        x2T = F.transpose(x2)
        x1_W = F.matmul(x1, self.W)  # (n1, d1) * (d1, d2) => (n1, d2)
        res = F.matmul(x1_W, x2T)  # (n1, d2) * (d2, n2) => (n1, n2)

        if self.U is not None:
            x1_U = F.broadcast_to(
                F.matmul(x1, self.U),
                (n1, n2))  # (n1, d1) * (d1, 1)  => (n1, 1) -> (n1, n2)
            # print('x1*U', x1_U.shape)
            res = res + x1_U

        if self.V is not None:  # TODO fix
            V_x2 = F.broadcast_to(
                F.matmul(self.V, x2T),
                (n1, n2))  # (1, d2) * (d2, n2) => (1, n2) -> (n1, n2)
            res = res + V_x2

        if self.b is not None:
            b = F.broadcast_to(self.b, (n1, n2))
            res = res + b

        return res
Example #6
 def __call__(self, a_list, b_list, a_mask, b_mask, knowledge):
     # a_list: Question
     # b_list: Story text
     ya_ori = self.input_encoding(self.input_lstm_a, a_list, a_mask)
     yb_ori = self.input_encoding(self.input_lstm_b, b_list, b_mask)
     alpha, _ = self.make_alpha(
         ya_ori, yb_ori, a_mask,
         knowledge)  # (minibatch, maxlen(a_list), maxlen(b_list))
     beta, beta_r = self.make_alpha(
         yb_ori, ya_ori, b_mask, xp.swapaxes(
             knowledge, axis1=1,
             axis2=2))  # (minibatch, maxlen(b_list), maxlen(a_list))
     ya_con, yb_con = self.kec(ya_ori, yb_ori, alpha, beta)
     # ya_loc = self.kelic(ya_ori, ya_con)
     yb_loc = self.kelic(yb_ori, yb_con, beta_r)
     h_start = self.modeling(self.modeling_start_lstm, yb_loc, b_mask)
     h_end = self.modeling(self.modeling_end_lstm, h_start, b_mask)
     batchsize, _, hidden_size = F.concat((yb_loc, h_start), axis=2).shape
     system_start = F.matmul(F.broadcast_to(self.W1,
                                            (batchsize, 1, hidden_size)),
                             F.concat((yb_loc, h_start), axis=2),
                             transb=True).reshape(batchsize, -1)
     system_end = F.matmul(F.broadcast_to(self.W2,
                                          (batchsize, 1, hidden_size)),
                           F.concat((yb_loc, h_end), axis=2),
                           transb=True).reshape(batchsize, -1)
     return system_start, system_end
Example #7
    def forward(self, e_var, s_var=None, mask=None, batch=1):
        """Core function of the Multi-head attention layer.

        Args:
            e_var (chainer.Variable): Variable of input array.
            s_var (chainer.Variable): Variable of source array from encoder.
            mask (chainer.Variable): Attention mask.
            batch (int): Batch size.

        Returns:
            chainer.Variable: Output of multi-head attention layer.

        """
        xp = self.xp
        if s_var is None:
            # (batch, head, time1/2, d_k)
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
        else:
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
        scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(
            0, 2, 3, 1)) / np.sqrt(self.d_k)
        if mask is not None:
            mask = xp.stack([mask] * self.h, axis=1)
            scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE,
                                                   'f'))
        self.attn = F.softmax(scores, axis=-1)
        p_attn = F.dropout(self.attn, self.dropout)
        x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
        x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
        return self.linear_out(x)
Example #8
 def evaluate_actions(self, actions):
     u_minus_mu = actions - self.mu
     a = - 0.5 * \
         F.matmul(F.matmul(
             u_minus_mu[:, None, :], self.mat),
             u_minus_mu[:, :, None])[:, 0, 0]
     return a + F.reshape(self.v, (self.batch_size, ))
Example #9
    def calc_loss(self, sys_ys, ref_ys, dists):
        loss = wrapper.make_var([[0.0]])
        sys_Tscore = wrapper.make_var([[0.0]])
        ref_Tscore = wrapper.make_var([[0.0]])

        sys_Tscore, sys_vecs = self.calc_trans_score(
            sys_ys)  # chainer.Variable, concatenation of one-hot vectors
        sys_matrix = wrapper.make_var([sys_vecs])

        ref_Tscore, ref_vecs = self.calc_trans_score(
            ref_ys)  # chainer.Variable, one-hot vectors
        ref_matrix = wrapper.make_var([ref_vecs])

        dists_matrix = functions.concat(tuple(dists))

        # count how many labels differ
        diff_cnt = wrapper.make_var([[0.0]])
        for sys_y, ref_y in zip(sys_ys, ref_ys):
            if sys_y != ref_y:
                diff_cnt += wrapper.make_var([[1.0]])

        #max 0
        loss = functions.matmul(sys_matrix, dists_matrix, transb=True) + sys_Tscore\
               - functions.matmul(ref_matrix, dists_matrix, transb=True) - ref_Tscore\
               + self.__eta * diff_cnt
        """
        debug
        print("sys_score trans : ", wrapper.get_data(functions.matmul(sys_matrix, dists_matrix, transb=True)), wrapper.get_data(sys_Tscore))
        print("ref_score trans : ", wrapper.get_data(functions.matmul(ref_matrix, dists_matrix, transb=True)), wrapper.get_data(ref_Tscore))
        print("diff_cnt penal : ",wrapper.get_data(diff_cnt), wrapper.get_data(self.__eta * diff_cnt))
        """

        return loss
Example #10
 def forward(self, x1, x2):
     xp = self.xp
     out_size = self.out_size
     batch_size, n1, d1 = x1.shape
     if not self.nobias[0]:
         x1 = F.concat((x1, xp.ones((batch_size, n1, 1), xp.float32)),
                       axis=2)
         d1 += 1
     n2, d2 = x2.shape[1:]
     if not self.nobias[1]:
         x2 = F.concat((x2, xp.ones((batch_size, n2, 1), xp.float32)),
                       axis=2)
         d2 += 1
     # (B * n1, d1) @ (d1, O * d2) => (B * n1, O * d2)
     x1W = F.matmul(
         F.reshape(x1, (batch_size * n1, d1)),
         F.reshape(F.transpose(self.W, (0, 2, 1)), (d1, out_size * d2)))
     # (B, n1 * O, d2) @ (B, d2, n2) => (B, n1 * O, n2)
     x1Wx2 = F.matmul(F.reshape(x1W, (batch_size, n1 * out_size, d2)),
                      x2,
                      transb=True)
     # => (B, n1, n2, O)
     y = F.transpose(F.reshape(x1Wx2, (batch_size, n1, out_size, n2)),
                     (0, 1, 3, 2))
     assert y.shape == (batch_size, n1, n2, out_size)
     if not self.nobias[2]:
         y += F.broadcast_to(self.b, y.shape)
     return y
Example #11
    def encode_decode_train(self, in_word_list, out_word_list, train=True):
        xp = cuda.cupy if self.gpuid >= 0 else np
        self.reset_state()
        # Add GO_ID, EOS_ID to decoder input
        decoder_word_list = [GO_ID] + out_word_list + [EOS_ID]
        # encode list of words/tokens
        enc_states = self.encode_list(in_word_list, train=train)
        # initialize decoder LSTM to final encoder state
        self.set_decoder_state()
        # decode and compute loss
        if not train:
            with chainer.no_backprop_mode():
                # convert list of tokens into chainer variable list
                var_dec = (Variable(
                    xp.asarray(decoder_word_list, dtype=np.int32).reshape(
                        (-1, 1))))
                # Initialise first decoded word to GO_ID
                pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))
        else:
            # convert list of tokens into chainer variable list
            var_dec = (Variable(
                xp.asarray(decoder_word_list, dtype=np.int32).reshape(
                    (-1, 1))))
            # Initialise first decoded word to GO_ID
            pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))

        # compute loss
        self.loss = 0
        # decode tokens
        for next_word_var in var_dec[1:]:
            self.decode(pred_word, train=train)
            if self.attn == NO_ATTN:
                predicted_out = self.out(self[self.lstm_dec[-1]].h)
            else:
                #Add attention
                dot_score = F.matmul(enc_states,
                                     self[self.lstm_dec[-1]].h,
                                     transb=True)
                alpha_list = F.softmax(F.transpose(dot_score))
                context_vector = F.matmul(alpha_list, enc_states)
                concat_vector = F.concat(
                    (self[self.lstm_dec[-1]].h, context_vector), axis=1)
                predicted_out = self.out(self.attention_context(concat_vector))

            # compute loss
            prob = F.softmax(predicted_out)

            pred_word = self.select_word(prob, train=train, sample=False)
            '''
            ___QUESTION-1-DESCRIBE-E-START___
            Explain what loss is computed with an example
            What does this value mean?
            '''
            self.loss += F.softmax_cross_entropy(predicted_out, next_word_var)
            '''___QUESTION-1-DESCRIBE-E-END___'''

        report({"loss": self.loss}, self)

        return self.loss
Example #12
def calc_score(labels, ts, dists):
    score = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_labels = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_ts = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    # make labels vector and labels transitions vector
    labels_vec = list()
    pre_label = None
    for label in labels:
        if pre_label is not None:
            T_labels += trans2para(pre_label, label)
        for i in range(label_num):
            if i == label:
                labels_vec.append(1)
            else:
                labels_vec.append(0)
        pre_label = label
    labels_matrix = make_chainer_matrix(labels_vec)

    # make true labels vector
    ts_vec = list()
    pre_label = None
    for t in ts:
        if pre_label is not None:
            T_ts += trans2para(pre_label, t)
        for i in range(label_num):
            if i == t:
                ts_vec.append(1)
            else:
                ts_vec.append(0)
        pre_label = t
    ts_matrix = make_chainer_matrix(ts_vec)
    dists_matrix = F.concat(tuple(dists))

    #print(ts_vec)
    #print(labels_vec)
    #print(len(labels_matrix.data[0]))
    #print(len(ts_matrix.data[0]))
    #print(len(dists_matrix.data[0]))

    # make loss (difference between y_hat and y)
    diff_cnt = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    for i in range(len(labels)):
        if labels[i] != ts[i]:
            diff_cnt += chainer.Variable(np.array([[1.0]], dtype=np.float32))
            correct = get_onehot(ts[i])
            #print()
            #print(dists[i].data)
            #print(correct.data)
            #diff_cnt += F.softmax_cross_entropy(dists[i], correct)

    predict_score = F.matmul(labels_matrix, dists_matrix,
                             transb=True) + T_labels
    true_score = F.matmul(ts_matrix, dists_matrix, transb=True) + T_ts

    score = predict_score - true_score + eta * diff_cnt
    #print('predict_score:', predict_score.data)
    #print('true_score:', true_score.data)
    #print('loss:', eta * diff_cnt.data)
    return score
Example #13
def batch_global_rigid_transformation(Rs, Js, parent, rotate_base=False):
    """
    Computes absolute joint locations given pose.

    rotate_base: if True, rotates the global rotation by 90 deg in x axis.
    if False, this is the original SMPL coordinate.

    Args:
      Rs: N x 24 x 3 x 3 rotation matrices of K joints
      Js: N x 24 x 3, joint locations before posing
      parent: 24 holding the parent id for each index

    Returns
      new_J : `Tensor`: N x 24 x 3 location of absolute joints
      A     : `Tensor`: N x 24 x 4 x 4 relative joint transformations for LBS.
    """
    xp = Rs.xp
    N = Rs.shape[0]
    if rotate_base:
        print('Flipping the SMPL coordinate frame!!!!')
        rot_x = Variable(xp.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype=Rs.dtype))
        rot_x = F.reshape(F.tile(rot_x, [N, 1]), [N, 3, 3])
        root_rotation = F.matmul(Rs[:, 0, :, :], rot_x)
    else:
        root_rotation = Rs[:, 0, :, :]

    # Now Js is N x 24 x 3 x 1
    Js = F.expand_dims(Js, -1)

    def make_A(R, t, name=None):
        # Rs is N x 3 x 3, ts is N x 3 x 1
        R_homo = F.pad(R, [[0, 0], [0, 1], [0, 0]], 'constant')
        t_homo = F.concat([t, xp.ones([N, 1, 1], 'f')], 1)
        return F.concat([R_homo, t_homo], 2)

    A0 = make_A(root_rotation, Js[:, 0])
    results = [A0]
    for i in range(1, parent.shape[0]):
        j_here = Js[:, i] - Js[:, parent[i]]
        A_here = make_A(Rs[:, i], j_here)
        res_here = F.matmul(results[parent[i]], A_here)
        results.append(res_here)

    # 10 x 24 x 4 x 4
    results = F.stack(results, axis=1)

    new_J = results[:, :, :3, 3]

    # --- Compute relative A: Skinning is based on
    # how much the bone moved (not the final location of the bone)
    # but (final_bone - init_bone)
    # ---
    Js_w0 = F.concat([Js, xp.zeros([N, 24, 1, 1], 'f')], 2)
    init_bone = F.matmul(results, Js_w0)
    # Append empty 4 x 3:
    init_bone = F.pad(init_bone, [[0, 0], [0, 0], [0, 0], [3, 0]], 'constant')
    A = results - init_bone

    return new_J, A
Example #14
    def __call__(self, h, g, step=0):
        mb, atom, ch = h.shape

        h_j = functions.expand_dims(h, 1)
        # h_j.shape == (mb, self.n_heads, atom, ch)
        h_j = functions.broadcast_to(h_j, (mb, self.n_heads, atom, ch))

        # expand h_super
        # g_extend.shape (mb, 1, self.hidden_dim_super)
        g_extend = functions.expand_dims(g, 1)
        # g_extend.shape == (mb, self.n_heads, self.hidden_dim_super)
        g_extend = functions.broadcast_to(
            g_extend, (mb, self.n_heads, self.hidden_dim_super))
        # g_extend.shape == (mb, self.n_heads, 1, self.hidden_dim_super)
        g_extend = functions.expand_dims(g_extend, 2)

        # update for attention-message B h_i
        # h (mb, atom, ch)
        # Bh_i.shape == (mb, atom, self.n_heads * self.hidden_dim_super)
        Bh_i = self.B(h)
        # Bh_i.shape == (mb, atom, num_head, ch)
        Bh_i = functions.reshape(
            Bh_i, (mb, atom, self.n_heads, self.hidden_dim_super))
        # Bh_i.shape == (mb, num_head, atom, ch)
        Bh_i = functions.transpose(Bh_i, [0, 2, 1, 3])

        # take g^{T} * B * h_i
        # indexed by i
        # mb, self.n_heads, atom(i)
        # b_hi.shape == (mb, self.n_heads, 1, atom)
        # This will reduce the last hidden_dim_super axis
        b_hi = functions.matmul(g_extend, Bh_i, transb=True)

        # softmax. sum/normalize over the last axis.
        # mb, self.n_heads, atom (i-normalized)
        # attention_i.shape == (mb, self.n_heads, 1, atom)
        attention_i = functions.softmax(b_hi, axis=3)
        if self.dropout_ratio > 0.0:
            attention_i = functions.dropout(attention_i,
                                            ratio=self.dropout_ratio)

        # element-wise product --> sum over i
        # mb, num_head, hidden_dim_super
        # attention_sum.shape == (mb, self.n_heads, 1, ch)
        attention_sum = functions.matmul(attention_i, h_j)
        # attention_sum.shape == (mb, self.n_heads * ch)
        attention_sum = functions.reshape(attention_sum,
                                          (mb, self.n_heads * ch))

        # weighting h for different heads
        # intermediate_h.shape == (mb, self.n_heads * ch)
        # TODO (nakago): Consider to delete `V_super` maybe not necessary.
        # TODO (nakago): Consider to move `V_super` to calculate `h_j`??
        h_trans = self.V_super(attention_sum)
        # compress heads
        h_trans = self.W_super(h_trans)
        # intermediate_h.shape == (mb, self.hidden_dim_super)
        h_trans = self.activation(h_trans)
        return h_trans
Example #15
    def test_invalid_shape(self):
        x_data = numpy.zeros((2, 3, 4), dtype=numpy.float32)
        y_data = numpy.zeros((1, 4, 3), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)
Example #16
    def _a(self, x, a):
        L_matrix = self._L_matrix(x)
        mu = self._mu(x)

        P_matrix = F.matmul(L_matrix, L_matrix, transb=True)
        a_minus_mu = (a - mu)[:, :, None]
        return -0.5 * F.matmul(
            a_minus_mu, F.matmul(P_matrix, a_minus_mu), transa=True)[:, 0]
Example #17
    def test_invalid_ndim(self):
        x_data = numpy.zeros((3, 2, 5), dtype=numpy.float32)
        y_data = numpy.zeros((3, 5), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)
Example #18
def spectral_normalize(weight, init_u):
    W = weight.reshape(weight.shape[0], -1)  #C x N
    v = F.normalize(F.matmul(W, init_u, transa=True), eps=1e-12,
                    axis=0)  #N x C * C x 1 -> N x 1
    u = F.normalize(F.matmul(W, v), eps=1e-12, axis=0)  #C x N * N x 1 -> C x 1
    sigma = F.matmul(F.matmul(u, W, transa=True),
                     v)  #1 x C * C x N * N x -> 1 x 1 (spectral norm)
    return weight / sigma
Example #19
 def update(self, data_x):
     u = F.matmul(self.Wi,
                  F.vstack(((xp.array([[1]], dtype=xp.float32), data_x))))
     r_tld = self.resv_act(F.matmul(self.Wr, self.resv) + u)
     new_r = (1 - self.leaking_rate) * self.resv \
         + self.leaking_rate * r_tld
     self.resv = new_r
     return new_r
Example #20
    def test_invalid_ndim(self):
        x_data = numpy.zeros((3, 2, 5), dtype=numpy.float32)
        y_data = numpy.zeros((3, 5), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)
Example #21
    def __call__(self, s, a):
        L_matrix = self._L(s)
        mu = self._mu(s)

        P_matrix = F.matmul(L_matrix, L_matrix, transb=True)
        a_minus_mu = (a - mu)[:, :, None]
        return -0.5 * F.matmul(
            a_minus_mu, F.matmul(P_matrix, a_minus_mu), transa=True)[:, 0]
Example #22
    def test_invalid_shape(self):
        x_data = numpy.zeros((2, 3, 4), dtype=numpy.float32)
        y_data = numpy.zeros((1, 4, 3), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)
Example #23
    def encode_decode_train(self,
                            in_word_list,
                            out_word_list,
                            train=True,
                            sample=False):
        xp = cuda.cupy if self.gpuid >= 0 else np
        self.reset_state()
        # Add GO_ID, EOS_ID to decoder input
        decoder_word_list = [GO_ID] + out_word_list + [EOS_ID]
        # encode list of words/tokens
        enc_states = self.encode_list(in_word_list, train=train)
        # initialize decoder LSTM to final encoder state
        self.set_decoder_state()
        # decode and compute loss
        # convert list of tokens into chainer variable list
        var_dec = (Variable(xp.asarray(decoder_word_list,
                                       dtype=np.int32).reshape((-1, 1)),
                            volatile=not train))
        # Initialise first decoded word to GOID
        pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32),
                             volatile=not train)

        # compute loss
        self.loss = 0
        # decode tokens
        for next_word_var in var_dec[1:]:
            self.decode(pred_word, train=train)
            if self.attn == NO_ATTN:
                predicted_out = self.out(self[self.lstm_dec[-1]].h)
            else:
                # __QUESTION Add attention
                pass

                c = F.matmul((self[self.lstm_dec[-1]].h),
                             enc_states,
                             transb=True)
                score = F.softmax(c)
                ct = F.matmul(score, enc_states)
                s = F.concat((ct, (self[self.lstm_dec[-1]].h)))
                hs = F.tanh(s)
                predict = self.attention(hs)
                predicted_out = self.out(predict)
            # compute loss
            prob = F.softmax(predicted_out)

            pred_word = self.select_word(prob, train=train, sample=False)
            # pred_word = Variable(xp.asarray([pred_word.data], dtype=np.int32), volatile=not train)
            '''
            ___QUESTION-1-DESCRIBE-E-START___
            Explain what loss is computed with an example
            What does this value mean?
            '''
            self.loss += F.softmax_cross_entropy(predicted_out, next_word_var)
            '''___QUESTION-1-DESCRIBE-E-END___'''

        report({"loss": self.loss}, self)

        return self.loss
Example #24
def calc_score(labels, ts, dists):
    score = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_labels = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_ts = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    # make labels vector and labels transitions vector
    labels_vec = list()
    pre_label = None
    for label in labels:
        if pre_label is not None:
            T_labels += trans2para(pre_label, label)
        for i in range(label_num):
            if i == label:
                labels_vec.append(1)
            else:
                labels_vec.append(0)
        pre_label = label
    labels_matrix = make_chainer_matrix(labels_vec)
    
    # make true labels vector
    ts_vec = list()
    pre_label = None
    for t in ts:
        if pre_label is not None:
            T_ts += trans2para(pre_label, t)
        for i in range(label_num):
            if i == t:
                ts_vec.append(1)
            else:
                ts_vec.append(0)
        pre_label = t
    ts_matrix = make_chainer_matrix(ts_vec)
    dists_matrix = F.concat(tuple(dists))

    #print('gold_labels:',ts_vec)
    #print('labels:', labels_vec)
    #print('labels_matrix.data[0]:',labels_matrix.data[0])
    #print(len(ts_matrix.data[0]))
    #print(len(dists_matrix.data[0]))

    # make loss (difference between y_hat and y)
    diff_cnt = chainer.Variable(np.array([[0.0]], dtype=np.float32))  
    for i in range(len(labels)):
        if labels[i] != ts[i]:
            diff_cnt += chainer.Variable(np.array([[1.0]], dtype=np.float32))
            correct = get_onehot(ts[i])
            #print()
            #print(dists[i].data)
            #print(correct.data)
            #diff_cnt += F.softmax_cross_entropy(dists[i], correct)
     
    predict_score = F.matmul(labels_matrix, dists_matrix, transb=True) + T_labels
    true_score = F.matmul(ts_matrix, dists_matrix, transb=True) + T_ts
    
    score = predict_score - true_score + eta * diff_cnt
    #print('predict_score:', predict_score.data)
    #print('true_score:', true_score.data)
    #print('loss:', eta * diff_cnt.data)
    return score
Example #25
 def occupancy_net_loss(self, occupancy_net, depth, theta, z):
     R = theta[:, :3, :3]
     t = theta[:, :3, -1:]
     depth = depth.reshape(depth.shape[0], 1, -1)
     eps = self.xp.random.normal(0, 0.05, size=depth.shape)
     real_pos = F.matmul(F.matmul(R, self.inv_K), (depth + eps) * self.p) + t
     label = (eps > 0).reshape(-1, 1).astype("int32")
     occupancy_field = occupancy_net(z, real_pos + eps)
     return F.sigmoid_cross_entropy(occupancy_field, label)
Example #26
 def node2edge(self, x, rel_rec, rel_send):
     # NOTE: Assumes that we have the same graph across all samples.
     # x: [batch_size, num_nodes, feature_dim]
     # rel_rec, rel_send: [num_edges, num_nodes]
     receivers = F.matmul(rel_rec, x)
     senders = F.matmul(rel_send, x)
     # receivers, senders: [batch_size, num_edges, feature_dim]
     edges = F.concat([receivers, senders], axis=2)  # along num_edges
     return edges
Example #27
def word_attention(emb_a, emb_b):
    A = F.matmul(emb_a, emb_b, transb=True)
    A = F.sum(A, axis=-1, keepdims=True)
    A = F.softmax(A, axis=-2)

    B, N, C = emb_a.shape
    wf = F.matmul(emb_a, A, transa=True)
    wf = F.transpose(wf, axes=(0, 2, 1))
    return wf, A
Example #28
 def house_transform(self,z):
     vec_t = self.qh_vec_0
     
     for i in range(self.num_trans):
         vec_t = F.identity(self.qlin_h_vec_t(vec_t))
         vec_t_product = F.matmul(vec_t, vec_t, transb=True)
         vec_t_norm_sqr = F.tile(F.sum(F.square(vec_t)), (z.shape[0], z.shape[1]))
         z = z - 2 * F.matmul(vec_t_product, z) / vec_t_norm_sqr
     return z
Example #29
def scaled_dot_product_attention(queries, keys, values, scale=1., mask=None):
    x1 = F.matmul(queries, keys, transb=True) * xp.array(scale,
                                                         dtype=keys.dtype)
    x2 = F.where(mask,
                 xp.ones_like(x1.array) *
                 -xp.inf, x1) if mask is not None else x1
    x3 = F.softmax(x2, axis=-1)
    x4 = F.matmul(x3, values)
    return x4
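This is the standard scaled dot-product attention, softmax(Q K^T * scale) V, with masked positions set to -inf before the softmax. A minimal usage sketch, assuming the snippet's module-level xp is numpy and that scale is passed as 1/sqrt(d_k); the shapes are illustrative:

    import numpy as np
    import chainer.functions as F
    xp = np  # the snippet relies on a module-level xp

    d_k = 4
    queries = np.random.randn(3, d_k).astype(np.float32)   # 3 query vectors
    keys = np.random.randn(5, d_k).astype(np.float32)      # 5 key vectors
    values = np.random.randn(5, 8).astype(np.float32)      # matching 5 value vectors
    out = scaled_dot_product_attention(queries, keys, values, scale=1.0 / np.sqrt(d_k))
    print(out.shape)  # (3, 8)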
Example #30
 def st_graph_output(self, f_A,
                     f_G):  # f_A shape = (N,D), f_G shape = (N,4)
     assert f_A.shape[0] == f_G.shape[0]
     if self.add_self:
         assert f_A.shape[1] == self.out_size
     N = f_G.shape[0]
     assert N % self.frame_node_num == 0
     T = N // self.frame_node_num
     geo_dim = f_G.shape[1]
     f_A_orig = f_A
     f_G = F.reshape(f_G, (T, self.frame_node_num, geo_dim))
     f_A = F.reshape(f_A, (T, self.frame_node_num, f_A.shape[-1]))
     assert f_A_orig.ndim == 2, f_A_orig.ndim
     f_R = []
     for nr in range(self.num_relations):
         f_G_1 = F.tile(f_G, (1, 1, self.frame_node_num))  # shape = (T, F, 4 * F)
         f_G_1 = F.reshape(
             f_G_1,
             (T, self.frame_node_num**
              2, geo_dim))  # after tile: (T, F, (4 x F)) then (T,F^2,4)
         f_G_2 = F.tile(f_G, (1, self.frame_node_num, 1))  # shape = (T, F*F, 4)
         encoded_offset = self.encode_box_offset(f_G_1.reshape(
             -1, geo_dim), f_G_2.reshape(-1, geo_dim))  # shape = (TxFxF, 4)
         # paper formula (5), shape = (T,F,F)
         w_G = F.relu(
             getattr(self, self.W_G_lst[nr])(self.position_encoding(
                 encoded_offset, self.d_g)))  # TxFxF,1
         w_G = F.reshape(w_G,
                         shape=(T, self.frame_node_num,
                                self.frame_node_num))  # shape = (T,F,F)
         # paper formula (4), shape = (N,N)
         w_K_result = getattr(self, self.W_K_lst[nr])(f_A_orig).reshape(
             T, self.frame_node_num, self.d_k)  # shape = (T, F, d_k)
         w_Q_transpose_result = F.transpose(getattr(
             self,
             self.W_Q_lst[nr])(f_A_orig).reshape(T, self.frame_node_num,
                                                 self.d_k),
                                            axes=(0, 2,
                                                  1))  # shape = (T, d_k, F)
         w_A = F.matmul(w_K_result, w_Q_transpose_result)  # shape = (T,F,F)
         w_A = w_A + F.log(w_G)
         # paper formula (3), shape = (T,F,F)
         w = F.softmax(
             w_A, axis=2
         )  # the original paper's formula (3) is a weighted softmax; since chainer does not provide a weighted softmax,
         # we instead add log(w_G) to the logits element-wise and then apply softmax
         # w = w_G * F.exp(w_A) / F.sum(w_G * F.exp(w_A), axis=1)  # denominator shape = (N,1) numerator shape = (N,N)
         # paper formula (2), weight sum = matmul:(T,F,F) x (T, F, out_size//nr) = (T, F, out_size//nr)
         f_R_nr = F.matmul(
             w,
             getattr(self, self.W_V_lst[nr])(f_A_orig).reshape(
                 T, self.frame_node_num, self.w_v_outsize))
         f_R.append(f_R_nr)
     if self.add_self:
         return f_A + F.concat(f_R, axis=2).reshape(N, self.out_size)
     return F.concat(f_R, axis=2).reshape(N, self.out_size)
Example #31
    def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send,
                            single_timestep_rel_type):
        # single_timestep_inputs: [batch_size, num_sequences, num_nodes, feature_dims]
        # single_timestep_rel_type: [batch_size, num_sequences, num_edges, edge_types]
        batch_size, num_sequences, num_edges, _ = single_timestep_rel_type.shape
        _, num_nodes = rel_rec.shape

        # Node2edge
        # rel_rec: [num_edges, num_nodes]
        # rel_send: [num_edges, num_nodes]
        receivers = F.matmul(rel_rec, single_timestep_inputs)
        senders = F.matmul(rel_send, single_timestep_inputs)
        pre_msg = F.concat([receivers, senders], axis=-1)
        # pre_msg: [batch_size, num_sequences, num_edges, 2 * feature_dims]
        pre_msg = F.reshape(pre_msg,
                            [batch_size * num_sequences * num_edges, -1])

        all_msgs = chainer.Variable(
            pre_msg.xp.zeros(
                (batch_size, num_sequences, num_edges, self.msg_out_shape),
                dtype=single_timestep_rel_type.dtype))
        if self.skip_first_edge_type:
            start_idx = 1
        else:
            start_idx = 0

        # Run separate MLP for every edge type
        # NOTE: To exclude one edge type, simply offset range by 1
        for i in range(start_idx, len(self.msg_fc2)):
            msg = F.relu(self.msg_fc1[i](pre_msg))
            msg = F.dropout(msg, self.dropout_prob)
            msg = F.relu(self.msg_fc2[i](msg))
            # msg: [batch_size * num_sequences * num_edges, msg_hid]
            msg = F.reshape(msg, [batch_size, num_sequences, num_edges, -1])
            msg = msg * single_timestep_rel_type[:, :, :, i:i + 1]
            all_msgs += msg

        # Aggregate all msgs to receiver
        # all_msgs: [batch_size, num_sequences, num_edges, msg_out_shape]
        # rel_rec: [num_edges, num_nodes]
        agg_msgs = F.matmul(rel_rec.T, all_msgs)

        # Skip connection
        aug_inputs = F.concat([single_timestep_inputs, agg_msgs], axis=-1)
        # aug_inputs: [batch_size, num_sequences, num_nodes, msg_out_shape + feature_dims]
        aug_inputs = F.reshape(aug_inputs,
                               [batch_size * num_sequences * num_nodes, -1])

        # Output MLP
        pred = F.dropout(F.relu(self.out_fc1(aug_inputs)), self.dropout_prob)
        pred = F.dropout(F.relu(self.out_fc2(pred)), self.dropout_prob)
        pred = self.out_fc3(pred)
        pred = F.reshape(pred, [batch_size, num_sequences, num_nodes, -1])

        # Predict position/velocity difference
        return single_timestep_inputs + pred
Example #32
File: e2emn.py Project: shuyo/iir
    def forward(self, x, q, is_linear=False):
        # Random noise for learning time invariance
        xp = chainer.cuda.get_array_module(x)  # xp is also needed below when self.pe is set
        if chainer.configuration.config.train:
            z = xp.zeros((1, x.shape[1]), dtype=numpy.float32)
            i = 0
            while i<x.shape[0]:
                if numpy.random.rand(1)[0]<0.1:
                    x = xp.vstack((x[:i], z, x[i:]))
                    i += 1
                i += 1
        max_knowledge, D = self.temporal_a.shape
        if len(x)>max_knowledge: x = x[len(x)-max_knowledge:]
        j = max_knowledge-len(x)

        if self.pe:
            a = xp.arange(1,0,-1/D)
            b = xp.arange(-1,1,2/D)
            M = a * F.matmul(x[:,:self.V], self.embedid_a) + b * F.matmul(x[:,self.V:], self.embedid_a) + self.temporal_a[j:]
            C = a * F.matmul(x[:,:self.V], self.embedid_c) + b * F.matmul(x[:,self.V:], self.embedid_c) + self.temporal_c[j:]
        else:
            M = F.matmul(x[:,:self.V], self.embedid_a) + self.temporal_a[j:]
            C = F.matmul(x[:,:self.V], self.embedid_c) + self.temporal_c[j:]

        U = F.matmul(q.reshape(1,-1), self.embedid_b)
        for l in range(self.layer):
            P = F.transpose(F.matmul(M,U[0]))
            if not is_linear: P = F.softmax(P)
            O = F.matmul(P,C)
            if l == self.layer-1:
                U = U + O
            else:
                U = self.H(U) + O
        return self.W(U) # (1,D)
Example #33
def norm(x):
    """
    Normalize each row vector:
    x -> x / |x|
    """
    s = F.sum(x**2, axis=1) ** 0.5  # [a,b]^T
    height = s.data.shape[0]
    a = Variable(np.ones((1, height), dtype=np.float32))  # [1,1]
    eye = Variable(np.eye(height, dtype=np.float32))
    b = F.inv(F.matmul(s, a) * eye)  # [1/a, 0; 0, 1/b]
    return F.matmul(b, x)
Example #34
def ls_solution(g0, g1):
    """ Get least-squares solution matrix for regression from rows of g0
      to rows of g1. Both g0 and g1 are chainer's Variable.
  """
    g0t = F.transpose(g0)
    if g0.shape[0] >= g0.shape[1]:
        g0pinv = F.matmul(F.inv(F.matmul(g0t, g0)), g0t)
    else:
        g0pinv = F.matmul(g0t, F.inv(F.matmul(g0, g0t)))
    K = F.transpose(F.matmul(g0pinv, g1))
    return K
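A quick sanity check of ls_solution, assuming g0 has full column rank so F.inv is well defined; the shapes and data below are illustrative, not from the original repo:

    import numpy
    import chainer
    import chainer.functions as F

    g0 = chainer.Variable(numpy.random.randn(100, 8).astype(numpy.float32))
    K_true = numpy.random.randn(8, 8).astype(numpy.float32)
    g1 = chainer.Variable(g0.data.dot(K_true.T))  # rows of g1 = K_true @ rows of g0

    K = ls_solution(g0, g1)
    print(numpy.allclose(K.data, K_true, atol=1e-3))  # True, up to numerical error

Since g0 is tall here, the function uses the left pseudo-inverse (g0^T g0)^{-1} g0^T; the other branch handles the wide case.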
Example #35
    def translate(self, xs, max_length=100):
        xs = numpy.insert(xs, 0, 2)
        xs = numpy.append(xs, 0)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            exs = self.embed_x(Variable(self.xp.array(xs,
                                                      dtype=self.xp.int32)))

            h = F.expand_dims(exs, axis=0)
            h = F.expand_dims(h, axis=0)
            h = F.transpose(h, (0, 1, 3, 2))
            for i in range(self.stack):
                h = self.gcnn[i](h)
            h = F.squeeze(h, axis=1)
            h = F.squeeze(h, axis=0)
            h = F.transpose(h, (1, 0))

            ys = self.xp.full(1, 2, self.xp.int32)
            result = []
            hx = None
            cx = None
            hx2 = None
            cx2 = None

            for i in range(max_length):
                eys = self.embed_y(ys)
                eyys = self.embed_yy(ys)
                eys2 = [eys]
                eyys2 = [eyys]
                hx, cx, ss = self.decoder(hx, cx, eys2)
                hx2, cx2, ss2 = self.decoder2(hx2, cx2, eyys2)

                batch_A = F.matmul(h, ss[0], transb=True) * self.scale_score
                batch_A = F.softmax(batch_A, axis=0)
                if self.weight:
                    with open("weight/wei.txt", "a", encoding="utf-8") as f:
                        for j in range(len(batch_A)):
                            f.write(str(batch_A[j][0].data) + "\n")
                        f.write("--------------\n")
                s = F.matmul(batch_A, h, transa=True)
                t = (self.We(s) + self.Ws(ss2[0]))
                ys = self.xp.argmax(t.data, axis=1).astype(self.xp.int32)
                if ys[0] == 0:
                    break
                result.append(ys)
        result = cuda.to_cpu(
            self.xp.concatenate([self.xp.expand_dims(x, 0) for x in result]).T)
        # Remove EOS tags
        outs = []
        for y in result:
            inds = numpy.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Example #36
 def obtain_loss(self, lambda_val, P1, W1, X, P2, W2, Y, A, R, alpha, beta):
     loss = 0
     loss += lambda_val * F.sum(
         F.square(F.matmul(P1, F.transpose(self.u.W)) - F.matmul(W1, X)))
     loss += lambda_val * F.sum(
         F.square(F.matmul(P2, F.transpose(self.v.W)) - F.matmul(W2, Y)))
     loss += alpha * F.sum(
         F.square(A * (R - F.matmul(self.u.W, F.transpose(self.v.W)))))
     loss += beta * (
         (F.sum(F.square(self.u.W)) + F.sum(F.square(self.v.W))))
     return loss
Example #37
    def __call__(self, e1, e2):
        ele2 = F.reshape(
                F.batch_matmul(e1[:,:,None], e2[:,None,:]), (-1, self.in_size1 * self.in_size2))

        res = F.matmul(ele2,
                F.reshape(self.W, (self.in_size1 * self.in_size2, self.out_size))) + \
            F.matmul(e1, self.V1) + \
            F.matmul(e2, self.V2)

        res, bias = F.broadcast(res, self.b)
        return res + bias
Example #38
    def __call__(self, text, x, t, textlens, xlens):
        batchsize = text.shape[0]

        vk = self.text_enc(text)

        v = vk[:, :self.d, :]
        k = vk[:, self.d:, :]
        q = self.audio_enc(x)

        a = F.matmul(F.transpose(k, (0, 2, 1)), q)
        a = F.softmax(a / self.xp.sqrt(self.d))
        r = F.matmul(v, a)
        rd = F.concat((r, q))

        y = self.audio_dec(rd)

        loss_bin = 0
        for i in range(batchsize):
            loss_bin += F.mean(
                F.bernoulli_nll(t[i, :, :xlens[i]], y[i, :, :xlens[i]], 'no'))
        loss_bin /= batchsize

        y = F.sigmoid(y)

        loss_l1 = 0
        for i in range(batchsize):
            loss_l1 += F.mean_absolute_error(t[i, :, :xlens[i]],
                                             y[i, :, :xlens[i]])
        loss_l1 /= batchsize

        loss_att = 0
        for i in range(batchsize):
            N = textlens[i]
            T = xlens[i]

            def w_fun(n, t):
                return 1 - np.exp(-((n / (N - 1) - t / (T - 1))**2) /
                                  (2 * self.g**2))

            w = np.fromfunction(w_fun, (a.shape[1], T), dtype='f')
            w = self.xp.array(w)
            loss_att += F.mean(w * a[i, :, :T])
        loss_att /= batchsize

        loss = loss_bin + loss_l1 + loss_att

        chainer.reporter.report({
            'loss_bin': loss_bin,
            'loss_l1': loss_l1,
            'loss_att': loss_att,
        })

        return loss, y, a
Example #39
    def global_attention_layer(self, dec_h, attention):
        """
        https://nlp.stanford.edu/pubs/emnlp15_attn.pdf
        :param dec_h: decoder hidden state (internal state)
        :param attention: encoder hidden states (internal states)
        :return:
        """
        weights = F.softmax(F.matmul(dec_h, attention, transb=True))  # Global align weights
        contexts = F.matmul(weights, attention)  # Context vector(Attention layer output)
        o = F.tanh(self.attention(F.concat((contexts, dec_h))))  # combine the context vector with the decoder hidden state

        return self.y(o)
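For reference, this implements Luong-style global attention from the paper linked in the docstring:

    a_t = softmax(h_t H_s^T),   c_t = a_t H_s,   h~_t = tanh(W_a [c_t ; h_t])

where H_s are the encoder states; h~_t is then projected to the output vocabulary by self.y.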
Example #40
    def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send, single_timestep_rel_type):
        # single_timestep_inputs: [batch_size, num_sequences, num_nodes, feature_dims]
        # single_timestep_rel_type: [batch_size, num_sequences, num_edges, edge_types]
        batch_size, num_sequences, num_edges, _ = single_timestep_rel_type.shape
        _, num_nodes = rel_rec.shape

        # Node2edge
        # rel_rec: [num_edges, num_nodes]
        # rel_send: [num_edges, num_nodes]
        receivers = F.matmul(rel_rec, single_timestep_inputs)
        senders = F.matmul(rel_send, single_timestep_inputs)
        pre_msg = F.concat([receivers, senders], axis=-1)
        # pre_msg: [batch_size, num_sequences, num_edges, 2 * feature_dims]
        pre_msg = F.reshape(pre_msg, [batch_size * num_sequences * num_edges, -1])

        all_msgs = chainer.Variable(
            pre_msg.xp.zeros((batch_size, num_sequences, num_edges, self.msg_out_shape),
                             dtype=single_timestep_rel_type.dtype))
        if self.skip_first_edge_type:
            start_idx = 1
        else:
            start_idx = 0

        # Run separate MLP for every edge type
        # NOTE: To exclude one edge type, simply offset range by 1
        for i in range(start_idx, len(self.msg_fc2)):
            msg = F.relu(self.msg_fc1[i](pre_msg))
            msg = F.dropout(msg, self.dropout_prob)
            msg = F.relu(self.msg_fc2[i](msg))
            # msg: [batch_size * num_sequences * num_edges, msg_hid]
            msg = F.reshape(msg, [batch_size, num_sequences, num_edges, -1])
            msg = msg * single_timestep_rel_type[:, :, :, i:i + 1]
            all_msgs += msg

        # Aggregate all msgs to receiver
        # all_msgs: [batch_size, num_sequences, num_edges, msg_out_shape]
        # rel_rec: [num_edges, num_nodes]
        agg_msgs = F.matmul(rel_rec.T, all_msgs)

        # Skip connection
        aug_inputs = F.concat([single_timestep_inputs, agg_msgs], axis=-1)
        # aug_inputs: [batch_size, num_sequences, num_nodes, msg_out_shape + feature_dims]
        aug_inputs = F.reshape(aug_inputs, [batch_size * num_sequences * num_nodes, -1])

        # Output MLP
        pred = F.dropout(F.relu(self.out_fc1(aug_inputs)), self.dropout_prob)
        pred = F.dropout(F.relu(self.out_fc2(pred)), self.dropout_prob)
        pred = self.out_fc3(pred)
        pred = F.reshape(pred, [batch_size, num_sequences, num_nodes, -1])

        # Predict position/velocity difference
        return single_timestep_inputs + pred
Example #41
def calculate_max_singular_value(weight_matrix, u, v):
    """Calculate max singular value by power iteration method.

    Args:
        weight_matrix (~chainer.Variable)
        u (numpy.ndarray or cupy.ndarray)
        v (numpy.ndarray or cupy.ndarray)

    Returns:
        ~chainer.Variable: Max singular value via power iteration method.

    """
    sigma = F.matmul(F.matmul(u, weight_matrix), v)
    return sigma
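The u and v arguments are assumed to be maintained by a power-iteration update performed outside this helper; a minimal numpy sketch of one such update step (the helper name and shapes are illustrative, not part of the original code):

    import numpy as np

    def power_iteration_step(W, u, eps=1e-12):
        # W: (out, in) weight matrix, u: (1, out) row vector from the previous step
        v = u.dot(W)                       # (1, in)
        v /= (np.linalg.norm(v) + eps)
        u = v.dot(W.T)                     # (1, out)
        u /= (np.linalg.norm(u) + eps)
        return u, v

After such a step, u @ W @ v.T approximates the largest singular value of W, which is what calculate_max_singular_value computes with F.matmul.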
Example #42
 def attention_history(self, dL, cue, train=True):
     D = F.concat(dL, axis=0)
     D, Cue = F.broadcast(D, cue)
     S = self.m(F.tanh(self.W_dm(D) + Cue))
     S = F.softmax(F.reshape(S, (1, len(dL))))
     pre_v = F.matmul(S, D)
     return pre_v
Example #43
 def forward(self, doc, wrd, window=5):
     doc, wrd = utils.move(self.xp, doc, wrd)
     proportions = self.proportions(doc)
     ld = dirichlet_likelihood(self.proportions.W)
     context = F.matmul(F.softmax(proportions), self.factors())
     loss = self.loss_func(context, wrd)
     return loss, ld
Example #44
    def __call__(self, x):
        # x.shape == (batchsize, 3, 128, 64)
        batchsize = x.shape[0]
        h = F.elu(self.bn1(self.conv1_1(x)))
        h = F.elu(self.bn2(self.conv1_2(h)))
        h = F.max_pooling_2d(h, 3, 2, cover_all=False)
        h = self.conv2_1(h)
        h = self.conv2_3(h)
        h = self.conv3_1(h)
        h = self.conv3_3(h)
        h = self.conv4_1(h)
        h = self.conv4_3(h)

        h = h.reshape(batchsize, -1)
        h = F.dropout(h, ratio=0.6)
        h = F.elu(self.fc1_bn(self.fc1(h)))

        # Features in rows, normalize axis 1.
        weights = self.mean_vectors
        features = self.ball(h)
        features = F.normalize(features, eps=1e-8)
        scale = F.softplus(self.scale)
        normalized_weight = F.normalize(weights, axis=0, eps=1e-8)
        logits = F.tile(scale[None, ], (batchsize, 1)) * \
            F.matmul(features, normalized_weight)
        return logits
Example #45
 def setUp(self):
     self.x1 = numpy.random.uniform(.5, 1, (m,)).astype(numpy.float32)
     self.x2 = numpy.random.uniform(.5, 1, (m,)).astype(numpy.float32)
     self.gy = numpy.random.uniform(-1, 1, (m, m)).astype(numpy.float32)
     self.op = lambda x, y: F.matmul(x, y, transb=True)
     self.forward_answer = numpy.dot(
         self.x1.reshape(m, 1), self.x2.reshape(1, m))
Example #46
 def _log_prob_words(self, context, temperature=1.0):
     """ This calculates an softmax over the vocabulary as a function
     of the dot product of context and word.
     """
     dot = F.matmul(context, F.transpose(self.vocab.W))
     prob = F.softmax(dot / temperature)
     return F.log(prob)
Example #47
    def __call__(self, h, adj):
        """

        Args:
            h: (batchsize, num_nodes, in_channels)
            adj: (batchsize, num_edge_type, num_nodes, num_nodes)

        Returns:
            (batchsize, num_nodes, ch)

        """
        mb, node, ch = h.shape

        # --- self connection, apply linear function ---
        hs = self.graph_linear_self(h)
        # --- relational feature, from neighbor connection ---
        # Expected number of neighbors of a vertex
        # Since you have to divide by it, if it's 0, you need to
        # arbitrarily set it to 1
        m = self.graph_linear_edge(h)
        m = functions.reshape(
            m, (mb, node, self.out_channels, self.num_edge_type))
        m = functions.transpose(m, (0, 3, 1, 2))
        # m: (batchsize, edge_type, node, ch)
        # hr: (batchsize, edge_type, node, ch)
        hr = functions.matmul(adj, m)
        # hr: (batchsize, node, ch)
        hr = functions.sum(hr, axis=1)
        return hs + hr
Example #48
	def memory(self, x_input, query, layer):
		m = self.encode_input(x_input,layer)		# memory for input
		c = self.encode_output(x_input)		# memory for output
		if layer == 1:
			u = self.encode_query(query)			# memory for query
		else:
			u = query
#		print "m.data.shape", m.data.shape		# (50,20)
#		print "u.data.shape", u.data.shape		# (1,20)
		mu = F.matmul(m, u, transb=True)	# inner product of m and u (u transposed)
		p = F.softmax(mu)			# sentence importance p (attention)
#		print p.data.shape		# (50,1)
#		print c.data.shape		# (50,20)
		o = F.matmul(p, c, transa=True)		# weighted sum of c by p
#		print o.data.shape		# (1,20)
		return (u+o)
Example #49
def cos(a, b):
    """
    cos-similarity
    """
    height = a.data.shape[0]
    c = F.matmul(norm(a), norm(b), transb=True)
    eye = Variable(np.eye(height, dtype=np.float32))
    return F.sum(c * eye, axis=0)
Example #50
 def __call__(self, x):
     """Applies the linear layer.
     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     return F.matmul(x, self.W_polarity)
Example #51
def trans2para(pre_label, next_label):
    pre_label = chainer.Variable(np.array([pre_label], dtype=np.int32))
    trans_vec = model.trans(pre_label)
    onehot = [1 if l == next_label else 0 for l in range(label_num)]
    onehot = chainer.Variable(np.array([onehot], dtype=np.float32))
    trans_para = F.matmul(onehot, F.softmax(trans_vec), transb=True)
    
    return trans_para
Example #52
 def __call__(self, x, train=True):
 	batchsize = 100
 	ones = chainer.Variable(np.array([[1]]*batchsize).astype(np.float32))
 	pre_f = self.W_graph(val_y)
 	pref_ones = F.dropout(F.matmul(ones,pre_f),train = train)
 	polarity_Mat = x * pref_ones
 	#polarity_Mat = self.W_graph(x)
 	pre_y = self.W_out(polarity_Mat)
 	return pre_y
Example #53
def compute_A_P(a,p):
    #compute matrix P 
    conv1_1,conv2_1, conv3_1, conv4_1,conv5_1, = func(inputs={'data': p}, outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    P = [ reshape2(x) for x in [conv1_1,conv2_1, conv3_1, conv4_1,conv5_1]]
    #compute matrix A
    conv1_1,conv2_1, conv3_1, conv4_1,conv5_1, = func(inputs={'data': a}, outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    conv1_1A0,conv2_1A0, conv3_1A0, conv4_1A0,conv5_1A0, = [ reshape2(x) for x in [conv1_1,conv2_1, conv3_1, conv4_1,conv5_1]]
    A = [ Fu.matmul(x, x, transa=False, transb=True) for x in [conv1_1A0,conv2_1A0, conv3_1A0, conv4_1A0,conv5_1A0]]

    return A,P
Example #54
 def forward(self, ids, bow):
     bow, ids = utils.move(self.xp, bow, ids)
     proportions = self.proportions(ids)
     ld = dirichlet_likelihood(proportions)
     doc = F.matmul(F.softmax(proportions), self.factors())
     logp = F.dropout(self.embedding(doc))
     # loss = -F.sum(bow * F.log_softmax(logp))
     sources, targets, counts = [], [], []
     lpi =  F.sum(bow * F.log_softmax(logp), axis=1)
     loss = -F.sum(lpi)
     return loss, ld
Example #55
	def __call__(self, x_input, query, answer, train=True):
		m = self.encode_input(x_input)		# memory for input
		u = self.encode_query(query)			# memory for query
		c = self.encode_output(x_input)		# memory for output
#		print "m.data.shape", m.data.shape		# (50,20)
#		print "u.data.shape", u.data.shape		# (1,20)
		# inner product of m and u (u transposed)
		mu = F.matmul(m, u, transb=True)
		# sentence importance p (attention)
		p = F.softmax(mu)
#		print p.data.shape		# (50,1)
#		print c.data.shape		# (50,20)
		o = F.matmul(p, c, transa=True)		# weighted sum of c by p
#		print o.data.shape		# (1,20)
		predict = self.W(u+o)
#		print "answer.shape,predict.shape:", answer.shape,predict.data.shape
		if train:
			return F.softmax_cross_entropy(predict, answer)
		else:
			return F.accuracy(predict, answer)
Example #56
 def __call__(self, x):
     """Applies the linear layer.
     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     if self.has_uninitialized_params:
         self._initialize_params(x.shape[0])
     batch_size = x.data.shape[0]
     batch_ones = chainer.Variable(xp.ones((batch_size,1)).astype(np.float32))
     return x*(F.matmul(batch_ones, self.W))
Example #57
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20,20,(1,3,width,width),dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20,20,(1,3,width,width)).astype(np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch,wd**2))
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print i,l,L1.data,L2.data

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%3000==0:
            save_image(img_gen, W, nw, nh, i)
Example #58
    def setUp(self):
        self.x1 = numpy.random.uniform(.5, 1, self.x1_shape)
        self.x1 = self.x1.astype(self.x1_dtype)
        self.x2 = numpy.random.uniform(.5, 1, self.x2_shape)
        self.x2 = self.x2.astype(self.x2_dtype)
        ret_dtype = numpy.result_type(self.x1_dtype, self.x2_dtype)
        self.gy = numpy.random.uniform(-1, 1, self.gy_shape).astype(ret_dtype)
        self.ggx1 = numpy.random.uniform(
            .5, 1, self.x1_shape).astype(self.x1_dtype)
        self.ggx2 = numpy.random.uniform(
            .5, 1, self.x2_shape).astype(self.x2_dtype)

        self.op = lambda x, y: F.matmul(x, y, transa=self.transa,
                                        transb=self.transb)
        self.forward_answer = self._get_forward_answer(self.x1, self.x2,
                                                       self.transa,
                                                       self.transb)
Example #59
    def __call__(self, doc_ids):
        """ Given an array of document integer indices, returns a vector
        for each document. The vector is composed of topic weights projected
        onto topic vectors.

        Args:
            doc_ids : chainer.Variable
                One-dimensional batch vectors of IDs

        Returns:
            doc_vector : chainer.Variable
                Batch of two-dimensional embeddings for every document.
        """
        # (batchsize, ) --> (batchsize, multinomial)
        proportions = self.proportions(doc_ids, softmax=True)
        # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
        factors = F.dropout(self.factors(), ratio=self.dropout_ratio)
        w_sum = F.matmul(proportions, factors)
        return w_sum
Example #60
    def __call__(self, doc_ids):
        """ Given an array of document integer indices, returns a vector
        for each document. The vector is composed of topic weights projected
        onto topic vectors.

        Args:
            doc_ids (~chainer.Variable): One-dimensional batch vectors of IDs

        Returns:
            ~chainer.Variable: Batch of two-dimensional embeddings for every
                document.
        """
        # (batchsize, ) --> (batchsize, logweights)
        w = self.weights(doc_ids)
        # (batchsize, logweights) --> (batchsize, multinomial)
        multi = F.softmax(w)
        # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
        w_sum = F.matmul(multi, self.factors())
        return w_sum