def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    mu = functions.matmul(m, u, transb=True)
    p = functions.softmax(mu)
    c = self.encode_output(x_input)  # c.data.shape == (3, 50)
    # transpose c and take the dot product -> (2, 50, 1)
    o = functions.matmul(functions.swapaxes(c, 1, 0), p)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    predict = self.W(u + o)
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def forward(x, p, a, A=None, P=None):
    conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 = func(
        inputs={'data': x},
        outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    conv1_1F, conv2_1F, conv3_1F, conv4_1F, conv5_1F = [
        reshape2(x) for x in [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1]]
    conv1_1G, conv2_1G, conv3_1G, conv4_1G, conv5_1G = [
        Fu.matmul(x, x, transa=False, transb=True)
        for x in [conv1_1F, conv2_1F, conv3_1F, conv4_1F, conv5_1F]]

    # Because P and A do not change over iterations, it is better to
    # compute them once and pass them in.
    if A is None and P is None:
        # compute matrix P
        conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 = func(
            inputs={'data': p},
            outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
        conv1_1P, conv2_1P, conv3_1P, conv4_1P, conv5_1P = [
            reshape2(x) for x in [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1]]
        # compute matrix A
        conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 = func(
            inputs={'data': a},
            outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
        conv1_1A0, conv2_1A0, conv3_1A0, conv4_1A0, conv5_1A0 = [
            reshape2(x) for x in [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1]]
        conv1_1A, conv2_1A, conv3_1A, conv4_1A, conv5_1A = [
            Fu.matmul(x, x, transa=False, transb=True)
            for x in [conv1_1A0, conv2_1A0, conv3_1A0, conv4_1A0, conv5_1A0]]
    else:
        conv1_1P, conv2_1P, conv3_1P, conv4_1P, conv5_1P = P
        conv1_1A, conv2_1A, conv3_1A, conv4_1A, conv5_1A = A

    L_content = Fu.mean_squared_error(conv4_1F, conv4_1P) / 2

    # caution! the dividing numbers are hard-coded!
    # this part corresponds to equation (4) in the original paper
    # to check the current N and M, run:
    # [x.data.shape for x in [conv1_1F, conv2_1F, conv3_1F, conv4_1F, conv5_1F]]
    L_style = (Fu.mean_squared_error(conv1_1G, conv1_1A) / (4 * 64 * 64 * 50176 * 50176)
               + Fu.mean_squared_error(conv2_1G, conv2_1A) / (4 * 128 * 128 * 12544 * 12544)
               + Fu.mean_squared_error(conv3_1G, conv3_1A) / (4 * 256 * 256 * 3136 * 3136)
               + Fu.mean_squared_error(conv4_1G, conv4_1A) / (4 * 512 * 512 * 784 * 784)
               ) / 4  # equal weighting of E_l

    loss = a_p_ratio * L_content + L_style
    return loss
def calc_loss(self, sys_ys, ref_ys, dists):
    loss = wrapper.make_var([[0.0]])
    sys_Tscore = wrapper.make_var([[0.0]])
    ref_Tscore = wrapper.make_var([[0.0]])

    # chainer.Variable, concatenation of one-hot vectors
    sys_Tscore, sys_vecs = self.calc_trans_score(sys_ys)
    sys_matrix = wrapper.make_var([sys_vecs])
    # chainer.Variable, one-hot vectors
    ref_Tscore, ref_vecs = self.calc_trans_score(ref_ys)
    ref_matrix = wrapper.make_var([ref_vecs])
    dists_matrix = functions.concat(tuple(dists))

    # count the labels that differ
    diff_cnt = wrapper.make_var([[0.0]])
    for sys_y, ref_y in zip(sys_ys, ref_ys):
        if sys_y != ref_y:
            diff_cnt += wrapper.make_var([[1.0]])

    # max 0
    loss = functions.matmul(sys_matrix, dists_matrix, transb=True) + sys_Tscore \
        - functions.matmul(ref_matrix, dists_matrix, transb=True) - ref_Tscore \
        + self.__eta * diff_cnt
    """ debug
    print("sys_score trans : ",
          wrapper.get_data(functions.matmul(sys_matrix, dists_matrix, transb=True)),
          wrapper.get_data(sys_Tscore))
    print("ref_score trans : ",
          wrapper.get_data(functions.matmul(ref_matrix, dists_matrix, transb=True)),
          wrapper.get_data(ref_Tscore))
    print("diff_cnt penal : ", wrapper.get_data(diff_cnt),
          wrapper.get_data(self.__eta * diff_cnt))
    """
    return loss
def __call__(self, hx, cx, xs, enc_hs):
    xs_embed = [self.embed(x) for x in xs]
    hy, cy, ys = self.Nlstm(hx, cx, xs_embed)

    ys_pad = F.pad_sequence(ys, length=None, padding=0.0)
    enc_hs = F.pad_sequence(enc_hs, length=None, padding=0.0)

    mask = self.xp.all(enc_hs.data == 0, axis=2, keepdims=True)
    mask_num = self.xp.full(mask.shape, -1024.0, dtype=self.xp.float32)

    alignment = []
    decode = []
    ys_pad = F.transpose(ys_pad, (1, 0, 2))
    for y in ys_pad:
        y = F.reshape(y, (*y.shape, 1))
        score = F.matmul(enc_hs, y)
        score = F.where(mask, mask_num, score)
        align = F.softmax(score, axis=1)
        context_vector = F.matmul(enc_hs, align, True, False)
        t = self.W_c(
            F.dropout(F.concat((y, context_vector), axis=1), self.dropout))
        ys_proj = self.proj(F.dropout(t, self.dropout))
        alignment.append(F.reshape(align, (len(xs), -1)))
        decode.append(ys_proj)

    decode = F.stack(decode, axis=1)
    alignment = F.stack(alignment, axis=1)
    return hy, cy, decode, alignment.data
def __call__(self, x1, x2):
    # inputs: x1 = [x1_1 ... x1_i ... x1_n1]; dim(x1_i)=d1=left_size
    #         x2 = [x2_1 ... x2_j ... x2_n2]; dim(x2_j)=d2=right_size
    # output: o_ij = x1_i * W * x2_j + x2_j * U + b
    n1 = x1.shape[0]
    n2 = x2.shape[0]
    x2T = F.transpose(x2)
    x1_W = F.matmul(x1, self.W)  # (n1, d1) * (d1, d2) => (n1, d2)
    res = F.matmul(x1_W, x2T)    # (n1, d2) * (d2, n2) => (n1, n2)
    if self.U is not None:
        # (n1, d1) * (d1, 1) => (n1, 1) -> (n1, n2)
        x1_U = F.broadcast_to(F.matmul(x1, self.U), (n1, n2))
        res = res + x1_U
    if self.V is not None:  # TODO fix
        # (1, d2) * (d2, n2) => (1, n2) -> (n1, n2)
        V_x2 = F.broadcast_to(F.matmul(self.V, x2T), (n1, n2))
        res = res + V_x2
    if self.b is not None:
        b = F.broadcast_to(self.b, (n1, n2))
        res = res + b
    return res
def __call__(self, a_list, b_list, a_mask, b_mask, knowledge):
    # a_list: Question
    # b_list: Story text
    ya_ori = self.input_encoding(self.input_lstm_a, a_list, a_mask)
    yb_ori = self.input_encoding(self.input_lstm_b, b_list, b_mask)
    # (minibatch, maxlen(a_list), maxlen(b_list))
    alpha, _ = self.make_alpha(ya_ori, yb_ori, a_mask, knowledge)
    # (minibatch, maxlen(b_list), maxlen(a_list))
    beta, beta_r = self.make_alpha(
        yb_ori, ya_ori, b_mask,
        xp.swapaxes(knowledge, axis1=1, axis2=2))
    ya_con, yb_con = self.kec(ya_ori, yb_ori, alpha, beta)
    # ya_loc = self.kelic(ya_ori, ya_con)
    yb_loc = self.kelic(yb_ori, yb_con, beta_r)
    h_start = self.modeling(self.modeling_start_lstm, yb_loc, b_mask)
    h_end = self.modeling(self.modeling_end_lstm, h_start, b_mask)
    batchsize, _, hidden_size = F.concat((yb_loc, h_start), axis=2).shape
    system_start = F.matmul(
        F.broadcast_to(self.W1, (batchsize, 1, hidden_size)),
        F.concat((yb_loc, h_start), axis=2),
        transb=True).reshape(batchsize, -1)
    system_end = F.matmul(
        F.broadcast_to(self.W2, (batchsize, 1, hidden_size)),
        F.concat((yb_loc, h_end), axis=2),
        transb=True).reshape(batchsize, -1)
    return system_start, system_end
def forward(self, e_var, s_var=None, mask=None, batch=1):
    """Core function of the Multi-head attention layer.

    Args:
        e_var (chainer.Variable): Variable of input array.
        s_var (chainer.Variable): Variable of source array from encoder.
        mask (chainer.Variable): Attention mask.
        batch (int): Batch size.

    Returns:
        chainer.Variable: Output of multi-head attention layer.

    """
    xp = self.xp
    if s_var is None:
        # (batch, head, time1/2, d_k)
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
    else:
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
    scores = F.matmul(
        F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(self.d_k)
    if mask is not None:
        mask = xp.stack([mask] * self.h, axis=1)
        scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, 'f'))
    self.attn = F.softmax(scores, axis=-1)
    p_attn = F.dropout(self.attn, self.dropout)
    x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
    x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
    return self.linear_out(x)
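# A minimal standalone sketch (not from the snippet above) of the scaled
# dot-product core it implements; the batch/head/time/d_k sizes are made up.
import numpy as np
import chainer.functions as F

batch, h, time, d_k = 2, 4, 5, 8
Q = np.random.randn(batch, time, h, d_k).astype(np.float32)
K = np.random.randn(batch, time, h, d_k).astype(np.float32)
V = np.random.randn(batch, time, h, d_k).astype(np.float32)

# (b, h, t, d_k) x (b, h, d_k, t) -> (b, h, t, t)
scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(d_k)
attn = F.softmax(scores, axis=-1)
x = F.matmul(attn, F.swapaxes(V, 1, 2))  # (b, h, t, d_k)
assert x.shape == (batch, h, time, d_k)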
def evaluate_actions(self, actions):
    u_minus_mu = actions - self.mu
    a = -0.5 * F.matmul(
        F.matmul(u_minus_mu[:, None, :], self.mat),
        u_minus_mu[:, :, None])[:, 0, 0]
    return a + F.reshape(self.v, (self.batch_size,))
def forward(self, x1, x2):
    xp = self.xp
    out_size = self.out_size
    batch_size, n1, d1 = x1.shape
    if not self.nobias[0]:
        x1 = F.concat((x1, xp.ones((batch_size, n1, 1), xp.float32)), axis=2)
        d1 += 1
    n2, d2 = x2.shape[1:]
    if not self.nobias[1]:
        x2 = F.concat((x2, xp.ones((batch_size, n2, 1), xp.float32)), axis=2)
        d2 += 1
    # (B * n1, d1) @ (d1, O * d2) => (B * n1, O * d2)
    x1W = F.matmul(
        F.reshape(x1, (batch_size * n1, d1)),
        F.reshape(F.transpose(self.W, (0, 2, 1)), (d1, out_size * d2)))
    # (B, n1 * O, d2) @ (B, d2, n2) => (B, n1 * O, n2)
    x1Wx2 = F.matmul(
        F.reshape(x1W, (batch_size, n1 * out_size, d2)), x2, transb=True)
    # => (B, n1, n2, O)
    y = F.transpose(
        F.reshape(x1Wx2, (batch_size, n1, out_size, n2)), (0, 1, 3, 2))
    assert y.shape == (batch_size, n1, n2, out_size)
    if not self.nobias[2]:
        y += F.broadcast_to(self.b, y.shape)
    return y
def encode_decode_train(self, in_word_list, out_word_list, train=True):
    xp = cuda.cupy if self.gpuid >= 0 else np
    self.reset_state()
    # Add GO_ID, EOS_ID to decoder input
    decoder_word_list = [GO_ID] + out_word_list + [EOS_ID]
    # encode list of words/tokens
    enc_states = self.encode_list(in_word_list, train=train)
    # initialize decoder LSTM to final encoder state
    self.set_decoder_state()
    # decode and compute loss
    if not train:
        with chainer.no_backprop_mode():
            # convert list of tokens into chainer variable list
            var_dec = Variable(
                xp.asarray(decoder_word_list, dtype=np.int32).reshape((-1, 1)))
            # Initialise first decoded word to GO_ID
            pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))
    else:
        # convert list of tokens into chainer variable list
        var_dec = Variable(
            xp.asarray(decoder_word_list, dtype=np.int32).reshape((-1, 1)))
        # Initialise first decoded word to GO_ID
        pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))
    # compute loss
    self.loss = 0
    # decode tokens
    for next_word_var in var_dec[1:]:
        self.decode(pred_word, train=train)
        if self.attn == NO_ATTN:
            predicted_out = self.out(self[self.lstm_dec[-1]].h)
        else:
            # Add attention
            dot_score = F.matmul(
                enc_states, self[self.lstm_dec[-1]].h, transb=True)
            alpha_list = F.softmax(F.transpose(dot_score))
            context_vector = F.matmul(alpha_list, enc_states)
            concat_vector = F.concat(
                (self[self.lstm_dec[-1]].h, context_vector), axis=1)
            predicted_out = self.out(self.attention_context(concat_vector))
        # compute loss
        prob = F.softmax(predicted_out)
        pred_word = self.select_word(prob, train=train, sample=False)
        '''
        ___QUESTION-1-DESCRIBE-E-START___
        Explain what loss is computed with an example
        What does this value mean?
        '''
        self.loss += F.softmax_cross_entropy(predicted_out, next_word_var)
        '''___QUESTION-1-DESCRIBE-E-END___'''
    report({"loss": self.loss}, self)
    return self.loss
def calc_score(labels, ts, dists):
    score = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_labels = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_ts = chainer.Variable(np.array([[0.0]], dtype=np.float32))

    # make labels vector and labels transitions vector
    labels_vec = list()
    pre_label = None
    for label in labels:
        if pre_label is not None:
            T_labels += trans2para(pre_label, label)
        for i in range(label_num):
            if i == label:
                labels_vec.append(1)
            else:
                labels_vec.append(0)
        pre_label = label
    labels_matrix = make_chainer_matrix(labels_vec)

    # make true labels vector
    ts_vec = list()
    pre_label = None
    for t in ts:
        if pre_label is not None:
            T_ts += trans2para(pre_label, t)
        for i in range(label_num):
            if i == t:
                ts_vec.append(1)
            else:
                ts_vec.append(0)
        pre_label = t
    ts_matrix = make_chainer_matrix(ts_vec)
    dists_matrix = F.concat(tuple(dists))
    #print(ts_vec)
    #print(labels_vec)
    #print(len(labels_matrix.data[0]))
    #print(len(ts_matrix.data[0]))
    #print(len(dists_matrix.data[0]))

    # make loss (difference between y_hat and y)
    diff_cnt = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    for i in range(len(labels)):
        if labels[i] != ts[i]:
            diff_cnt += chainer.Variable(np.array([[1.0]], dtype=np.float32))
        correct = get_onehot(ts[i])
        #diff_cnt += F.softmax_cross_entropy(dists[i], correct)

    predict_score = F.matmul(labels_matrix, dists_matrix, transb=True) + T_labels
    true_score = F.matmul(ts_matrix, dists_matrix, transb=True) + T_ts
    score = predict_score - true_score + eta * diff_cnt
    #print('predict_score:', predict_score.data)
    #print('true_score:', true_score.data)
    #print('loss:', eta * diff_cnt.data)
    return score
def batch_global_rigid_transformation(Rs, Js, parent, rotate_base=False):
    """
    Computes absolute joint locations given pose.

    rotate_base: if True, rotates the global rotation by 90 deg in x axis.
                 if False, this is the original SMPL coordinate.

    Args:
      Rs: N x 24 x 3 x 3 rotation matrices of K joints
      Js: N x 24 x 3, joint locations before posing
      parent: 24 holding the parent id for each index

    Returns
      new_J : `Tensor`: N x 24 x 3 location of absolute joints
      A     : `Tensor`: N x 24 x 4 x 4 relative joint transformations for LBS.
    """
    xp = Rs.xp
    N = Rs.shape[0]
    if rotate_base:
        print('Flipping the SMPL coordinate frame!!!!')
        rot_x = xp.array(
            [[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype=Rs.dtype)
        rot_x = F.reshape(F.tile(rot_x, [N, 1]), [N, 3, 3])
        root_rotation = F.matmul(Rs[:, 0, :, :], rot_x)
    else:
        root_rotation = Rs[:, 0, :, :]

    # Now Js is N x 24 x 3 x 1
    Js = F.expand_dims(Js, -1)

    def make_A(R, t, name=None):
        # R is N x 3 x 3, t is N x 3 x 1
        R_homo = F.pad(R, [[0, 0], [0, 1], [0, 0]], 'constant')
        t_homo = F.concat([t, xp.ones([N, 1, 1], 'f')], 1)
        return F.concat([R_homo, t_homo], 2)

    A0 = make_A(root_rotation, Js[:, 0])
    results = [A0]
    for i in range(1, parent.shape[0]):
        j_here = Js[:, i] - Js[:, parent[i]]
        A_here = make_A(Rs[:, i], j_here)
        res_here = F.matmul(results[parent[i]], A_here)
        results.append(res_here)

    # N x 24 x 4 x 4
    results = F.stack(results, axis=1)
    new_J = results[:, :, :3, 3]

    # --- Compute relative A: Skinning is based on
    # how much the bone moved (not the final location of the bone)
    # but (final_bone - init_bone)
    # ---
    Js_w0 = F.concat([Js, xp.zeros([N, 24, 1, 1], 'f')], 2)
    init_bone = F.matmul(results, Js_w0)
    # Append empty 4 x 3:
    init_bone = F.pad(init_bone, [[0, 0], [0, 0], [0, 0], [3, 0]], 'constant')
    A = results - init_bone
    return new_J, results
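# Plain-numpy sketch of the 4 x 4 homogeneous transform that make_A builds
# for one batch element: [R t; 0 0 0 1].
import numpy as np
R = np.eye(3, dtype=np.float32)                   # rotation
t = np.array([[1.0], [2.0], [3.0]], np.float32)   # translation column
R_homo = np.vstack([R, np.zeros((1, 3), np.float32)])  # pad a zero bottom row
t_homo = np.vstack([t, np.ones((1, 1), np.float32)])   # append the 1
A = np.hstack([R_homo, t_homo])                   # 4 x 4, last row 0 0 0 1
print(A)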
def __call__(self, h, g, step=0):
    mb, atom, ch = h.shape
    h_j = functions.expand_dims(h, 1)
    # h_j.shape == (mb, self.n_heads, atom, ch)
    h_j = functions.broadcast_to(h_j, (mb, self.n_heads, atom, ch))

    # expand h_super
    # g_extend.shape == (mb, 1, self.hidden_dim_super)
    g_extend = functions.expand_dims(g, 1)
    # g_extend.shape == (mb, self.n_heads, self.hidden_dim_super)
    g_extend = functions.broadcast_to(
        g_extend, (mb, self.n_heads, self.hidden_dim_super))
    # g_extend.shape == (mb, self.n_heads, 1, self.hidden_dim_super)
    g_extend = functions.expand_dims(g_extend, 2)

    # update for attention-message B h_i
    # h (mb, atom, ch)
    # Bh_i.shape == (mb, atom, self.n_heads * self.hidden_dim_super)
    Bh_i = self.B(h)
    # Bh_i.shape == (mb, atom, num_head, ch)
    Bh_i = functions.reshape(
        Bh_i, (mb, atom, self.n_heads, self.hidden_dim_super))
    # Bh_i.shape == (mb, num_head, atom, ch)
    Bh_i = functions.transpose(Bh_i, [0, 2, 1, 3])

    # take g^{T} * B * h_i, indexed by i
    # b_hi.shape == (mb, self.n_heads, 1, atom)
    # This will reduce the last hidden_dim_super axis
    b_hi = functions.matmul(g_extend, Bh_i, transb=True)

    # softmax: sum/normalize over the last axis (i)
    # attention_i.shape == (mb, self.n_heads, 1, atom)
    attention_i = functions.softmax(b_hi, axis=3)
    if self.dropout_ratio > 0.0:
        attention_i = functions.dropout(
            attention_i, ratio=self.dropout_ratio)

    # element-wise product --> sum over i
    # attention_sum.shape == (mb, self.n_heads, 1, ch)
    attention_sum = functions.matmul(attention_i, h_j)
    # attention_sum.shape == (mb, self.n_heads * ch)
    attention_sum = functions.reshape(
        attention_sum, (mb, self.n_heads * ch))

    # weighting h for different heads
    # h_trans.shape == (mb, self.n_heads * ch)
    # TODO (nakago): Consider to delete `V_super`, maybe not necessary.
    # TODO (nakago): Consider to move `V_super` to calculate `h_j`??
    h_trans = self.V_super(attention_sum)
    # compress heads
    h_trans = self.W_super(h_trans)
    # h_trans.shape == (mb, self.hidden_dim_super)
    h_trans = self.activation(h_trans)
    return h_trans
def test_invalid_shape(self):
    x_data = numpy.zeros((2, 3, 4), dtype=numpy.float32)
    y_data = numpy.zeros((1, 4, 3), dtype=numpy.float32)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    with self.assertRaises(type_check.InvalidType):
        F.matmul(x, y)
def _a(self, x, a):
    L_matrix = self._L_matrix(x)
    mu = self._mu(x)
    P_matrix = F.matmul(L_matrix, L_matrix, transb=True)
    a_minus_mu = (a - mu)[:, :, None]
    return -0.5 * F.matmul(
        a_minus_mu, F.matmul(P_matrix, a_minus_mu), transa=True)[:, 0]
def test_invalid_ndim(self):
    x_data = numpy.zeros((3, 2, 5), dtype=numpy.float32)
    y_data = numpy.zeros((3, 5), dtype=numpy.float32)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    with self.assertRaises(type_check.InvalidType):
        F.matmul(x, y)
def spectral_normalize(weight, init_u):
    W = weight.reshape(weight.shape[0], -1)  # C x N
    # N x C * C x 1 -> N x 1
    v = F.normalize(F.matmul(W, init_u, transa=True), eps=1e-12, axis=0)
    # C x N * N x 1 -> C x 1
    u = F.normalize(F.matmul(W, v), eps=1e-12, axis=0)
    # 1 x C * C x N * N x 1 -> 1 x 1 (spectral norm)
    sigma = F.matmul(F.matmul(u, W, transa=True), v)
    return weight / sigma
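# Hedged usage sketch for spectral_normalize: in real training `init_u`
# persists across calls so the power iteration converges; a fresh random
# vector here gives only a one-step approximation.
import numpy as np
np.random.seed(0)
W = np.random.randn(6, 12).astype(np.float32)
u = np.random.randn(6, 1).astype(np.float32)
W_sn = spectral_normalize(W, u)
# After convergence the largest singular value of W_sn approaches 1.
print(np.linalg.svd(W_sn.data.reshape(6, -1), compute_uv=False)[0])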
def update(self, data_x):
    u = F.matmul(
        self.Wi,
        F.vstack((xp.array([[1]], dtype=xp.float32), data_x)))
    r_tld = self.resv_act(F.matmul(self.Wr, self.resv) + u)
    new_r = (1 - self.leaking_rate) * self.resv \
        + self.leaking_rate * r_tld
    self.resv = new_r
    return new_r
def __call__(self, s, a):
    L_matrix = self._L(s)
    mu = self._mu(s)
    P_matrix = F.matmul(L_matrix, L_matrix, transb=True)
    a_minus_mu = (a - mu)[:, :, None]
    return -0.5 * F.matmul(
        a_minus_mu, F.matmul(P_matrix, a_minus_mu), transa=True)[:, 0]
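# Sketch of why the quadratic term above behaves like an advantage:
# P = L L^T is positive semidefinite, so -0.5 (a - mu)^T P (a - mu) <= 0.
# Toy numbers below, independent of the class.
import numpy as np
L = np.tril(np.random.randn(3, 3)).astype(np.float32)
P = L.dot(L.T)
d = np.random.randn(3).astype(np.float32)  # plays the role of a - mu
assert -0.5 * d.dot(P).dot(d) <= 1e-6  # non-positive up to rounding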
def encode_decode_train(self, in_word_list, out_word_list,
                        train=True, sample=False):
    xp = cuda.cupy if self.gpuid >= 0 else np
    self.reset_state()
    # Add GO_ID, EOS_ID to decoder input
    decoder_word_list = [GO_ID] + out_word_list + [EOS_ID]
    # encode list of words/tokens
    enc_states = self.encode_list(in_word_list, train=train)
    # initialize decoder LSTM to final encoder state
    self.set_decoder_state()
    # decode and compute loss
    # convert list of tokens into chainer variable list
    var_dec = Variable(
        xp.asarray(decoder_word_list, dtype=np.int32).reshape((-1, 1)),
        volatile=not train)
    # Initialise first decoded word to GO_ID
    pred_word = Variable(
        xp.asarray([GO_ID], dtype=np.int32), volatile=not train)
    # compute loss
    self.loss = 0
    # decode tokens
    for next_word_var in var_dec[1:]:
        self.decode(pred_word, train=train)
        if self.attn == NO_ATTN:
            predicted_out = self.out(self[self.lstm_dec[-1]].h)
        else:
            # __QUESTION Add attention
            c = F.matmul(self[self.lstm_dec[-1]].h, enc_states, transb=True)
            score = F.softmax(c)
            ct = F.matmul(score, enc_states)
            s = F.concat((ct, self[self.lstm_dec[-1]].h))
            hs = F.tanh(s)
            predict = self.attention(hs)
            predicted_out = self.out(predict)
        # compute loss
        prob = F.softmax(predicted_out)
        pred_word = self.select_word(prob, train=train, sample=False)
        # pred_word = Variable(xp.asarray([pred_word.data], dtype=np.int32),
        #                      volatile=not train)
        '''
        ___QUESTION-1-DESCRIBE-E-START___
        Explain what loss is computed with an example
        What does this value mean?
        '''
        self.loss += F.softmax_cross_entropy(predicted_out, next_word_var)
        '''___QUESTION-1-DESCRIBE-E-END___'''
    report({"loss": self.loss}, self)
    return self.loss
def calc_score(labels, ts, dists):
    score = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_labels = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    T_ts = chainer.Variable(np.array([[0.0]], dtype=np.float32))

    # make labels vector and labels transitions vector
    labels_vec = list()
    pre_label = None
    for label in labels:
        if pre_label is not None:
            T_labels += trans2para(pre_label, label)
        for i in range(label_num):
            if i == label:
                labels_vec.append(1)
            else:
                labels_vec.append(0)
        pre_label = label
    labels_matrix = make_chainer_matrix(labels_vec)

    # make true labels vector
    ts_vec = list()
    pre_label = None
    for t in ts:
        if pre_label is not None:
            T_ts += trans2para(pre_label, t)
        for i in range(label_num):
            if i == t:
                ts_vec.append(1)
            else:
                ts_vec.append(0)
        pre_label = t
    ts_matrix = make_chainer_matrix(ts_vec)
    dists_matrix = F.concat(tuple(dists))
    #print('gold_labels:', ts_vec)
    #print('labels:', labels_vec)
    #print('labels_matrix.data[0]:', labels_matrix.data[0])

    # make loss (difference between y_hat and y)
    diff_cnt = chainer.Variable(np.array([[0.0]], dtype=np.float32))
    for i in range(len(labels)):
        if labels[i] != ts[i]:
            diff_cnt += chainer.Variable(np.array([[1.0]], dtype=np.float32))
        correct = get_onehot(ts[i])
        #diff_cnt += F.softmax_cross_entropy(dists[i], correct)

    predict_score = F.matmul(labels_matrix, dists_matrix, transb=True) + T_labels
    true_score = F.matmul(ts_matrix, dists_matrix, transb=True) + T_ts
    score = predict_score - true_score + eta * diff_cnt
    #print('predict_score:', predict_score.data)
    #print('true_score:', true_score.data)
    return score
def occupancy_net_loss(self, occupancy_net, depth, theta, z):
    R = theta[:, :3, :3]
    t = theta[:, :3, -1:]
    depth = depth.reshape(depth.shape[0], 1, -1)
    eps = self.xp.random.normal(0, 0.05, size=depth.shape)
    real_pos = F.matmul(F.matmul(R, self.inv_K), (depth + eps) * self.p) + t
    label = (eps > 0).reshape(-1, 1).astype("int32")
    occupancy_field = occupancy_net(z, real_pos + eps)
    return F.sigmoid_cross_entropy(occupancy_field, label)
def node2edge(self, x, rel_rec, rel_send):
    # NOTE: Assumes that we have the same graph across all samples.
    # x: [batch_size, num_nodes, feature_dim]
    # rel_rec, rel_send: [num_edges, num_nodes]
    receivers = F.matmul(rel_rec, x)
    senders = F.matmul(rel_send, x)
    # receivers, senders: [batch_size, num_edges, feature_dim]
    edges = F.concat([receivers, senders], axis=2)  # along feature_dim
    return edges
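# Illustrative sketch of the gather node2edge performs: rel_rec and rel_send
# are one-hot edge-to-node incidence matrices, so the matmul is a row lookup.
import numpy as np
x = np.arange(6, dtype=np.float32).reshape(3, 2)        # 3 nodes, 2 features
rel_rec = np.array([[1, 0, 0], [0, 0, 1]], np.float32)  # edge 0 <- node 0, edge 1 <- node 2
print(rel_rec.dot(x))  # rows of x for nodes 0 and 2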
def word_attention(emb_a, emb_b):
    A = F.matmul(emb_a, emb_b, transb=True)
    A = F.sum(A, axis=-1, keepdims=True)
    A = F.softmax(A, axis=-2)
    B, N, C = emb_a.shape
    wf = F.matmul(emb_a, A, transa=True)
    wf = F.transpose(wf, axes=(0, 2, 1))
    return wf, A
def house_transform(self, z):
    vec_t = self.qh_vec_0
    for i in range(self.num_trans):
        vec_t = F.identity(self.qlin_h_vec_t(vec_t))
        vec_t_product = F.matmul(vec_t, vec_t, transb=True)
        vec_t_norm_sqr = F.tile(
            F.sum(F.square(vec_t)), (z.shape[0], z.shape[1]))
        z = z - 2 * F.matmul(vec_t_product, z) / vec_t_norm_sqr
    return z
def scaled_dot_product_attention(queries, keys, values, scale=1., mask=None):
    x1 = F.matmul(queries, keys, transb=True) \
        * xp.array(scale, dtype=keys.dtype)
    x2 = F.where(mask, xp.ones_like(x1.array) * -xp.inf, x1) \
        if mask is not None else x1
    x3 = F.softmax(x2, axis=-1)
    x4 = F.matmul(x3, values)
    return x4
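# Minimal usage sketch for scaled_dot_product_attention; it reads a
# module-level `xp` (numpy or cupy), assumed to be numpy here.
import numpy as np
xp = np
q = np.random.randn(3, 16).astype(np.float32)   # (n_queries, d)
k = np.random.randn(5, 16).astype(np.float32)   # (n_keys, d)
v = np.random.randn(5, 32).astype(np.float32)   # (n_keys, d_v)
out = scaled_dot_product_attention(q, k, v, scale=1.0 / np.sqrt(16))
print(out.shape)  # (3, 32)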
def st_graph_output(self, f_A, f_G):
    # f_A shape = (N, D), f_G shape = (N, 4)
    assert f_A.shape[0] == f_G.shape[0]
    if self.add_self:
        assert f_A.shape[1] == self.out_size
    N = f_G.shape[0]
    assert N % self.frame_node_num == 0
    T = N // self.frame_node_num
    geo_dim = f_G.shape[1]
    f_A_orig = f_A
    f_G = F.reshape(f_G, (T, self.frame_node_num, geo_dim))
    f_A = F.reshape(f_A, (T, self.frame_node_num, f_A.shape[-1]))
    assert f_A_orig.ndim == 2, f_A_orig.ndim
    f_R = []
    for nr in range(self.num_relations):
        # NOTE: the tile repeat must be the per-frame node count,
        # not the chainer.functions module F.
        f_G_1 = F.tile(f_G, (1, 1, self.frame_node_num))  # (T, F, 4 * F)
        f_G_1 = F.reshape(
            f_G_1, (T, self.frame_node_num ** 2, geo_dim))  # (T, F^2, 4)
        f_G_2 = F.tile(f_G, (1, self.frame_node_num, 1))  # (T, F * F, 4)
        encoded_offset = self.encode_box_offset(
            f_G_1.reshape(-1, geo_dim),
            f_G_2.reshape(-1, geo_dim))  # (T x F x F, 4)
        # paper formula (5), shape = (T, F, F)
        w_G = F.relu(getattr(self, self.W_G_lst[nr])(
            self.position_encoding(encoded_offset, self.d_g)))  # (TxFxF, 1)
        w_G = F.reshape(
            w_G, shape=(T, self.frame_node_num, self.frame_node_num))
        # paper formula (4), shape = (N, N)
        w_K_result = getattr(self, self.W_K_lst[nr])(f_A_orig).reshape(
            T, self.frame_node_num, self.d_k)  # (T, F, d_k)
        w_Q_transpose_result = F.transpose(
            getattr(self, self.W_Q_lst[nr])(f_A_orig).reshape(
                T, self.frame_node_num, self.d_k),
            axes=(0, 2, 1))  # (T, d_k, F)
        w_A = F.matmul(w_K_result, w_Q_transpose_result)  # (T, F, F)
        w_A = w_A + F.log(w_G)
        # paper formula (3), shape = (T, F, F). The original formula is a
        # weighted softmax; chainer does not provide one, so the weight is
        # folded in via log(w_G) before the plain softmax, equivalent to:
        # w = w_G * F.exp(w_A) / F.sum(w_G * F.exp(w_A), axis=1)
        w = F.softmax(w_A, axis=2)
        # paper formula (2), weighted sum:
        # (T, F, F) x (T, F, out_size // nr) = (T, F, out_size // nr)
        f_R_nr = F.matmul(
            w, getattr(self, self.W_V_lst[nr])(f_A_orig).reshape(
                T, self.frame_node_num, self.w_v_outsize))
        f_R.append(f_R_nr)
    if self.add_self:
        return f_A + F.concat(f_R, axis=2).reshape(N, self.out_size)
    return F.concat(f_R, axis=2).reshape(N, self.out_size)
def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send,
                        single_timestep_rel_type):
    # single_timestep_inputs: [batch_size, num_sequences, num_nodes, feature_dims]
    # single_timestep_rel_type: [batch_size, num_sequences, num_edges, edge_types]
    batch_size, num_sequences, num_edges, _ = single_timestep_rel_type.shape
    _, num_nodes = rel_rec.shape

    # Node2edge
    # rel_rec, rel_send: [num_edges, num_nodes]
    receivers = F.matmul(rel_rec, single_timestep_inputs)
    senders = F.matmul(rel_send, single_timestep_inputs)
    pre_msg = F.concat([receivers, senders], axis=-1)
    # pre_msg: [batch_size, num_sequences, num_edges, 2 * feature_dims]
    pre_msg = F.reshape(
        pre_msg, [batch_size * num_sequences * num_edges, -1])

    all_msgs = chainer.Variable(
        pre_msg.xp.zeros(
            (batch_size, num_sequences, num_edges, self.msg_out_shape),
            dtype=single_timestep_rel_type.dtype))

    if self.skip_first_edge_type:
        start_idx = 1
    else:
        start_idx = 0

    # Run a separate MLP for every edge type
    # NOTE: to exclude one edge type, simply offset range by 1
    for i in range(start_idx, len(self.msg_fc2)):
        msg = F.relu(self.msg_fc1[i](pre_msg))
        msg = F.dropout(msg, self.dropout_prob)
        msg = F.relu(self.msg_fc2[i](msg))
        # msg: [batch_size * num_sequences * num_edges, msg_hid]
        msg = F.reshape(msg, [batch_size, num_sequences, num_edges, -1])
        msg = msg * single_timestep_rel_type[:, :, :, i:i + 1]
        all_msgs += msg

    # Aggregate all msgs to receiver
    # all_msgs: [batch_size, num_sequences, num_edges, msg_out_shape]
    agg_msgs = F.matmul(rel_rec.T, all_msgs)

    # Skip connection
    aug_inputs = F.concat([single_timestep_inputs, agg_msgs], axis=-1)
    # aug_inputs: [batch_size, num_sequences, num_nodes, msg_out_shape + feature_dims]
    aug_inputs = F.reshape(
        aug_inputs, [batch_size * num_sequences * num_nodes, -1])

    # Output MLP
    pred = F.dropout(F.relu(self.out_fc1(aug_inputs)), self.dropout_prob)
    pred = F.dropout(F.relu(self.out_fc2(pred)), self.dropout_prob)
    pred = self.out_fc3(pred)
    pred = F.reshape(pred, [batch_size, num_sequences, num_nodes, -1])

    # Predict position/velocity difference
    return single_timestep_inputs + pred
def forward(self, x, q, is_linear=False):
    # Random noise for learning time invariance
    if chainer.configuration.config.train:
        xp = chainer.cuda.get_array_module(x)
        z = xp.zeros((1, x.shape[1]), dtype=numpy.float32)
        i = 0
        while i < x.shape[0]:
            if numpy.random.rand(1)[0] < 0.1:
                x = xp.vstack((x[:i], z, x[i:]))
                i += 1
            i += 1
    max_knowledge, D = self.temporal_a.shape
    if len(x) > max_knowledge:
        x = x[len(x) - max_knowledge:]
    j = max_knowledge - len(x)
    if self.pe:
        a = xp.arange(1, 0, -1 / D)
        b = xp.arange(-1, 1, 2 / D)
        M = a * F.matmul(x[:, :self.V], self.embedid_a) \
            + b * F.matmul(x[:, self.V:], self.embedid_a) \
            + self.temporal_a[j:]
        C = a * F.matmul(x[:, :self.V], self.embedid_c) \
            + b * F.matmul(x[:, self.V:], self.embedid_c) \
            + self.temporal_c[j:]
    else:
        M = F.matmul(x[:, :self.V], self.embedid_a) + self.temporal_a[j:]
        C = F.matmul(x[:, :self.V], self.embedid_c) + self.temporal_c[j:]
    U = F.matmul(q.reshape(1, -1), self.embedid_b)
    for l in range(self.layer):
        P = F.transpose(F.matmul(M, U[0]))
        if not is_linear:
            P = F.softmax(P)
        O = F.matmul(P, C)
        if l == self.layer - 1:
            U = U + O
        else:
            U = self.H(U) + O
    return self.W(U)  # (1, D)
def norm(x):
    """Normalize each row vector: x -> x / |x|"""
    s = F.sum(x ** 2, axis=1) ** 0.5  # [a, b]^T
    height = s.data.shape[0]
    a = Variable(np.ones((1, height), dtype=np.float32))  # [1, 1]
    eye = Variable(np.eye(height, dtype=np.float32))
    b = F.inv(F.matmul(s, a) * eye)  # [1/a, 0; 0, 1/b]
    return F.matmul(b, x)
def ls_solution(g0, g1):
    """Get least-squares solution matrix for regression from the rows of g0
    to the rows of g1. Both g0 and g1 are chainer Variables.
    """
    g0t = F.transpose(g0)
    if g0.shape[0] >= g0.shape[1]:
        g0pinv = F.matmul(F.inv(F.matmul(g0t, g0)), g0t)
    else:
        g0pinv = F.matmul(g0t, F.inv(F.matmul(g0, g0t)))
    K = F.transpose(F.matmul(g0pinv, g1))
    return K
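# Quick numpy check (illustrative) that ls_solution recovers K when
# g1 = g0 K^T row-wise.
import numpy as np
import chainer
g0 = np.random.randn(20, 5).astype(np.float32)
K_true = np.random.randn(5, 5).astype(np.float32)
g1 = g0.dot(K_true.T)
K = ls_solution(chainer.Variable(g0), chainer.Variable(g1))
print(np.allclose(K.data, K_true, atol=1e-3))  # True up to float32 error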
def translate(self, xs, max_length=100):
    xs = numpy.insert(xs, 0, 2)
    xs = numpy.append(xs, 0)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        exs = self.embed_x(
            Variable(self.xp.array(xs, dtype=self.xp.int32)))
        h = F.expand_dims(exs, axis=0)
        h = F.expand_dims(h, axis=0)
        h = F.transpose(h, (0, 1, 3, 2))
        for i in range(self.stack):
            h = self.gcnn[i](h)
        h = F.squeeze(h, axis=1)
        h = F.squeeze(h, axis=0)
        h = F.transpose(h, (1, 0))
        ys = self.xp.full(1, 2, self.xp.int32)
        result = []
        hx = None
        cx = None
        hx2 = None
        cx2 = None
        for i in range(max_length):
            eys = self.embed_y(ys)
            eyys = self.embed_yy(ys)
            eys2 = [eys]
            eyys2 = [eyys]
            hx, cx, ss = self.decoder(hx, cx, eys2)
            hx2, cx2, ss2 = self.decoder2(hx2, cx2, eyys2)
            batch_A = F.matmul(h, ss[0], transb=True) * self.scale_score
            batch_A = F.softmax(batch_A, axis=0)
            if self.weight:
                with open("weight/wei.txt", "a", encoding="utf-8") as f:
                    for j in range(len(batch_A)):
                        f.write(str(batch_A[j][0].data) + "\n")
                    f.write("--------------\n")
            s = F.matmul(batch_A, h, transa=True)
            t = self.We(s) + self.Ws(ss2[0])
            ys = self.xp.argmax(t.data, axis=1).astype(self.xp.int32)
            if ys[0] == 0:
                break
            result.append(ys)
    result = cuda.to_cpu(
        self.xp.concatenate([self.xp.expand_dims(x, 0) for x in result]).T)
    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def obtain_loss(self, lambda_val, P1, W1, X, P2, W2, Y, A, R, alpha, beta):
    loss = 0
    loss += lambda_val * F.sum(
        F.square(F.matmul(P1, F.transpose(self.u.W)) - F.matmul(W1, X)))
    loss += lambda_val * F.sum(
        F.square(F.matmul(P2, F.transpose(self.v.W)) - F.matmul(W2, Y)))
    loss += alpha * F.sum(
        F.square(A * (R - F.matmul(self.u.W, F.transpose(self.v.W)))))
    loss += beta * (
        F.sum(F.square(self.u.W)) + F.sum(F.square(self.v.W)))
    return loss
def __call__(self, e1, e2):
    ele2 = F.reshape(
        F.batch_matmul(e1[:, :, None], e2[:, None, :]),
        (-1, self.in_size1 * self.in_size2))
    res = F.matmul(ele2, F.reshape(
        self.W, (self.in_size1 * self.in_size2, self.out_size))) + \
        F.matmul(e1, self.V1) + \
        F.matmul(e2, self.V2)
    res, bias = F.broadcast(res, self.b)
    return res + bias
def __call__(self, text, x, t, textlens, xlens):
    batchsize = text.shape[0]
    vk = self.text_enc(text)
    v = vk[:, :self.d, :]
    k = vk[:, self.d:, :]
    q = self.audio_enc(x)
    a = F.matmul(F.transpose(k, (0, 2, 1)), q)
    a = F.softmax(a / self.xp.sqrt(self.d))
    r = F.matmul(v, a)
    rd = F.concat((r, q))
    y = self.audio_dec(rd)

    loss_bin = 0
    for i in range(batchsize):
        loss_bin += F.mean(
            F.bernoulli_nll(t[i, :, :xlens[i]], y[i, :, :xlens[i]], 'no'))
    loss_bin /= batchsize

    y = F.sigmoid(y)
    loss_l1 = 0
    for i in range(batchsize):
        loss_l1 += F.mean_absolute_error(
            t[i, :, :xlens[i]], y[i, :, :xlens[i]])
    loss_l1 /= batchsize

    loss_att = 0
    for i in range(batchsize):
        N = textlens[i]
        T = xlens[i]

        def w_fun(n, t):
            return 1 - np.exp(
                -((n / (N - 1) - t / (T - 1)) ** 2) / (2 * self.g ** 2))

        w = np.fromfunction(w_fun, (a.shape[1], T), dtype='f')
        w = self.xp.array(w)
        loss_att += F.mean(w * a[i, :, :T])
    loss_att /= batchsize

    loss = loss_bin + loss_l1 + loss_att
    chainer.reporter.report({
        'loss_bin': loss_bin,
        'loss_l1': loss_l1,
        'loss_att': loss_att,
    })
    return loss, y, a
def global_attention_layer(self, dec_h, attention):
    """
    https://nlp.stanford.edu/pubs/emnlp15_attn.pdf

    :param dec_h: decoder hidden state
    :param attention: encoder hidden states
    :return:
    """
    # Global align weights
    weights = F.softmax(F.matmul(dec_h, attention, transb=True))
    # Context vector (attention layer output)
    contexts = F.matmul(weights, attention)
    # Combine the context vector with the decoder hidden state
    o = F.tanh(self.attention(F.concat((contexts, dec_h))))
    return self.y(o)
def calculate_max_singular_value(weight_matrix, u, v):
    """Calculate max singular value by power iteration method.

    Args:
        weight_matrix (~chainer.Variable)
        u (numpy.ndarray or cupy.ndarray)
        v (numpy.ndarray or cupy.ndarray)

    Returns:
        ~chainer.Variable: Max singular value via power iteration method.

    """
    sigma = F.matmul(F.matmul(u, weight_matrix), v)
    return sigma
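# The u and v vectors above come from power iteration performed outside the
# function; a common update pattern (a sketch, not the original loop):
import numpy as np

def _l2normalize(x, eps=1e-12):
    return x / (np.linalg.norm(x) + eps)

W = np.random.randn(4, 7).astype(np.float32)
u = _l2normalize(np.random.randn(1, 4).astype(np.float32))
for _ in range(5):  # a few iterations suffice in practice
    v = _l2normalize(u.dot(W))    # (1, 7)
    u = _l2normalize(v.dot(W.T))  # (1, 4)
sigma = calculate_max_singular_value(W, u, v.T)
print(sigma.data[0, 0], np.linalg.svd(W, compute_uv=False)[0])  # close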
def attention_history(self, dL, cue, train=True):
    D = F.concat(dL, axis=0)
    D, Cue = F.broadcast(D, cue)
    S = self.m(F.tanh(self.W_dm(D) + Cue))
    S = F.softmax(F.reshape(S, (1, len(dL))))
    pre_v = F.matmul(S, D)
    return pre_v
def forward(self, doc, wrd, window=5):
    doc, wrd = utils.move(self.xp, doc, wrd)
    proportions = self.proportions(doc)
    ld = dirichlet_likelihood(self.proportions.W)
    context = F.matmul(F.softmax(proportions), self.factors())
    loss = self.loss_func(context, wrd)
    return loss, ld
def __call__(self, x):
    # x.shape == (batchsize, 3, 128, 64)
    batchsize = x.shape[0]
    h = F.elu(self.bn1(self.conv1_1(x)))
    h = F.elu(self.bn2(self.conv1_2(h)))
    h = F.max_pooling_2d(h, 3, 2, cover_all=False)
    h = self.conv2_1(h)
    h = self.conv2_3(h)
    h = self.conv3_1(h)
    h = self.conv3_3(h)
    h = self.conv4_1(h)
    h = self.conv4_3(h)
    h = h.reshape(batchsize, -1)
    h = F.dropout(h, ratio=0.6)
    h = F.elu(self.fc1_bn(self.fc1(h)))

    # Features in rows, normalize axis 1.
    weights = self.mean_vectors
    features = self.ball(h)
    features = F.normalize(features, eps=1e-8)
    scale = F.softplus(self.scale)
    normalized_weight = F.normalize(weights, axis=0, eps=1e-8)
    logits = F.tile(scale[None, ], (batchsize, 1)) * \
        F.matmul(features, normalized_weight)
    return logits
def setUp(self):
    self.x1 = numpy.random.uniform(.5, 1, (m,)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(.5, 1, (m,)).astype(numpy.float32)
    self.gy = numpy.random.uniform(-1, 1, (m, m)).astype(numpy.float32)
    self.op = lambda x, y: F.matmul(x, y, transb=True)
    self.forward_answer = numpy.dot(
        self.x1.reshape(m, 1), self.x2.reshape(1, m))
def _log_prob_words(self, context, temperature=1.0):
    """This calculates a softmax over the vocabulary as a function of the
    dot product of context and word.
    """
    dot = F.matmul(context, F.transpose(self.vocab.W))
    prob = F.softmax(dot / temperature)
    return F.log(prob)
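# Temperature behavior sketch (illustrative numbers): a higher temperature
# flattens the softmax that _log_prob_words applies.
import numpy as np
import chainer.functions as F
dot = np.array([[2.0, 1.0, 0.0]], dtype=np.float32)
print(F.softmax(dot / 1.0).data)   # peaked: ~[0.67, 0.24, 0.09]
print(F.softmax(dot / 10.0).data)  # nearly uniform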
def __call__(self, h, adj):
    """
    Args:
        h: (batchsize, num_nodes, in_channels)
        adj: (batchsize, num_edge_type, num_nodes, num_nodes)

    Returns:
        (batchsize, num_nodes, ch)
    """
    mb, node, ch = h.shape

    # --- self connection, apply linear function ---
    hs = self.graph_linear_self(h)

    # --- relational feature, from neighbor connection ---
    # Expected number of neighbors of a vertex:
    # since you have to divide by it, if it is 0 you need to
    # arbitrarily set it to 1.
    m = self.graph_linear_edge(h)
    m = functions.reshape(
        m, (mb, node, self.out_channels, self.num_edge_type))
    m = functions.transpose(m, (0, 3, 1, 2))
    # m: (batchsize, edge_type, node, ch)
    # hr: (batchsize, edge_type, node, ch)
    hr = functions.matmul(adj, m)
    # hr: (batchsize, node, ch)
    hr = functions.sum(hr, axis=1)
    return hs + hr
def memory(self, x_input, query, layer):
    m = self.encode_input(x_input, layer)  # memory for input
    c = self.encode_output(x_input)        # memory for output
    if layer == 1:
        u = self.encode_query(query)       # memory for query
    else:
        u = query
    # m.data.shape: (50, 20), u.data.shape: (1, 20)
    mu = F.matmul(m, u, transb=True)  # transpose m and take the dot product
    p = F.softmax(mu)  # sentence importance p (attention weights)
    # p.data.shape: (50, 1), c.data.shape: (50, 20)
    o = F.matmul(p, c, transa=True)  # weighted sum of c with p
    # o.data.shape: (1, 20)
    return u + o
def cos(a, b):
    """Row-wise cosine similarity."""
    height = a.data.shape[0]
    c = F.matmul(norm(a), norm(b), transb=True)
    eye = Variable(np.eye(height, dtype=np.float32))
    return F.sum(c * eye, axis=0)
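# What the two helpers compute, in plain numpy (illustrative check): norm()
# left-multiplies by a diagonal matrix of 1/|x_i| to normalize each row, and
# cos() reads the row-wise cosine off the diagonal of norm(a) norm(b)^T.
import numpy as np
a = np.random.randn(4, 8).astype(np.float32)
b = np.random.randn(4, 8).astype(np.float32)
an = a / np.linalg.norm(a, axis=1, keepdims=True)
bn = b / np.linalg.norm(b, axis=1, keepdims=True)
print(np.diag(an.dot(bn.T)))  # row-wise cosine similarities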
def __call__(self, x):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.

    Returns:
        ~chainer.Variable: Output of the linear layer.

    """
    return F.matmul(x, self.W_polarity)
def trans2para(pre_label, next_label):
    pre_label = chainer.Variable(np.array([pre_label], dtype=np.int32))
    trans_vec = model.trans(pre_label)
    onehot = [1 if l == next_label else 0 for l in range(label_num)]
    onehot = chainer.Variable(np.array([onehot], dtype=np.float32))
    trans_para = F.matmul(onehot, F.softmax(trans_vec), transb=True)
    return trans_para
def __call__(self, x, train=True):
    batchsize = 100
    ones = chainer.Variable(
        np.array([[1]] * batchsize).astype(np.float32))
    pre_f = self.W_graph(val_y)  # `val_y` comes from an enclosing scope
    pref_ones = F.dropout(F.matmul(ones, pre_f), train=train)
    polarity_Mat = x * pref_ones
    #polarity_Mat = self.W_graph(x)
    pre_y = self.W_out(polarity_Mat)
    return pre_y
def compute_A_P(a, p):
    # compute matrix P
    conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 = func(
        inputs={'data': p},
        outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    P = [reshape2(x) for x in [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1]]
    # compute matrix A
    conv1_1, conv2_1, conv3_1, conv4_1, conv5_1 = func(
        inputs={'data': a},
        outputs=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'])
    conv1_1A0, conv2_1A0, conv3_1A0, conv4_1A0, conv5_1A0 = [
        reshape2(x) for x in [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1]]
    A = [Fu.matmul(x, x, transa=False, transb=True)
         for x in [conv1_1A0, conv2_1A0, conv3_1A0, conv4_1A0, conv5_1A0]]
    return A, P
def forward(self, ids, bow):
    bow, ids = utils.move(self.xp, bow, ids)
    proportions = self.proportions(ids)
    ld = dirichlet_likelihood(proportions)
    doc = F.matmul(F.softmax(proportions), self.factors())
    logp = F.dropout(self.embedding(doc))
    # loss = -F.sum(bow * F.log_softmax(logp))
    sources, targets, counts = [], [], []
    lpi = F.sum(bow * F.log_softmax(logp), axis=1)
    loss = -F.sum(lpi)
    return loss, ld
def __call__(self, x_input, query, answer, train=True):
    m = self.encode_input(x_input)   # memory for input
    u = self.encode_query(query)     # memory for query
    c = self.encode_output(x_input)  # memory for output
    # m.data.shape: (50, 20), u.data.shape: (1, 20)
    mu = F.matmul(m, u, transb=True)  # transpose m and take the dot product
    p = F.softmax(mu)  # sentence importance p (attention weights)
    # p.data.shape: (50, 1), c.data.shape: (50, 20)
    o = F.matmul(p, c, transa=True)  # weighted sum of c with p
    # o.data.shape: (1, 20)
    predict = self.W(u + o)
    if train:
        return F.softmax_cross_entropy(predict, answer)
    else:
        return F.accuracy(predict, answer)
def __call__(self, x):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.

    Returns:
        ~chainer.Variable: Output of the linear layer.

    """
    if self.has_uninitialized_params:
        self._initialize_params(x.shape[0])
    batch_size = x.data.shape[0]
    batch_ones = chainer.Variable(
        xp.ones((batch_size, 1)).astype(np.float32))
    return x * F.matmul(batch_ones, self.W)
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr,
                   img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y)
                  for y in nn.forward(Variable(img_style, volatile=True))]
    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(
                -20, 20, (1, 3, width, width), dtype=np.float32)
        else:
            img_gen = np.random.uniform(
                -20, 20, (1, 3, width, width)).astype(np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen, xg))
    for i in range(max_iter):
        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch, wd ** 2))
            gogh_matrix = F.matmul(
                gogh_y, gogh_y, transb=True) / np.float32(ch * wd ** 2)

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l]) \
                * F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l]) \
                * F.mean_squared_error(
                    gogh_matrix, Variable(style_mats[l].data)) \
                / np.float32(len(y))
            L += L1 + L2

            if i % 100 == 0:
                print(i, l, L1.data, L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x < -120 else (136 if x > 136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i % 3000 == 0:
            save_image(img_gen, W, nw, nh, i)
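# Standalone sketch of the Gram-matrix step at the heart of the style loss
# above (shapes are illustrative):
import numpy as np
import chainer.functions as F
feat = np.random.randn(1, 64, 32, 32).astype(np.float32)  # (1, ch, h, w)
ch, wd = feat.shape[1], feat.shape[2]
gogh_y = F.reshape(feat, (ch, wd ** 2))                   # (ch, h*w)
gram = F.matmul(gogh_y, gogh_y, transb=True) / np.float32(ch * wd ** 2)
print(gram.shape)  # (64, 64)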
def setUp(self):
    self.x1 = numpy.random.uniform(.5, 1, self.x1_shape)
    self.x1 = self.x1.astype(self.x1_dtype)
    self.x2 = numpy.random.uniform(.5, 1, self.x2_shape)
    self.x2 = self.x2.astype(self.x2_dtype)
    ret_dtype = numpy.result_type(self.x1_dtype, self.x2_dtype)
    self.gy = numpy.random.uniform(-1, 1, self.gy_shape).astype(ret_dtype)
    self.ggx1 = numpy.random.uniform(
        .5, 1, self.x1_shape).astype(self.x1_dtype)
    self.ggx2 = numpy.random.uniform(
        .5, 1, self.x2_shape).astype(self.x2_dtype)
    self.op = lambda x, y: F.matmul(
        x, y, transa=self.transa, transb=self.transb)
    self.forward_answer = self._get_forward_answer(
        self.x1, self.x2, self.transa, self.transb)
def __call__(self, doc_ids):
    """Given an array of document integer indices, returns a vector for
    each document. The vector is composed of topic weights projected onto
    topic vectors.

    Args:
        doc_ids : chainer.Variable
            One-dimensional batch vectors of IDs

    Returns:
        doc_vector : chainer.Variable
            Batch of two-dimensional embeddings for every document.
    """
    # (batchsize, ) --> (batchsize, multinomial)
    proportions = self.proportions(doc_ids, softmax=True)
    # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
    factors = F.dropout(self.factors(), ratio=self.dropout_ratio)
    w_sum = F.matmul(proportions, factors)
    return w_sum
def __call__(self, doc_ids):
    """Given an array of document integer indices, returns a vector for
    each document. The vector is composed of topic weights projected onto
    topic vectors.

    Args:
        doc_ids (~chainer.Variable): One-dimensional batch vectors of IDs

    Returns:
        ~chainer.Variable: Batch of two-dimensional embeddings for every
        document.
    """
    # (batchsize, ) --> (batchsize, logweights)
    w = self.weights(doc_ids)
    # (batchsize, logweights) --> (batchsize, multinomial)
    multi = F.softmax(w)
    # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
    w_sum = F.matmul(multi, self.factors())
    return w_sum
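# The projection in the last step, in plain numpy (illustrative shapes):
# softmaxed topic weights mix the rows of the topic-vector matrix.
import numpy as np
w = np.random.randn(2, 5).astype(np.float32)         # (batchsize, n_topics)
factors = np.random.randn(5, 10).astype(np.float32)  # (n_topics, n_dim)
multi = np.exp(w) / np.exp(w).sum(axis=1, keepdims=True)
print(multi.dot(factors).shape)  # (2, 10)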