def __call__(self, annotion_list, back_word_list, p):
    """Return attention-weighted sums of the annotions and the back words.

    Every (annotion, back_word) pair is scored together with ``p``; the
    exponentiated scores are divided by their total over all pairs and the
    resulting weights are used to accumulate both input sequences.

    :param annotion_list: iterable of annotion vectors
    :param back_word_list: iterable of back-word vectors, zipped with
        ``annotion_list``
    :param p: hidden value; ``p.data.shape[0]`` is taken as the batch size
    :return: ``(annotion_value, back_word_value)``, each accumulated from
        terms reshaped to ``(batch_size, self.hidden_size)``
    """
    n_batch = p.data.shape[0]

    # Pass 1: unnormalised (exponentiated) score per position and their sum.
    scores = []
    score_total = XP.fzeros((n_batch, 1))
    for fwd, bwd in zip(annotion_list, back_word_list):
        hidden = functions.tanh(
            self.annotion_weight(fwd) + self.back_weight(bwd) + self.pw(p))
        score = functions.exp(self.weight_exponential(hidden))
        scores.append(score)
        score_total += score

    # Pass 2: normalise each score and accumulate the weighted vectors.
    out_shape = (n_batch, self.hidden_size)
    ZEROS = XP.fzeros(out_shape)
    annotion_value = ZEROS
    back_word_value = ZEROS
    for fwd, bwd, score in zip(annotion_list, back_word_list, scores):
        score /= score_total
        annotion_value += functions.reshape(
            functions.batch_matmul(fwd, score), out_shape)
        back_word_value += functions.reshape(
            functions.batch_matmul(bwd, score), out_shape)
    return annotion_value, back_word_value
def step(self, x, rnn_states, encoder_states, train):
    """Advance the stacked RNN by one step and attend over the encoder.

    Args:
        x: input passed to ``self.word_emb``.
        rnn_states: one state per entry of ``self.rnns``; a single value
            when ``self.gru`` is truthy, otherwise a ``(c, h)`` pair.
        encoder_states: variable whose ``.data`` has shape
            ``(batch_size, input_length, hidden_dim)``.
        train: forwarded to ``F.dropout``.

    Returns:
        ``(y, normalized_weights, new_states)``: the output of
        ``self.softmax_linear``, the attention weights of shape
        ``(batch_size, input_length)``, and the list of updated states.
    """
    h_in = self.word_emb(x)
    new_states = []
    for layer_idx, (rnn, prev_state) in enumerate(zip(self.rnns, rnn_states)):
        if self.gru:
            next_state = rnn(prev_state, h_in)
            h_in = next_state
        else:
            cell, hidden = prev_state
            next_state = rnn(cell, hidden, h_in)
            _, h_in = next_state
        new_states.append(next_state)
        # Dropout is applied between layers only, never after the last one.
        if layer_idx < len(self.rnns) - 1 and self.dropout_ratio > 0:
            h_in = F.dropout(h_in, self.dropout_ratio, train)

    batch_size, input_length, hidden_dim = encoder_states.data.shape
    # (batch_size, hidden_dim)
    query = F.tanh(self.phi1_linear(h_in))
    # (batch_size, input_length)
    scores = F.reshape(
        F.batch_matmul(encoder_states, query), (batch_size, input_length))
    normalized_weights = F.softmax(scores)
    # (batch_size, hidden_dim)
    context = F.reshape(
        F.batch_matmul(encoder_states, normalized_weights, transa=True),
        (batch_size, hidden_dim))
    # (batch_size, hidden_dim * 2)
    features = F.concat([context, h_in], axis=1)
    # The original author questioned whether the ReLU here is necessary.
    y = self.softmax_linear(F.relu(features))
    return y, normalized_weights, new_states
def __call__(self, fs, bs, h):
    """Compute attention over forward and backward encoder states.

    :param fs: list of intermediate vectors from the forward encoder
    :param bs: list of intermediate vectors from the backward encoder
    :param h: intermediate vector output by the decoder
    :return: weighted average of the forward encoder vectors and weighted
        average of the backward encoder vectors
    """
    n_batch = h.data.shape[0]  # minibatch size

    # Unnormalised weight for every encoder position, plus their running sum.
    weights = []
    weight_total = Variable(xp.zeros((n_batch, 1), dtype='float32'))
    for fwd, bwd in zip(fs, bs):
        hidden = F.tanh(self.fh(fwd) + self.bh(bwd) + self.hh(h))
        weight = F.exp(self.hw(hidden))  # numerator of the softmax
        weights.append(weight)
        weight_total += weight

    # Accumulators for the weighted averages returned to the caller.
    out_shape = (n_batch, self.hidden_size)
    att_f = Variable(xp.zeros(out_shape, dtype='float32'))
    att_b = Variable(xp.zeros(out_shape, dtype='float32'))
    for fwd, bwd, weight in zip(fs, bs, weights):
        weight /= weight_total  # normalise so the weights sum to 1
        att_f += F.reshape(F.batch_matmul(fwd, weight), out_shape)
        att_b += F.reshape(F.batch_matmul(bwd, weight), out_shape)
    return att_f, att_b
def _attn(self, q, k, v):
    """Masked attention of ``q`` against ``k``, applied to ``v``.

    The leading axes of each input are flattened so that only the last two
    remain for the batched products; the result is reshaped back using the
    first three axes of ``v``.
    """
    q_flat = q.reshape(-1, *q.shape[-2:])
    k_flat = k.reshape(-1, *k.shape[-2:])
    w = F.batch_matmul(q_flat, k_flat)
    if self.scale:
        w = w / math.sqrt(v.shape[-1])
    # Same masking as the TF implementation's mask_attn_weights: masked
    # entries are pushed to -1e9 before the softmax.
    mask = self.b.array[0]
    w = w * mask + -1e9 * (1 - mask)
    w = self.attn_dropout(F.softmax(w, axis=2))
    v_flat = v.reshape(-1, *v.shape[-2:])
    attended = F.batch_matmul(w, v_flat)
    return attended.reshape(v.shape[0], v.shape[1], v.shape[2], -1)
def __call__(self, x):
    """Apply attention built from the ``c_f``, ``c_g`` and ``c_h`` branches.

    ``x`` is unpacked as ``(bs, ch, t, wi)``. Each branch output goes through
    leaky-ReLU and is flattened to ``(-1, t, wi)``; the softmax (axis 1) of
    ``f^T g`` is multiplied with the ``h`` branch and the result is reshaped
    back to the shape of ``x``.
    """
    bs, ch, t, wi = x.shape
    flat_shape = (-1, t, wi)
    f = F.reshape(F.leaky_relu(self.c_f(x)), flat_shape)
    g = F.reshape(F.leaky_relu(self.c_g(x)), flat_shape)
    attention = F.softmax(
        F.batch_matmul(F.transpose(f, (0, 2, 1)), g), axis=1)
    value = F.reshape(F.leaky_relu(self.c_h(x)), flat_shape)
    out = F.batch_matmul(attention, value)
    return F.reshape(out, (bs, ch, t, wi))
def calculate_score(self, h, pos, neg, pos_score=None, neg_score=None,
                    multipos=False):
    """Score ``h`` against positive and negative vectors.

    Args:
        h: query vectors, used as-is.
        pos: positive vectors, or an iterable of them when ``multipos``.
        neg: negative vectors.
        pos_score, neg_score: accepted but always overwritten.
        multipos: when true, each entry of ``pos`` is scored and only the
            maximum along axis 1 is kept.

    Returns:
        ``(pos_score, neg_score)``.
    """
    h_pro = h
    if not multipos:
        pos_score = F.batch_matmul(h_pro, pos, transa=True)
    else:
        # Only the best-scoring positive is kept; the others do not
        # contribute to the returned score.
        per_positive = [
            F.batch_matmul(h_pro, candidate, transa=True)
            for candidate in pos
        ]
        pos_score = F.max(
            F.concat(per_positive, axis=1), axis=1, keepdims=True)
    neg_score = F.batch_matmul(h_pro, neg, transa=True)
    return pos_score, neg_score
def query(self, u):
    """Read from the stored memories ``self.m`` / ``self.c`` with query ``u``.

    Indices running from ``size - 1`` down to ``0`` are embedded by
    ``self.TA`` and ``self.TC`` and added to the memories before the
    attention read; the read-out is added to ``u`` and returned.
    """
    xp = cuda.get_array_module(u)
    n_slots = self.m.shape[1]
    reversed_inds = xp.arange(n_slots - 1, -1, -1, dtype=numpy.int32)
    temporal_m = F.broadcast_to(self.TA(reversed_inds), self.m.shape)
    temporal_c = F.broadcast_to(self.TC(reversed_inds), self.c.shape)
    attention = F.softmax(F.batch_matmul(self.m + temporal_m, u))
    readout = F.batch_matmul(F.swapaxes(self.c + temporal_c, 2, 1), attention)
    readout = F.squeeze(readout, -1)
    return readout + u
def __call__(self, x):
    """Self-attention over the flattened spatial positions of ``x``.

    Query, key and value projections are flattened to
    ``(batch, -1, height * width)``. The softmax (last axis) of
    ``query^T key`` weights the values; the result is passed through
    ``self.scale`` and added to ``x``.
    """
    batch, channels, height, width = x.shape
    flat_shape = (batch, -1, height * width)
    proj_query = self.query_conv(x).reshape(flat_shape)
    proj_key = self.key_conv(x).reshape(flat_shape)
    proj_value = self.value_conv(x).reshape(flat_shape)

    attention = F.softmax(
        F.batch_matmul(proj_query, proj_key, transa=True), axis=-1)
    attended = F.batch_matmul(proj_value, attention, transb=True)
    attended = attended.reshape((batch, -1, height, width))
    return self.scale(attended) + x
def query(self, u):
    """Read from the stored memories ``self.m`` / ``self.c`` with query ``u``.

    Indices running from ``size - 1`` down to ``0`` are embedded by
    ``self.TA`` and ``self.TC`` and added to the memories before the
    attention read; the read-out is added to ``u`` and returned.
    """
    xp = backend.get_array_module(u)
    slot_count = self.m.shape[1]
    inds = xp.arange(slot_count - 1, -1, -1, dtype=numpy.int32)
    tm = F.broadcast_to(self.TA(inds), self.m.shape)
    tc = F.broadcast_to(self.TC(inds), self.c.shape)
    keys = self.m + tm
    probs = F.softmax(F.batch_matmul(keys, u))
    values = F.swapaxes(self.c + tc, 2, 1)
    out = F.squeeze(F.batch_matmul(values, probs), -1)
    return out + u
def evaluate(self, images, labels):
    """Evaluate the accuracy score on one batch of images.

    The first ``nb_class_test * n_shot`` encoded images are used as the
    support set and the remainder as the query set.

    Args:
        images: sequence of image arrays; stacked with ``self.xp.stack``.
        labels: labels aligned with ``images``; only the query part is
            passed to ``self.compute_accuracy``.

    Returns:
        A one-element list holding the value returned by
        ``self.compute_accuracy``.
    """
    nb_class = self.nb_class_test
    images = self.xp.stack(images)
    batchsize = images.shape[0]
    accs = []

    key = self.encoder(images, batchsize, train=False)
    n_support = nb_class * self.n_shot
    support_set = key[:n_support, :]
    query_set = key[n_support:, :]
    average_key = F.mean(
        F.reshape(support_set, [self.n_shot, nb_class, -1]), axis=0)
    batchsize_q = len(query_set.data)

    pow_avg = self.compute_power_avg_phi(
        batchsize_q, nb_class, average_key, train=False)
    # Builtin int is used because the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24 (it was always just ``int``).
    phi_ind = [int(ind) for ind in self.select_phi(average_key, pow_avg)]

    M = self.Projection_Space(
        average_key, batchsize_q, nb_class, train=False, phi_ind=phi_ind)
    # M (M^T q) for every query vector q.
    r_t = F.reshape(
        F.batch_matmul(M, F.batch_matmul(M, query_set, transa=True)),
        (batchsize_q, -1))
    pow_t = self.compute_power(
        batchsize_q, query_set, M, nb_class, train=False, phi_ind=phi_ind)

    accs_tmp = self.compute_accuracy(
        labels[n_support:], r_t, pow_t, batchsize_q, nb_class,
        phi_ind=phi_ind)
    accs.append(accs_tmp)
    return accs
def query(self, u):
    """Read from the memories ``self.m`` / ``self.c`` using query ``u``.

    Reversed position indices are embedded by ``self.TA`` and ``self.TC``,
    broadcast over the batch and added to the memories. The softmax of
    ``(m + tm) u`` weights ``c + tc``; the read-out, reshaped to
    ``(batch, m.data.shape[2])``, is added to ``u``.
    """
    m, c = self.m, self.c
    batch, size = m.data.shape[:2]
    inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
    probs = F.softmax(F.batch_matmul(m + tm, u))
    readout = F.batch_matmul(F.swapaxes(c + tc, 2, 1), probs)
    readout = F.reshape(readout, (batch, m.data.shape[2]))
    return readout + u
def InvFilter(self, X_patch):
    """Compute ``Fy^T (X_patch / Gamma) Fx`` as a batched product.

    Reads ``self.Fy``, ``self.Fx`` and ``self.Gamma``. Shapes below are
    taken from the original notes and are not checked here.

    [input]
    X_patch : BxPxP[mono] 3BxPxP[color] matrix (Variable)

    [attributes used]
    Fx : BxPxW[mono] BxPxW[color] matrix (Variable)
    Fy : BxPxH[mono] BxPxH[color] matrix (Variable)
    Gamma : BxHxW[mono] 3BxHxW[color] (Variable)

    [output]
    X : BxHxW[mono] 3BxHxW[color] matrix (Variable)
    """
    normalized_patch = X_patch / self.Gamma
    rows_projected = F.batch_matmul(self.Fy, normalized_patch, transa=True)
    return F.batch_matmul(rows_projected, self.Fx)
def fwd(self, mb):
    """Compute the network output for a minibatch.

    Pipeline: embedding -> CNN -> highway 1 -> BiLSTM -> highway 2 ->
    soft attention -> fully connected layer (-> optional batch norm).

    Fix over the previous version: the attention weights are now computed
    and applied per sample. Previously only the weights of sample 0
    (``F.softmax(a_i)[0, :]``) were kept and broadcast to the whole
    minibatch, so every sample was pooled with sample 0's attention.

    Args:
        mb: minibatch; per the original notes
            ``(mb_size, channel_size(1), radical_sequence)``.

    Returns:
        Output of ``self.fc`` (after ``self.bnorm_last`` when
        ``self.a.bnorm_flag is True``).
    """
    # radical_emb (mb_size, channel_size, radical_sequence, embed_dim)
    radical_emb = self.embed(mb)
    # cnn_output (mb_size, vector_dim(output_channel), word_step, 1)
    cnn_output = self.concat_cnn(radical_emb)[:, :, :, 0]
    mb_size = cnn_output.shape[0]
    vector_dim = cnn_output.shape[1]
    word_step_len = cnn_output.shape[2]

    # -> cnn_output (mb_size * word_step, vector_dim) -> Highway1
    cnn_output = F.swapaxes(cnn_output, 1, 2)
    cnn_output = F.reshape(cnn_output, (mb_size * word_step_len, vector_dim))
    hw_out = self.hw1(cnn_output)
    hw_out = F.reshape(hw_out, (mb_size, word_step_len, vector_dim))

    # The LSTM layer takes a list with one entry per sample.
    hw_out = [hw_out[i, :, :] for i in range(len(mb))]
    # BiLSTM
    hy, cy, ys = self.bi_lstm(hx=None, cx=None, xs=hw_out)
    # list -> single variable stacked along axis 0
    h_i = F.concat([i[None, :, :] for i in ys], axis=0)

    # Highway2 (the BiLSTM output is twice as wide)
    vector_dim = vector_dim * 2
    ys = F.reshape(h_i, (mb_size * word_step_len, vector_dim))
    u_i = self.hw2(ys)
    u_i = F.reshape(u_i, (mb_size, word_step_len, vector_dim))

    # Soft attention: one weight per word step, normalised per sample.
    u_a = F.broadcast_to(self.u_a, (u_i.shape[0], vector_dim))
    a_i = F.batch_matmul(u_i, u_a)[:, :, 0]  # (mb_size, word_step)
    a_i = F.softmax(a_i)                     # softmax over word steps

    # h_i -> (mb_size, vector, word_step); per-sample product with a_i
    h_i = F.swapaxes(h_i, 1, 2)
    z = F.batch_matmul(h_i, a_i)[:, :, 0]

    # output
    y = self.fc(z)
    if self.a.bnorm_flag is True:
        y = self.bnorm_last(y)
    return y
def query(self, u):
    """Read from the memories ``self.m`` / ``self.c`` using query ``u``.

    Reversed position indices are embedded by ``self.TA`` and ``self.TC``,
    broadcast over the batch and added to the memories. The softmax of
    ``(m + tm) u`` weights ``c + tc``; the read-out, reshaped to
    ``(batch, m.data.shape[2])``, is added to ``u``.
    """
    memory_in = self.m
    memory_out = self.c
    batch, size = memory_in.data.shape[:2]
    inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch, ) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch, ) + tc.data.shape)
    p = F.softmax(F.batch_matmul(memory_in + tm, u))
    o = F.batch_matmul(F.swapaxes(memory_out + tc, 2, 1), p)
    o = F.reshape(o, (batch, memory_in.data.shape[2]))
    return o + u
def read(self, h):
    """Attend over the buffered keys with ``h`` and read the buffered values.

    Stores the attention weights on ``self.p`` as a side effect.

    Returns:
        ``(o, self.p)``: the read-out reshaped to ``(B, m)`` and the
        attention weights of shape ``(B, M)`` (shape letters follow the
        original annotations).
    """
    keys = F.stack(self.key_buff, axis=1)  # (B, M, m)
    scores = F.batch_matmul(keys, h, transa=False, transb=False)
    self.p = F.softmax(
        F.reshape(scores, (h.shape[0], keys.shape[1])))  # (B, M)

    values = F.stack(self.val_buff, axis=1)  # (B, M, m)
    readout = F.batch_matmul(
        self.p, values, transa=True, transb=False)  # (B, 1, m)
    readout = F.reshape(
        readout, (readout.shape[0], readout.shape[2]))  # (B, m)
    return readout, self.p
def prediction(self, x):
    """Predict from the attention-weighted ECFP and FCFP fingerprints.

    Both fingerprints are scored, the two scores are normalised so they sum
    to one, each fingerprint is multiplied by its share, and the sum of the
    two weighted fingerprints is passed through ``self.dnn``.

    Changes over the previous version: the forward pass
    ``self.dnn(attension_ecfp)`` whose result was immediately overwritten
    has been removed, and the shared denominator is computed once.

    Args:
        x: raw input array; wrapped in a ``Variable``.

    Returns:
        Output of ``self.dnn``.
    """
    x = Variable(x)
    ecfp = self.build_ecfp(x)
    fcfp = self.build_fcfp(x)

    ecfp_beta = self.ecfp_attension(ecfp)
    # NOTE(review): the FCFP score also goes through ``ecfp_attension``.
    # Kept as-is; confirm whether a separate FCFP attention layer was
    # intended.
    fcfp_beta = self.ecfp_attension(fcfp)

    beta_sum = ecfp_beta + fcfp_beta
    ecfp_alpha = ecfp_beta / beta_sum
    fcfp_alpha = fcfp_beta / beta_sum

    attension_ecfp = F.batch_matmul(ecfp, ecfp_alpha)
    attension_fcfp = F.batch_matmul(fcfp, fcfp_alpha)
    return self.dnn(attension_fcfp + attension_ecfp)
def __call__(self, x):
    """Channel-wise self-attention computed directly from ``x``.

    ``x`` is flattened to ``(batch, -1, height * width)`` for query, key and
    value. The energy ``query key^T`` is replaced by ``max - energy`` (max
    along the last axis) before the softmax; the attended values go through
    ``self.scale`` and are added to ``x``.
    """
    batch, channels, height, width = x.shape
    flat_shape = (batch, -1, height * width)
    proj_query = x.reshape(flat_shape)
    proj_key = x.reshape(flat_shape)
    proj_value = x.reshape(flat_shape)

    energy = F.batch_matmul(proj_query, proj_key, transb=True)
    row_max = F.broadcast_to(
        F.max(energy, axis=-1, keepdims=True), shape=energy.shape)
    attention = F.softmax(row_max - energy, axis=-1)

    out = F.batch_matmul(attention, proj_value)
    out = out.reshape((batch, -1, height, width))
    return self.scale(out) + x
def query(self, u):
    """Read from the memories ``self.m`` / ``self.c`` using query ``u``.

    Indices from ``size - 1`` down to ``0`` are embedded by ``self.TA`` and
    ``self.TC``, broadcast over the batch and added to the memories. The
    softmax of ``(m + tm) u`` weights ``c + tc``; the first column of the
    read-out is added to ``u`` and returned.
    """
    xp = cuda.get_array_module(u.data)
    m, c = self.m, self.c
    batch, size = m.data.shape[:2]
    inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
    weights = F.softmax(F.batch_matmul(m + tm, u))
    readout = F.batch_matmul(F.swapaxes(c + tc, 2, 1), weights)
    return readout[:, :, 0] + u
def pairwise_ranking_loss(h_cxt, h_wrd_pos, h_wrd_negs):
    """Hinge-style pairwise ranking loss of a context against words.

    For each negative word the term ``clip(1 + score_neg - score_pos, 0,
    1e8)`` is summed over the batch; the total is divided by
    ``N * len(h_wrd_negs)``.

    Fix over the previous version: the negative score is now computed from
    ``h_wrd_neg``. It was previously computed from ``h_wrd_pos``, which
    made every term a constant 1 regardless of the negatives.

    Args:
        h_cxt: context vectors; ``h_cxt.shape[0]`` is the batch size ``N``.
        h_wrd_pos: positive word vectors.
        h_wrd_negs: sequence of negative word vectors (must be non-empty,
            since its length is used as a divisor).

    Returns:
        The averaged loss.
    """
    N = h_cxt.shape[0]
    score_pos = F.reshape(
        F.batch_matmul(h_cxt, h_wrd_pos, transa=True), (N, ))

    loss = 0.0
    for h_wrd_neg in h_wrd_negs:
        score_neg = F.reshape(
            F.batch_matmul(h_cxt, h_wrd_neg, transa=True), (N, ))
        loss += F.sum(F.clip(1.0 + score_neg - score_pos, 0.0, 100000000.0))
    loss /= (N * len(h_wrd_negs))
    return loss
def cam2pixel(K, x, D=None):
    """Convert 3D points to pixel coordinates.

    Points are divided by their third row, optionally distorted, multiplied
    by the camera matrices, and the first two rows are returned.

    Args:
        K (:class `~chainer.Variable` or :ref:`ndarray`):
            Camera matrices, documented as shape `(B, 3, 3)`.
        x (:class `~chainer.Variable` or :ref:`ndarray`):
            3D points x, y and z; a 3-D array documented as `(B, 3, N)`.
        D (:class `~chainer.Variable` or :ref:`ndarray`):
            Optional distortion coefficients, documented as shape `(B, K)`
            with K being 4, 5 or 8; the elements correspond to
            (k1, k2, p1, p2, [k3, [k4, k5 k6]]) respectively.

    Returns:
        ~chainer.Variable:
            Pixel coordinates u and v, documented as shape `(B, 2, N)`.
    """
    # Unpacking is kept so that an input that is not 3-D still fails here.
    B, _, N = x.shape
    normalized = x / x[:, 2, None, :]
    if D is not None:
        distorted = distort_points(D, normalized[:,:2,:])
        normalized = to_homogenous(distorted)
    projected = F.batch_matmul(K, normalized)
    return projected[:,:2]
def __call__(self, x, length):
    """Embed a batch of token sequences into one vector per sequence.

    Args:
        x (numpy.ndarray or cupy.ndarray): sequences of vocabulary indices,
            passed to ``self.embed``
        length (numpy.ndarray or cupy.ndarray): number of tokens in each
            batch index

    Returns:
        chainer.Variable: Sentence embedding, documented as shape
        (batchsize, feature size)
    """
    # Token embeddings; the sum over axis 1 below is the sum over tokens.
    token_emb = self.embed(x)
    if self.fix_embedding:
        token_emb.unchain_backward()

    # Mean embedding per sequence: (batchsize, feature size)
    token_counts = length.astype(np.float32).reshape(-1, 1)
    mean_emb = F.sum(token_emb, axis=1) / token_counts

    # Per-token score against the transformed mean: (batchsize, tokens, 1)
    scores = F.batch_matmul(token_emb, F.matmul(mean_emb, self.M))
    attention = masked_softmax(scores, length)

    # Attention-weighted sum over tokens: (batchsize, feature size)
    weighted = F.broadcast_to(attention, token_emb.shape) * token_emb
    return F.sum(weighted, axis=1)
def forward(self, data):
    """Score every (child, parent) pair and every root candidate.

    Args:
        data: sequence of items; ``d[0]`` of each item is embedded.

    Returns:
        ``(parent_scores, root_scores)`` with shapes
        ``(len(data), len(data))`` and ``(1, len(data))``.
    """
    self.reset_state()
    n_items = len(data)
    stacked_shape = (1, n_items, self.hidden_size)

    x_list = [XP.iarray([d[0]]) for d in data]
    ep_list = [self.p_embed(x) for x in x_list]
    ec_list = [self.c_embed(x) for x in x_list]
    er_list = [self.r_embed(x) for x in x_list]

    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)

    P = functions.reshape(functions.concat(p_list, 0), stacked_shape)
    C = functions.reshape(functions.concat(c_list, 0), stacked_shape)
    R = functions.concat(r_list, 0)

    pairwise = functions.batch_matmul(C, P, transb=True)
    parent_scores = functions.reshape(pairwise, (n_items, n_items))
    root_scores = functions.reshape(self.r_scorer(R), (1, n_items))
    return parent_scores, root_scores
def get_matrix(y):
    """Return the normalised Gram matrix of a batch of feature maps.

    Each feature map is flattened over its two spatial axes and multiplied
    with its own transpose; the result is divided by
    ``channels * height * width``.

    Generalised over the previous version, which used ``width ** 2`` for the
    flattened size and therefore only worked for square feature maps. For
    square inputs the result is unchanged.

    Args:
        y: variable whose ``.data`` is 4-D, ``(batch, channels, height,
            width)``.

    Returns:
        Variable of shape ``(batch, channels, channels)``.
    """
    batch, ch, height, width = y.data.shape
    n_positions = height * width
    gogh_y = F.reshape(y, (batch, ch, n_positions))
    gogh_matrix = F.batch_matmul(gogh_y, gogh_y, transb=True) / np.float32(
        ch * n_positions)
    return gogh_matrix
def one_step(self, x, h, adj, deg_conds):
    """One neighbourhood-aggregation step with per-condition weights.

    Args:
        x: atom features.
        h: accumulated output, updated additively.
        adj: adjacency used to sum over adjacent atoms.
        deg_conds: iterable of boolean conditions; each selects the entries
            handled by the matching pair of inner/outer weights.

    Returns:
        ``(out_x, out_h)``: the new atom features and ``h`` plus this
        step's contribution.
    """
    # Take sum along adjacent atoms
    neighbour_sum = F.batch_matmul(adj, x)
    # (minibatch, max_num_atoms, hidden_dim) per the original annotation
    s0, s1, s2 = neighbour_sum.shape
    zeros = self.xp.zeros_like(neighbour_sum)
    masked_sums = [
        F.reshape(F.where(cond, neighbour_sum, zeros), (s0 * s1, s2))
        for cond in deg_conds
    ]

    out_x = 0
    dh = 0
    for outer, inner, masked in zip(
            self.outer_weight, self.inner_weight, masked_sums):
        out_x = out_x + inner(masked)
        dh = dh + outer(masked)

    out_x = F.reshape(F.sigmoid(out_x), (s0, s1, s2))
    dh = F.softmax(dh)
    # sum along atom's axis
    dh = F.sum(F.reshape(dh, (s0, s1, s2)), axis=1)
    return out_x, h + dh
def __call__(self, atoms, adjs):
    """Graph convolution followed by an attention-weighted read-out.

    The output of ``self.graph_conv`` is multiplied (batched) by the output
    of ``self.attention_layer``; ``self.mlp`` is applied when it is truthy.
    """
    node_features = self.graph_conv(atoms, adjs)
    attention = self.attention_layer(node_features)
    pooled = F.batch_matmul(node_features, attention)
    if self.mlp:
        pooled = self.mlp(pooled)
    return pooled
def __call__(self, adj, atom_array):
    """Compute and store the loss of the edge-typed message output.

    Atom embeddings are transformed by ``self.edge_layer``, multiplied by
    the per-edge-type adjacency, summed over edge types and compared with
    the atom labels via ``self.loss_func``. The result is stored on
    ``self.loss`` and returned.

    Args:
        adj: adjacency; reshaped to ``(s0 * MAX_EDGE_TYPE, s1, s1)``.
        atom_array: per-molecule atom ids; the number of non-zero entries
            in each row is used as that molecule's atom count.
    """
    # Number of non-zero atom ids per molecule.
    counts = []
    for list_atom in atom_array:
        list_atom = np.array(list_atom)
        count = np.count_nonzero(list_atom)
        counts.append(count)
    x = self.atom_embed(atom_array)
    # NOTE(review): degree_mat is computed but not used below.
    degree_mat = F.sum(adj, axis=1)
    degree_mat = F.sum(degree_mat, axis=1)
    s0, s1, s2 = x.shape
    # One transformed copy of the features per edge type.
    m = F.reshape(self.edge_layer(F.reshape(x, (s0 * s1, s2))), (s0, s1, s2, MAX_EDGE_TYPE))
    m = F.transpose(m, (0, 3, 1, 2))
    # Fold the edge-type axis into the batch axis for the batched product.
    adj = F.reshape(adj, (s0 * MAX_EDGE_TYPE, s1, s1))
    m = F.reshape(m, (s0 * MAX_EDGE_TYPE, s1, s2))
    m = F.batch_matmul(adj, m)
    m = F.reshape(m, (s0, MAX_EDGE_TYPE, s1, s2))
    # Sum the messages over edge types.
    m = F.sum(m, axis=1)
    s0, s1, s2 = m.shape
    m = F.reshape(m, (s0 * s1, s2))
    # NOTE(review): np.asarray may return a view of the caller's array, in
    # which case the assignment below also modifies the caller's data.
    atom_array = np.asarray(atom_array).reshape(-1)
    # Positions past each molecule's atom count get label -1
    # (presumably the ignore label of loss_func; confirm).
    for s_index in range(s0):
        atom_array[counts[s_index] + s_index * s1:(s_index + 1) * s1] = -1
    t = chainer.Variable(atom_array)
    self.loss = self.loss_func(m, t)
    return self.loss
def proj_tgt_to_src(vec, K, N, xp=np, use_cpu=True):
    """Projection matrix from target to sources.

    Builds a 4x4 version of each camera matrix (a zero column appended,
    then the row ``[0, 0, 0, 1]``) and multiplies it with the pose matrices
    obtained from ``pose_vec2mat``.

    Args:
        vec(Variable): Shape is (N, 6).
        K(array): Shape is (N, 3, 3).
        N(int): Batch size.
        xp: Array module; defaults to numpy.
        use_cpu(bool): When true and ``xp`` is not numpy, the inputs are
            moved to the CPU for the computation and the result is moved
            back with ``cpu2gpu``.

    Returns:
        Variable: Projection matrix.
    """
    is_transfer = False
    if xp != np and use_cpu:
        vec = gpu2cpu(vec)
        K = chainer.cuda.to_cpu(K)
        xp = np
        is_transfer = True
    # Module-level cache of the [0, 0, 0, 1] rows; rebuilt only when it is
    # missing or its batch size differs from N.
    global filler
    if filler is None or filler.shape[0] != N:
        filler = xp.tile(
            xp.asarray([0.0, 0.0, 0.0, 1.0], 'f').reshape(1, 1, 4), [N, 1, 1])
    # (N, 3, 3) -> (N, 3, 4) -> (N, 4, 4)
    K_ = F.concat([F.concat([K, xp.zeros([N, 3, 1], 'f')], axis=2), filler], axis=1)
    poses = pose_vec2mat(vec, filler, xp)
    proj_tgt_cam_to_src_pixel = F.batch_matmul(K_, poses)
    if is_transfer:
        proj_tgt_cam_to_src_pixel = cpu2gpu(proj_tgt_cam_to_src_pixel)
    return proj_tgt_cam_to_src_pixel
def forward(self, x):
    """Run the VGG-style network, optionally with a texture head.

    Feature-map sizes noted by the original author (after each pooling):
        h1 : (1, 64, 112, 112)
        h2 : (1, 128, 56, 56)
        h3 : (1, 256, 28, 28)
        h4 : (1, 512, 14, 14)
        h5 : (1, 512, 7, 7)

    When ``self.texture`` is set, the pooled output named by
    ``self.texture_layer`` is turned into second-order features (either the
    product of two convolutions when ``self.cbp`` is set, or a Gram matrix)
    and fed to ``self.fc8``. Otherwise the conv5/fc6/fc7/fc8 path is used.

    :param x: input image batch
    :return: output of ``self.fc8``
    """
    h = x
    h = F.relu((self.conv1_1(h)))
    h = F.relu((self.conv1_2(h)))
    pool1 = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu((self.conv2_1(pool1)))
    h = F.relu((self.conv2_2(h)))
    pool2 = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu((self.conv3_1(pool2)))
    h = F.relu((self.conv3_2(h)))
    h = F.relu((self.conv3_3(h)))
    pool3 = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu((self.conv4_1(pool3)))
    h = F.relu((self.conv4_2(h)))
    h = F.relu((self.conv4_3(h)))
    pool4 = F.max_pooling_2d(h, 2, stride=2)
    if self.texture:
        # Pick the pooled feature map the texture head operates on.
        h = {
            'pool1': pool1,
            'pool2': pool2,
            'pool3': pool3,
            'pool4': pool4
        }[self.texture_layer]
        if self.cbp:
            # Element-wise product of two convolutions with W1 and W2,
            # then global average pooling.
            h = F.convolution_2d(h, self.W1) * F.convolution_2d(h, self.W2)
            h = global_average_pooling_2d(h)
            # NOTE(review): the source was collapsed onto one line, so it is
            # ambiguous whether F.normalize sits inside this branch; both
            # normalisations are placed under it here. Confirm.
            if self.normalize:
                h = power_normalize(h)
                h = F.normalize(h)
            h = self.fc8(F.dropout(h, 0.2))
            return h
        else:
            # Gram matrix of the feature map, divided by the number of
            # spatial positions.
            b, ch, height, width = h.data.shape
            h = F.reshape(h, (b, ch, width * height))
            h = F.batch_matmul(h, h, transb=True) / self.xp.float32(
                width * height)
            h = self.fc8(F.dropout(h, 0.4))
            return h
    else:
        h = F.relu((self.conv5_1(pool4)))
        h = F.relu((self.conv5_2(h)))
        h = F.relu((self.conv5_3(h)))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(F.relu(self.fc6(h)), ratio=0.5)
        h = F.dropout(F.relu(self.fc7(h)), ratio=0.5)
        h = self.fc8(h)
        return h
def message_and_update(self, cur, adj, deg_conds, counts, level):
    """One message-passing step followed by a per-condition update.

    Args:
        cur: current atom features, unpacked as ``(s0, s1, s2)``.
        adj: adjacency; reshaped to ``(s0 * MAX_EDGE_TYPE, s1, s1)``.
        deg_conds: iterable of boolean conditions, one per entry of
            ``self.H[level]``.
        counts: per-molecule atom counts; rows past the count are zeroed.
        level: index into ``self.H``.

    Returns:
        Updated features of shape ``(s0, s1, s2)``.
    """
    s0, s1, s2 = cur.shape
    # One transformed copy of the features per edge type.
    tmp = self.edge_layer(F.reshape(cur, (s0 * s1, s2)))
    m = F.reshape(tmp, (s0, s1, s2, MAX_EDGE_TYPE))
    m = F.transpose(m, (0, 3, 1, 2))
    # Fold the edge-type axis into the batch axis for the batched product.
    m = F.reshape(m, (s0 * MAX_EDGE_TYPE, s1, s2))
    adj = F.reshape(adj, (s0 * MAX_EDGE_TYPE, s1, s1))
    m = F.batch_matmul(adj, m)
    m = F.reshape(m, (s0, MAX_EDGE_TYPE, s1, s2))
    # Sum over edge types, then add the atom's own features.
    m = F.sum(m, axis=1)
    m = m + cur
    s0, s1, s2 = m.shape
    # NOTE(review): zero_array is always a NumPy array, regardless of where
    # m lives.
    zero_array = np.zeros(m.shape, dtype=np.float32)
    # Keep only the entries selected by each condition.
    ms = [F.reshape(F.where(cond, m, zero_array), (s0 * s1, s2)) for cond in deg_conds]
    out_x = 0
    for hidden_weight, m in zip(self.H[level], ms):
        out_x = out_x + hidden_weight(m)
    out_x = F.sigmoid(out_x)
    # Zero the rows past each molecule's atom count. This writes to .data
    # directly, so it is not recorded in the computational graph.
    for s_index in range(s0):
        _from = counts[s_index] + s_index * s1
        _to = (s_index + 1) * s1
        out_x.data[_from:_to:, :] = 0.0
    out_x = F.reshape(out_x, (s0, s1, s2))
    return out_x
def attend(self, encoded_features):
    """Produce ``self.num_labels`` LSTM outputs by attending over features.

    At every step each transformed encoded feature is combined with the
    transformed previous LSTM output, scored, and the softmax of the scores
    (axis 1) weights the raw encoded features to form the next LSTM input.

    Args:
        encoded_features: sequence of encoded feature variables.

    Returns:
        List with one LSTM output per label step.
    """
    self.out_lstm.reset_state()

    transformed_encoded_features = F.concat(
        [
            F.expand_dims(self.transform_encoded_features(feature), axis=1)
            for feature in encoded_features
        ],
        axis=1)
    concat_encoded_features = F.concat(
        [F.expand_dims(e, axis=1) for e in encoded_features], axis=1)

    # The first step attends with an all-zero "previous output".
    lstm_output = self.xp.zeros_like(encoded_features[0])
    outputs = []
    for _ in range(self.num_labels):
        transformed_lstm_output = self.transform_out_lstm_feature(
            lstm_output)

        scores = [
            self.generate_attended_feat(
                F.tanh(transformed_feature + transformed_lstm_output))
            for transformed_feature in F.separate(
                transformed_encoded_features, axis=1)
        ]
        alphas = F.softmax(F.concat(scores, axis=1), axis=1)

        lstm_input_feature = F.squeeze(
            F.batch_matmul(alphas, concat_encoded_features, transa=True),
            axis=1)
        lstm_output = self.out_lstm(lstm_input_feature)
        outputs.append(lstm_output)
    return outputs
def one_step(self, mol_reps, sub_reps, adj, atom_array, counts):
    """Compute the atom-prediction loss for one step.

    Sub-structure features are passed through ``self.edge_layer``,
    multiplied by the per-edge-type adjacency and summed over edge types;
    the tiled molecule representation is added and ``self.out`` predicts
    the atom labels.

    Args:
        mol_reps: molecule representations, tiled ``s1`` times along axis 1.
        sub_reps: sub-structure features, unpacked as ``(s0, s1, s2)``.
        adj: adjacency; reshaped to ``(s0 * MAX_EDGE_TYPE, s1, s1)``.
        atom_array: atom labels; flattened before use.
        counts: per-molecule atom counts.

    Returns:
        Softmax cross-entropy loss.
    """
    s0, s1, s2 = sub_reps.shape
    # One transformed copy of the features per edge type.
    tmp = self.edge_layer(F.reshape(sub_reps, (s0 * s1, s2)))
    m = F.reshape(tmp, (s0, s1, s2, MAX_EDGE_TYPE))
    m = F.transpose(m, (0, 3, 1, 2))
    # Fold the edge-type axis into the batch axis for the batched product.
    m = F.reshape(m, (s0 * MAX_EDGE_TYPE, s1, s2))
    adj = F.reshape(adj, (s0 * MAX_EDGE_TYPE, s1, s1))
    m = F.batch_matmul(adj, m)
    m = F.reshape(m, (s0, MAX_EDGE_TYPE, s1, s2))
    # Sum the messages over edge types.
    m = F.sum(m, axis=1)
    s0, s1, s2 = m.shape
    m = F.reshape(m, (s0 * s1, s2))
    # Repeat each molecule representation once per atom slot.
    mol_reps = F.tile(mol_reps, (1, s1))
    mol_reps = F.reshape(mol_reps, (s0 * s1, s2))
    reps = mol_reps + m
    atom_array = atom_array.flatten()
    # For slots past each molecule's atom count: zero the features (written
    # to .data directly, so not recorded in the graph) and set the label
    # to -1 (presumably the ignore label of softmax_cross_entropy; confirm).
    for s_index in range(s0):
        _from = counts[s_index] + s_index * s1
        _to = (s_index + 1) * s1
        reps.data[_from:_to, :] = 0.0
        atom_array[_from:_to] = -1
    t = chainer.Variable(atom_array)
    loss = F.softmax_cross_entropy(self.out(reps), t)
    return loss
def setUp(self):
    """Prepare batch-size-1 fixtures for ``F.batch_matmul``.

    Draws ``x1`` ``(1, m, k)``, ``x2`` ``(1, k, n)`` and ``gy``
    ``(1, m, n)`` as float32 and computes the expected forward result with
    ``numpy.dot``.
    """
    n_batch = 1
    self.x1 = numpy.random.uniform(
        .5, 1, (n_batch, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (n_batch, k, n)).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (n_batch, m, n)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(x, y)

    expected = []
    for i in six.moves.range(1):
        expected.append(numpy.dot(self.x1[i], self.x2[i]))
    self.forward_answer = numpy.array(expected)
def __call__(self, S, h):
    """Return attention weights of ``h`` over the source states ``S``.

    ``S`` (``(batch_size, src_len, hidden_size)``) is passed position-wise
    through ``self.inner_weight``, multiplied with ``h`` and normalised
    with a softmax.
    """
    batch_size, src_len, hidden_size = S.data.shape
    flat_states = F.reshape(S, (batch_size * src_len, hidden_size))
    projected = F.reshape(
        self.inner_weight(flat_states), (batch_size, src_len, hidden_size))
    scores = F.squeeze(F.batch_matmul(projected, h), axis=2)
    return F.softmax(scores)
def __call__(self, x, z):
    """Apply a weight matrix predicted from the context to the input.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        z (~chainer.Variable): Batch of context vectors.

    Returns:
        ~chainer.Variable: Output of the context layer.
    """
    if self.has_uninitialized_params:
        with cuda.get_device(self._device_id):
            self._initialize_params(x.size // x.shape[0])

    n_batch = x.shape[0]

    # Adaptive filter predicted from the context, reshaped from its flat
    # form to one weight tensor per sample.
    W = F.reshape(self.predictor(z), [n_batch] + self.shape)

    # Optional constant component shared by all samples.
    if self.constantW:
        W += F.tile(self.C, (n_batch, 1, 1))

    # Per-sample product of weights and inputs, then the bias.
    y = F.squeeze(F.batch_matmul(W, x), 2)
    y += F.tile(self.b, (n_batch, 1))
    return y
def evaluate(self, images, labels):
    """Evaluate accuracy on one batch using the Fisher projection.

    The first ``nb_class_test * n_shot`` images form the support set and
    the rest the query set; each part is encoded separately.

    Returns:
        A one-element list holding the value returned by
        ``self.compute_accuracy``.
    """
    labels = cp.array(labels)
    nb_class = self.nb_class_test
    n_support = nb_class * self.n_shot
    images = self.xp.stack(images)

    support_images = images[:n_support]
    query_images = images[n_support:]
    support_set = self.encoder(
        support_images, support_images.shape[0], train=False)
    query_set = self.encoder(
        query_images, query_images.shape[0], train=False)
    support_label = labels[:n_support]

    # Mean support embedding per class.
    average_key = F.mean(
        F.reshape(support_set, [self.n_shot, nb_class, -1]), axis=0)
    batchsize_q = len(query_set.data)
    batchsize_s = len(support_set.data)
    convert_dim = n_support

    W = self.Fisher(support_set, support_label, batchsize_s, nb_class,
                    convert_dim, self.dimension, 0.01)
    W_batch = F.broadcast_to(W, [nb_class, convert_dim, self.dimension])
    W_mean = F.batch_matmul(W_batch, average_key)

    accs_tmp = self.compute_accuracy(
        labels[n_support:], query_set, W, W_mean, batchsize_q, nb_class,
        convert_dim)
    return [accs_tmp]
def __call__(self, x):
    """Run one step of the memory-augmented controller and return ``self.y``.

    The input is concatenated with the previous read vector ``self.r``,
    passed through ``self.l_dl`` and split into the direct output ``nu`` and
    the interface vector ``xi``. ``xi`` is split into ten parts that drive
    the updates of ``self.u``, ``self.ww``, ``self.M``, ``self.p``,
    ``self.L``, ``self.wr`` and ``self.r`` below. All intermediate values
    are stored on ``self``; the statements are order-dependent.

    Shape comments (``N``, ``W``, ``R``, ``Y``) are the original author's
    annotations.
    """
    # <question : is batchsize>1 possible for RNN ? if No, I will implement calculations without batch dimension.>
    self.chi = F.concat((x, self.r))
    # First self.Y columns are the direct output, the rest is the interface.
    (self.nu, self.xi) = \
        F.split_axis(self.l_dl(self.chi), [self.Y], 1)
    # Nine cumulative offsets give ten sections; pi receives the remainder.
    (self.kr, self.betar, self.kw, self.betaw, self.e, self.v, self.f, self.ga, self.gw, self.pi ) = F.split_axis(self.xi, np.cumsum( [self.W*self.R, self.R, self.W, 1, self.W, self.W, self.R, 1, 1]), 1)
    self.kr = F.reshape(self.kr, (self.R, self.W)) # R * W
    self.betar = 1 + F.softplus(self.betar) # 1 * R
    # self.kw: 1 * W
    self.betaw = 1 + F.softplus(self.betaw) # 1 * 1
    self.e = F.sigmoid(self.e) # 1 * W
    # self.v : 1 * W
    self.f = F.sigmoid(self.f) # 1 * R
    self.ga = F.sigmoid(self.ga) # 1 * 1
    self.gw = F.sigmoid(self.gw) # 1 * 1
    self.pi = F.softmax(F.reshape(self.pi, (self.R, 3))) # R * 3 (softmax for 3)
    # self.wr : N * R
    self.psi_mat = 1 - F.matmul(Variable(np.ones((self.N, 1)).astype(np.float32)), self.f) * self.wr # N * R
    # psi is the product of the psi_mat columns.
    self.psi = Variable(np.ones((self.N, 1)).astype(np.float32)) # N * 1
    for i in range(self.R):
        self.psi = self.psi * F.reshape(self.psi_mat[:,i],(self.N,1)) # N * 1
    # self.ww, self.u : N * 1
    self.u = (self.u + self.ww - (self.u * self.ww)) * self.psi
    self.a = u2a(self.u) # N * 1
    self.cw = C(self.M, self.kw, self.betaw) # N * 1
    # Write weighting: ga mixes a and cw, gw scales the result.
    self.ww = F.matmul(F.matmul(self.a, self.ga) + F.matmul(self.cw, 1.0 - self.ga), self.gw) # N * 1
    # Memory update: scale by (1 - ww e), then add ww v.
    self.M = self.M * (np.ones((self.N, self.W)).astype(np.float32) - F.matmul(self.ww, self.e)) + F.matmul(self.ww, self.v) # N * W
    self.p = (1.0 - F.matmul(Variable(np.ones((self.N,1)).astype(np.float32)), F.reshape(F.sum(self.ww),(1,1)))) \
        * self.p + self.ww # N * 1
    self.wwrep = F.matmul(self.ww, Variable(np.ones((1, self.N)).astype(np.float32))) # N * N
    self.L = (1.0 - self.wwrep - F.transpose(self.wwrep)) * self.L + F.matmul(self.ww, F.transpose(self.p)) # N * N
    # NOTE(review): this mask is built without astype(np.float32), unlike
    # the other constants here; confirm the dtype is intended.
    self.L = self.L * (np.ones((self.N, self.N)) - np.eye(self.N)) # force L[i,i] == 0
    self.fo = F.matmul(self.L, self.wr) # N * R
    self.ba = F.matmul(F.transpose(self.L), self.wr) # N * R
    # One C(...) column per read head.
    self.cr_list = [0] * self.R
    for i in range(self.R):
        self.cr_list[i] = C(self.M, F.reshape(self.kr[i,:],(1, self.W)), F.reshape(self.betar[0,i],(1, 1))) # N * 1
    self.cr = F.concat(self.cr_list) # N * R
    # Stack ba / cr / fo per head so pi can mix them.
    self.bacrfo = F.concat((F.reshape(F.transpose(self.ba),(self.R,self.N,1)), F.reshape(F.transpose(self.cr),(self.R,self.N,1)), F.reshape(F.transpose(self.fo) ,(self.R,self.N,1)),),2) # R * N * 3
    self.pi = F.reshape(self.pi, (self.R,3,1)) # R * 3 * 1
    self.wr = F.transpose(F.reshape(F.batch_matmul(self.bacrfo, self.pi), (self.R, self.N))) # N * R
    self.r = F.reshape(F.matmul(F.transpose(self.M), self.wr),(1, self.R * self.W)) # W * R (-> 1 * RW)
    self.y = self.l_Wr(self.r) + self.nu # 1 * Y
    return self.y
def extract_style_feature(self, images, masks=None):
    """Return mask-weighted Gram matrices of selected VGG16 layers.

    Fix over the previous version: the pooling size is computed with
    integer division. With ``/`` it becomes a float on Python 3, which is
    not a valid kernel size or stride for ``average_pooling_2d``.

    Args:
        images: image batch; the channel axis is reversed, scaled by 255
            and mean-subtracted before being passed to ``self.vgg16``.
        masks: optional per-pixel weights of shape
            ``(batch, height, width)``; defaults to all ones.

    Returns:
        List with one Gram-matrix variable per extracted layer.
    """
    xp = self.xp
    mean = xp.array([103.939, 116.779, 123.68], 'float32')  # BGR
    images = images[:, ::-1] * 255 - mean[None, :, None, None]
    features = self.vgg16(
        images, layers=['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3']).values()
    if masks is None:
        masks = xp.ones(
            (images.shape[0], images.shape[2], images.shape[3]))

    style_features = []
    for feature in features:
        # Downsample the mask to this layer's spatial resolution.
        scale = masks.shape[-1] // feature.shape[-1]
        m = cf.average_pooling_2d(masks[:, None, :, :], scale, scale).data
        dim = feature.shape[1]
        m = m.reshape((m.shape[0], -1))

        # (batch, positions, channels), each position weighted by sqrt(m)
        # so that the Gram product carries the weight m.
        f2 = feature.transpose((0, 2, 3, 1))
        f2 = f2.reshape((f2.shape[0], -1, f2.shape[-1]))
        f2 *= xp.sqrt(m)[:, :, None]
        f2 = cf.batch_matmul(f2.transpose((0, 2, 1)), f2)
        f2 /= dim * m.sum(axis=1)[:, None, None]
        style_features.append(f2)
    return style_features
def train(self, images, labels):
    """Run one training update on a batch and return the loss value.

    The first ``nb_class_train * n_shot`` images form the support set and
    the rest the query set. Gradients are cleared, the loss is
    back-propagated and the optimizer is stepped.

    Returns:
        ``loss.data`` of the computed loss.
    """
    labels = cp.array(labels)
    images = self.xp.stack(images)
    nb_class = self.nb_class_train
    n_support = nb_class * self.n_shot

    support_images = images[:n_support]
    query_images = images[n_support:]
    support_set = self.encoder(
        support_images, support_images.shape[0], train=True)
    query_set = self.encoder(
        query_images, query_images.shape[0], train=True)
    support_label = labels[:n_support]

    # Mean support embedding per class.
    average_key = F.mean(
        F.reshape(support_set, [self.n_shot, nb_class, -1]), axis=0)
    batchsize_q = len(query_set.data)
    batchsize_s = len(support_set.data)
    convert_dim = n_support

    W = self.Fisher(support_set, support_label, batchsize_s, nb_class,
                    convert_dim, self.dimension, 0.01)
    W_batch = F.broadcast_to(W, [nb_class, convert_dim, self.dimension])
    W_mean = F.batch_matmul(W_batch, average_key)

    loss = self.compute_loss(
        labels[n_support:], query_set, W, W_mean, batchsize_q, nb_class,
        convert_dim)

    self.chain.zerograds()
    loss.backward()
    self.optimizer.update()
    return loss.data
def __call__(self, a_list, b_list, p):
    """Return attention-weighted sums of ``a_list`` and ``b_list``.

    Each ``(a, b)`` pair is scored together with ``p``; the exponentiated
    scores are divided by their total and used as weights when
    accumulating the ``a`` vectors and the ``b`` vectors.

    Returns:
        ``(aa, bb)``, each accumulated from terms reshaped to
        ``(batch_size, self.hidden_size)``.
    """
    batch_size = p.data.shape[0]

    # Exponentiated score per position and the running total.
    exp_scores = []
    total = XP.fzeros((batch_size, 1))
    for a_vec, b_vec in zip(a_list, b_list):
        mixed = functions.tanh(self.aw(a_vec) + self.bw(b_vec) + self.pw(p))
        exp_score = functions.exp(self.we(mixed))
        exp_scores.append(exp_score)
        total += exp_score

    target_shape = (batch_size, self.hidden_size)
    ZEROS = XP.fzeros(target_shape)
    aa = ZEROS
    bb = ZEROS
    for a_vec, b_vec, exp_score in zip(a_list, b_list, exp_scores):
        exp_score /= total
        aa += functions.reshape(
            functions.batch_matmul(a_vec, exp_score), target_shape)
        bb += functions.reshape(
            functions.batch_matmul(b_vec, exp_score), target_shape)
    return aa, bb
def cosine_similarity(x, y, eps=1e-6):
    """Pairwise cosine similarity between the rows of ``x`` and ``y``.

    Args:
        x: variable whose ``.data`` has shape ``(n1, n2, n3)``.
        y: variable whose ``.data`` is 3-D; its second axis length ``m2``
            is the number of rows compared against.
        eps: added to the product of squared norms before the square root.

    Returns:
        Variable of shape ``(n1, n2, m2)``.
    """
    n1, n2, n3 = x.data.shape
    _, m2, _ = y.data.shape
    pair_shape = (n1, n2, m2)

    dots = F.batch_matmul(x, y, transb=True)
    x_sq = F.broadcast_to(
        F.reshape(F.sum(x * x, axis=2), (n1, n2, 1)), pair_shape)
    y_sq = F.broadcast_to(
        F.reshape(F.sum(y * y, axis=2), (n1, 1, m2)), pair_shape)
    # exp(log(v) / 2) is the square root of v.
    norm_product = F.exp(F.log(x_sq * y_sq + eps) / 2)
    dots /= norm_product
    return dots
def setUp(self):
    """Prepare vector inputs whose batched outer product is tested.

    ``x1`` and ``x2`` are ``(batch_size, m)`` float32 arrays; the expected
    result is the per-sample outer product, shape ``(batch_size, m, m)``.
    """
    vector_shape = (batch_size, m,)
    self.x1 = numpy.random.uniform(
        .5, 1, vector_shape).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, vector_shape).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, m)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(x, y, transb=True)

    outer_products = []
    for i in six.moves.range(batch_size):
        column = self.x1[i].reshape(m, 1)
        row = self.x2[i].reshape(1, m)
        outer_products.append(numpy.dot(column, row))
    self.forward_answer = numpy.array(outer_products)
def setUp(self):
    """Prepare batch-size-1 fixtures for ``F.batch_matmul``.

    Draws ``x1`` ``(1, m, k)``, ``x2`` ``(1, k, n)`` and ``gy``
    ``(1, m, n)`` as float32 and computes the expected forward result with
    ``numpy.dot``.
    """
    lhs_shape = (1, m, k)
    rhs_shape = (1, k, n)
    out_shape = (1, m, n)
    self.x1 = numpy.random.uniform(.5, 1, lhs_shape).astype(numpy.float32)
    self.x2 = numpy.random.uniform(.5, 1, rhs_shape).astype(numpy.float32)
    self.gy = numpy.random.uniform(-1, 1, out_shape).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(x, y)

    products = []
    for i in six.moves.range(1):
        products.append(numpy.dot(self.x1[i], self.x2[i]))
    self.forward_answer = numpy.array(products)
def setUp(self):
    """Prepare fixtures where one unbatched matrix is shared by the batch.

    ``x1`` is ``(batch_size, m, k)`` and ``x2`` is ``(k, n)``. The tested
    op expands and broadcasts ``x2`` to ``(batch_size, k, n)`` before the
    batched product.
    """
    self.x1 = numpy.random.uniform(
        .5, 1, (batch_size, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (k, n)).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, n)).astype(numpy.float32)

    broadcast_shape = (batch_size, k, n)
    self.op = lambda x, y: F.batch_matmul(
        x, F.broadcast_to(F.expand_dims(y, 0), broadcast_shape))

    per_sample = []
    for i in six.moves.range(batch_size):
        per_sample.append(numpy.dot(self.x1[i], self.x2))
    self.forward_answer = numpy.array(per_sample)
def __call__(self, x1, x2):
    """Applies the linear layer to every sample with a batched product.

    ``self.W`` is repeated once per sample, multiplied with the input, and
    the result is reshaped to the input shape without its last axis.

    Fix over the previous version: the body referred to an undefined name
    ``x`` and raised ``NameError`` on every call. ``x1`` is now used as the
    input.

    NOTE(review): ``x2`` is accepted for interface compatibility but is not
    used. Confirm with the callers whether ``x1`` is the intended input
    and what role ``x2`` was meant to play.

    Args:
        x1 (~chainer.Variable): Batch of input vectors.
        x2: Unused.

    Returns:
        ~chainer.Variable: Output of the linear layer.
    """
    x = x1
    batch_size = x.data.shape[0]
    # One copy of W per sample so batch_matmul can pair them up.
    batch_W = F.concat([F.expand_dims(self.W, 0)] * batch_size, 0)
    return F.reshape(F.batch_matmul(x, batch_W), x.data.shape[:-1])
def forward(self, data):
    """Score every (child, parent) pair and every root candidate.

    Each item is embedded three ways (parent, child, root), run through a
    forward and a backward pass, combined, and then scored.

    Args:
        data: sequence of items; ``d[0]`` of each item is embedded.

    Returns:
        ``(parent_scores, root_scores)`` with shapes
        ``(len(data), len(data))`` and ``(1, len(data))``.
    """
    self.reset_state()
    n_items = len(data)

    x_list = [XP.iarray([d[0]]) for d in data]
    pe_list = [self.p_embed(x) for x in x_list]
    ce_list = [self.c_embed(x) for x in x_list]
    re_list = [self.r_embed(x) for x in x_list]

    # Forward passes, in sequence order.
    pf_list = [self.p_forward(pe) for pe in pe_list]
    cf_list = [self.c_forward(ce) for ce in ce_list]
    rf_list = [self.r_forward(re) for re in re_list]

    # Backward passes, fed in reversed sequence order.
    # NOTE(review): the backward outputs stay in reversed order, so the
    # zips below pair forward output i with the backward output of
    # position n-1-i. Confirm whether this alignment is intended.
    pb_list = [self.p_backward(pe) for pe in reversed(pe_list)]
    cb_list = [self.c_backward(ce) for ce in reversed(ce_list)]
    rb_list = [self.r_backward(re) for re in reversed(re_list)]

    pc_list = [self.p_combine(pf, pb) for pf, pb in zip(pf_list, pb_list)]
    cc_list = [self.c_combine(cf, cb) for cf, cb in zip(cf_list, cb_list)]
    rc_list = [self.r_combine(rf, rb) for rf, rb in zip(rf_list, rb_list)]

    stacked_shape = (1, n_items, self.hidden_size)
    P = functions.reshape(functions.concat(pc_list, 0), stacked_shape)
    C = functions.reshape(functions.concat(cc_list, 0), stacked_shape)
    R = functions.concat(rc_list, 0)

    parent_scores = functions.reshape(
        functions.batch_matmul(C, P, transb=True), (n_items, n_items))
    root_scores = functions.reshape(self.r_scorer(R), (1, n_items))
    return parent_scores, root_scores
def __call__(self, x, t):
    """Run the network and return the loss (training) or the prediction.

    Five convolution stages, each followed by 2x2 max pooling with stride 2,
    feed three fully connected layers.  The ``fc8`` output is split into three
    parts along axis 1; the part selected by ``self.c`` is reshaped to
    ``(batch, 16, 16)``, multiplied by an all-zero matrix of the same shape
    and put back in place before the parts are concatenated again.

    Args:
        x: Input variable; ``x.data.shape[0]`` is used as the batch size.
        t: Target labels, or ``None`` to request a prediction.

    Returns:
        ``self.loss`` (softmax cross entropy divided by ``16 * 16``) when
        ``t`` is given, otherwise ``self.pred`` after a softmax.
    """
    conv_stages = (
        (self.conv1_1, self.conv1_2),
        (self.conv2_1, self.conv2_2),
        (self.conv3_1, self.conv3_2, self.conv3_3),
        (self.conv4_1, self.conv4_2, self.conv4_3),
        (self.conv5_1, self.conv5_2, self.conv5_3),
    )
    h = x
    for stage in conv_stages:
        for conv in stage:
            h = F.relu(conv(h))
        h = F.max_pooling_2d(h, 2, stride=2)

    for fc in (self.fc6, self.fc7):
        h = F.relu(fc(h))
    h = self.fc8(h)

    # Channelwise Inhibited
    batch = x.data.shape[0]
    parts = F.split_axis(h, 3, 1)
    selected = F.reshape(parts[self.c], (batch, 16, 16))
    xp = cuda.get_array_module(x.data)
    # The zero operand is volatile exactly when no target was supplied.
    zeros = Variable(xp.zeros_like(selected.data), volatile=(t is None))
    selected = F.batch_matmul(selected, zeros)
    selected = F.reshape(selected, (batch, 1, 16, 16))

    merged = [selected if index == self.c else part
              for index, part in enumerate(parts)]
    self.pred = F.concat(merged, 1)

    if t is None:
        self.pred = F.softmax(self.pred)
        return self.pred

    self.loss = F.softmax_cross_entropy(self.pred, t)
    self.loss /= 16 * 16
    return self.loss
def __call__(self, x):
    """Apply the layer to ``x`` with a batched matrix product.

    Parameters that are still uninitialized are created first, sized from
    ``x.shape[1]``.  The product ``batch_matmul(x, self.W)`` is reshaped to
    ``(self.out_size, x.data.shape[1])`` and then transposed.

    Args:
        x (~chainer.Variable): Input variable.
            NOTE(review): the second axis of ``x`` is used both as the
            parameter size and as the "batch size" of the reshape - confirm
            the expected layout of ``x``.

    Returns:
        ~chainer.Variable: Transposed, reshaped product.
    """
    if self.has_uninitialized_params:
        self._initialize_params(x.shape[1])

    n_columns = x.data.shape[1]
    product = batch_matmul(x, self.W)
    flattened = F.reshape(product, (self.out_size, n_columns))
    return F.transpose(flattened)
def setUp(self):
    """Create random operands, gradients and the expected forward result.

    Shapes, dtypes and the transpose flags are read from attributes of the
    test case (``x1_shape``, ``x2_shape``, ``gy_shape``, ``x1_dtype``,
    ``x2_dtype``, ``transa``, ``transb``).  The upstream gradient uses the
    NumPy result type of the two operand dtypes.
    """
    def draw(low, high, shape, dtype):
        # Uniform sample converted to the requested dtype.
        return numpy.random.uniform(low, high, shape).astype(dtype)

    self.x1 = draw(.5, 1, self.x1_shape, self.x1_dtype)
    self.x2 = draw(.5, 1, self.x2_shape, self.x2_dtype)

    ret_dtype = numpy.result_type(self.x1_dtype, self.x2_dtype)
    self.gy = draw(-1, 1, self.gy_shape, ret_dtype)

    # Second-order gradients share the shapes and dtypes of the operands.
    self.ggx1 = draw(.5, 1, self.x1_shape, self.x1_dtype)
    self.ggx2 = draw(.5, 1, self.x2_shape, self.x2_dtype)

    def matmul(x, y):
        return F.batch_matmul(x, y, transa=self.transa, transb=self.transb)

    self.op = matmul
    self.forward_answer = self._get_forward_answer(
        self.x1, self.x2, self.transa, self.transb)
def channelwise_inhibited(self, h):
    """Replace one of three channel groups by its product with zeros.

    ``h`` is split into three parts along axis 1.  The part selected by
    ``self.c`` is reshaped to ``(num, 16, 16)``, multiplied with an all-zero
    variable of the same shape via ``F.batch_matmul`` and reshaped to
    ``(num, 1, 16, 16)``; the other parts are passed through unchanged.

    :param h: input variable; ``h.data.shape[0]`` is used as ``num``
    :return: the three parts concatenated again along axis 1
    """
    xp = cuda.get_array_module(h.data)
    num = h.data.shape[0]

    parts = F.split_axis(h, 3, 1)
    selected = F.reshape(parts[self.c], (num, 16, 16))
    zeros = Variable(xp.zeros_like(selected.data), 'AUTO')
    selected = F.reshape(F.batch_matmul(selected, zeros), (num, 1, 16, 16))

    merged = [selected if index == self.c else part
              for index, part in enumerate(parts)]
    return F.concat(merged, 1)
def _context(self, p, fb_mat, fbe_mat):
    """Compute the attention-weighted context vector.

    :param p: variable passed through ``self.p_e`` and broadcast over the
        source length
    :param fb_mat: variable whose data has shape
        ``(batch, source_length, ...)``; it is weighted by the attention
    :param fbe_mat: variable added to the broadcast projection of ``p``
        (the sum is taken in the ``[batch * srclen, atten]`` layout)
    :return: context reshaped to ``[batch, 2 * hidden]``
    """
    batch_size, source_length, _ = fb_mat.data.shape

    # Project p, then tile it over the source length and flatten to
    # [batch * srclen, atten].
    projected = F.expand_dims(self.p_e(p), 1)
    tiled = F.broadcast_to(
        projected, [batch_size, source_length, self.atten_size])
    pe_mat = F.reshape(tiled, [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)

    # Attention weights: softmax over scores reshaped to [batch, srclen].
    scores = F.reshape(self.e_a(e_mat), [batch_size, source_length])
    a_mat = F.softmax(scores)

    # Weighted combination of fb_mat, reshaped to [batch, 2 * hidden].
    weighted = F.batch_matmul(a_mat, fb_mat, transa=True)
    return F.reshape(weighted, [batch_size, 2 * self.hidden_size])
def _attention(self, h_forward, h_backword, s, enable, disable_value):
    """Compute an attention context over the encoder states.

    Args:
        h_forward: Sequence of forward encoder states; its length is used as
            the sentence size.
        h_backword: Sequence of backward encoder states.
        s: Decoder state; ``s.shape[0]`` is used as the batch size.
        enable: Condition passed to ``F.where``; positions where it is false
            receive ``disable_value`` instead of their score.
        disable_value: Replacement scores for disabled positions.

    Returns:
        Context reshaped to ``(batch_size, 2 * hidden_size)``.

    NOTE(review): ``h`` is built by concatenating the per-position states
    along axis 0 and is afterwards reinterpreted with plain reshapes as
    ``(batch, sentence, hidden)`` and ``(batch, 2 * hidden, sentence)``.
    Whether that matches the intended layout depends on the shape of the
    entries of ``h_forward`` / ``h_backword`` - confirm.
    """
    batch_size = s.shape[0]
    sentence_size = len(h_forward)
    hidden_size = self.hidden_size

    # Decoder-side term, broadcast over every sentence position.
    weighted_s = F.broadcast_to(
        F.expand_dims(self.W_a(s), axis=1),
        (batch_size, sentence_size, hidden_size))

    # Encoder-side term from the joined forward/backward states.
    h = F.concat((F.concat(h_forward, axis=0), F.concat(h_backword, axis=0)))
    weighted_h = F.reshape(
        self.U_a(h), (batch_size, sentence_size, hidden_size))

    # One score per position; disabled positions get disable_value.
    e = self.v_a(
        F.reshape(F.tanh(weighted_s + weighted_h),
                  (batch_size * sentence_size, hidden_size)))
    e = F.where(
        enable, F.reshape(e, (batch_size, sentence_size)), disable_value)
    alpha = F.softmax(e)

    c = F.batch_matmul(
        F.reshape(h, (batch_size, 2 * hidden_size, sentence_size)), alpha)
    return F.reshape(c, (batch_size, 2 * hidden_size))
def _additional_score(self, y, a, src):
    """Combine the model output ``y`` with a dictionary-based score.

    The dictionary score is ``batch_matmul(self.prob_dict, a, transa=True)``
    reshaped to ``(batch_size, vocab_size)``.  How it is merged with ``y``
    depends on ``self._method``:

    * ``"bias"``: ``y + log(eps + y_dict)``; the result is not flagged as a
      probability.
    * ``"linear"``: ``self.LI(y_dict, softmax(y))``; the result is flagged as
      a probability.

    Args:
        y: Output scores; ``len(y.data)`` is used as the batch size.
        a: Right operand of the product with the dictionary (presumably the
            attention weights - confirm against the caller).
        src: Unused; kept for interface compatibility.

    Returns:
        tuple: ``(yp, is_prob)``.

    Raises:
        ValueError: If ``self._method`` is neither ``"bias"`` nor
            ``"linear"``.
    """
    batch_size = len(y.data)
    vocab_size = self._output

    # Calculating dict prob
    y_dict = F.reshape(
        F.batch_matmul(self.prob_dict, a, transa=True),
        (batch_size, vocab_size))

    # Using dict prob
    if self._method == "bias":
        return y + F.log(eps + y_dict), False
    if self._method == "linear":
        return self.LI(y_dict, F.softmax(y)), True
    raise ValueError("Unrecognized dictionary method:", self._method)
def __call__(self, s, a, h):
    """Mix a weighted context into the hidden state and project it.

    The context is ``batch_matmul(a, s, transa=True)`` reshaped to the shape
    of ``h``.  It is concatenated with ``h`` along axis 1, passed through
    ``self.WC`` and ``tanh``, and finally through ``self.WS``.

    :param s: right operand of the batched product
    :param a: left operand of the batched product (transposed)
    :param h: hidden state; its shape is the target shape of the context
    :return: output of ``self.WS``
    """
    context = F.batch_matmul(a, s, transa=True)
    context = F.reshape(context, h.data.shape)
    joined = F.concat((h, context), axis=1)
    hidden = F.tanh(self.WC(joined))
    return self.WS(hidden)
def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
    """Encode a source batch in both directions and decode with attention.

    Args:
        is_training: When true, the reference words of ``trg_batch`` are fed
            to the decoder and a loss is accumulated; otherwise the decoder
            feeds back its own arg-max output.
        src_batch: Batch of source sentences.  ``len(src_batch[0])`` is used
            as the length of every sentence.
        trg_batch: Batch of target sentences; read only when ``is_training``
            is true.  ``len(trg_batch[0]) - 1`` decoding steps are run.
        generation_limit: Number of decoding steps when ``is_training`` is
            false.  It is passed to ``range()``, so it must be an integer in
            that case.

    Returns:
        tuple: ``(hyp_batch, accum_loss)``.  ``hyp_batch`` holds, per batch
        item, the list of generated words; ``accum_loss`` is the summed
        softmax cross entropy when training and ``None`` otherwise.
    """
    m = self.__model
    tanh = functions.tanh
    lstm = functions.lstm
    batch_size = len(src_batch)
    hidden_size = self.__n_hidden
    src_len = len(src_batch[0])
    trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
    src_stoi = self.__src_vocab.stoi
    trg_stoi = self.__trg_vocab.stoi
    trg_itos = self.__trg_vocab.itos

    # Zero variables used as initial states and as accumulator seeds.
    hidden_zeros = wrapper.zeros((batch_size, hidden_size))
    sum_e_zeros = wrapper.zeros((batch_size, 1))

    # make embedding
    # One int32 variable of word ids per source position.
    list_x = []
    for l in range(src_len):
        s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
        list_x.append(s_x)

    # forward encoding
    c = hidden_zeros
    s_a = hidden_zeros
    list_a = []
    for l in range(src_len):
        s_x = list_x[l]
        s_i = tanh(m.w_xi(s_x))
        c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
        list_a.append(s_a)

    # backward encoding
    c = hidden_zeros
    s_b = hidden_zeros
    list_b = []
    for l in reversed(range(src_len)):
        s_x = list_x[l]
        s_i = tanh(m.w_xi(s_x))
        c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
        # Prepend so that list_b[n] lines up with source position n.
        list_b.insert(0, s_b)

    # decoding
    c = hidden_zeros
    # Initial decoder state from the last forward and first backward state.
    s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
    s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)], dtype=np.int32)

    hyp_batch = [[] for _ in range(batch_size)]
    accum_loss = wrapper.zeros(()) if is_training else None

    for l in range(trg_len):
        # calculate attention weights
        # Unnormalised weight exp(score) per source position, plus their sum.
        list_e = []
        sum_e = sum_e_zeros
        for n in range(src_len):
            s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
            r_e = functions.exp(m.w_we(s_w))
            list_e.append(r_e)
            sum_e += r_e

        # make attention vector
        # Weighted sums of the forward (s_c) and backward (s_d) states.
        s_c = hidden_zeros
        s_d = hidden_zeros
        for n in range(src_len):
            s_e = list_e[n] / sum_e
            # batch_matmul stands in for the element-wise forms
            #s_c += s_e * list_a[n]
            #s_d += s_e * list_b[n]
            s_c += functions.reshape(functions.batch_matmul(list_a[n], s_e), (batch_size, hidden_size))
            s_d += functions.reshape(functions.batch_matmul(list_b[n], s_e), (batch_size, hidden_size))

        # generate next word
        c, s_p = lstm(c, m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
        r_y = m.w_py(s_p)
        output = wrapper.get_data(r_y).argmax(1)
        for k in range(batch_size):
            hyp_batch[k].append(trg_itos(output[k]))

        if is_training:
            # The reference word at position l + 1 is both the loss target
            # and the next decoder input.
            s_t = wrapper.make_var([trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)], dtype=np.int32)
            accum_loss += functions.softmax_cross_entropy(r_y, s_t)
            s_y = s_t
        else:
            # Stop once every hypothesis has just produced the end marker.
            if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
                break
            s_y = wrapper.make_var(output, dtype=np.int32)

    return hyp_batch, accum_loss
def test_identity_gpu(self):
    """On the GPU, ``x`` times ``batch_inv(x)`` must match the identity."""
    expected = cuda.to_gpu(_make_eye(self.x.shape))
    x = chainer.Variable(cuda.to_gpu(self.x))
    inverse = functions.batch_inv(x)
    product = functions.batch_matmul(x, inverse)
    gradient_check.assert_allclose(
        product.data, expected, rtol=1e-4, atol=1e-4)
usedvoclist_new3 = usedvoclist[np.array(sum_list2) > 5] initialpolarityVec = np.zeros((len(usedvoclist_new3),1)) for index, word in enumerate(usedvoclist_new3): try: #polarity = pne2[word] initialpolarityVec[index] = pne2[word] except: continue x_batch = chainer.Variable(np.array(kakariukeMat_list3[0:10]).astype(np.float32)) x_batch_1 = chainer.Variable(np.array(kakariukeMat_list_1[0:10]).astype(np.float32)) x_batch_2 = chainer.Variable(np.array(kakariukeMat_list_2[0:10]).astype(np.float32)) polarity_val = chainer.Variable(initialpolarityVec.astype(np.float32)) batch_W = F.concat([F.expand_dims(polarity_val,0)] * 10,0) v1 = F.reshape(F.batch_matmul(x_batch_1, batch_W),(10,483)) v2 = F.reshape(F.batch_matmul(x_batch_2, batch_W),(10,483)) l_hidden1(v1) l_hidden2(v1) import chainer from chainer.functions.connection import linear from chainer.fåunctions import batch_matmul from chainer import initializers from chainer import link import chainer.functions as F import numpy as xp class TensorMat(link.Link): def __init__(self, initialW, wscale=1, bias=0, nobias=True, initial_bias=None):
def get_matrix(y):
    """Return the normalised Gram matrix of a batch of feature maps.

    Every item of ``y`` is flattened to ``(channels, positions)`` and
    multiplied by its own transpose; the result is divided by
    ``channels * positions``.

    The number of positions is now the product of all axes after the channel
    axis.  The previous code used ``shape[2] ** 2`` and therefore only worked
    for square feature maps; for those the result is unchanged.

    Args:
        y: Variable whose data has shape ``(batch, channels, ...)``.

    Returns:
        Variable holding one ``(channels, channels)`` matrix per batch item.
    """
    shape = y.data.shape
    ch = shape[1]
    # Number of spatial positions per channel (height * width for 4-D input).
    positions = int(np.prod(shape[2:]))
    gogh_y = F.reshape(y, (shape[0], ch, positions))
    gogh_matrix = F.batch_matmul(gogh_y, gogh_y, transb=True) / np.float32(ch * positions)
    return gogh_matrix
def forward_onestep(self, x_data, y_data, state, train=True):
    """Advance the controller and the external memory by one time step.

    Args:
        x_data: Input array; ``x_data.shape[0]`` is used as the batch size.
        y_data: Optional label array.  When it is not ``None`` it is
            converted to int32 and fed to the controller through
            ``lstm_yh``.
        state: Dict with the previous step's values under the keys ``'M'``,
            ``'c'``, ``'h'``, ``'r'``, ``'read_weight'`` and
            ``'used_weight'``.
        train: Variables created here are volatile when this is false.

    Returns:
        dict: New state with the same keys as ``state``.
    """
    batchsize = x_data.shape[0]
    x = chainer.Variable(x_data, volatile=not train)
    # lstm
    # Controller input: sum of the projections of the input, the optional
    # label, the previous read vector and the previous hidden state.
    if y_data is not None:
        y = chainer.Variable(self.xp.array(y_data, dtype=self.xp.int32), volatile=not train)
        h_in = self.chain.lstm_xh(x) + \
               self.chain.lstm_yh(y) + \
               self.chain.lstm_rh(state['r']) + \
               self.chain.lstm_hh(state['h'])
    else:
        h_in = self.chain.lstm_xh(x) + \
               self.chain.lstm_rh(state['r']) + \
               self.chain.lstm_hh(state['h'])
    c_t, h_t = F.lstm(state['c'], h_in)
    # One key and one "add" vector per read head, plus the gate sigma.
    key = F.reshape(self.chain.l_key(h_t), (batchsize, self.nb_reads, self.memory_shape[1]))
    add = F.reshape(self.chain.l_add(h_t), (batchsize, self.nb_reads, self.memory_shape[1]))
    sigma = self.chain.l_sigma(h_t)
    # Compute least used weight (not differentiable)
    # argsort runs with NumPy, so GPU data is copied to the host first.
    if self.xp == cp:
        wu_tm1_data = cp.copy(state['used_weight'].data)
        lu_index = np.argsort(cuda.to_cpu(wu_tm1_data), axis=1)[:,:self.nb_reads]
    else:
        wu_tm1_data = state['used_weight'].data
        lu_index = np.argsort(wu_tm1_data, axis=1)[:,:self.nb_reads]
    # Indicator over memory slots: 1 for the nb_reads smallest usage values.
    wlu_tm1_data = self.xp.zeros((batchsize, self.memory_shape[0]), dtype=self.xp.float32)
    for i in range(batchsize):
        for j in range(self.nb_reads):
            wlu_tm1_data[i,lu_index[i,j]] = 1.  # 1 for least used index
    wlu_tm1 = chainer.Variable(wlu_tm1_data, volatile=not train)
    # write weight
    # Blend of the previous read weights and the least-used indicator,
    # controlled by sigma (both broadcast to one row per read head).
    _wlu_tm1 = F.broadcast_to(
        F.reshape(wlu_tm1, (batchsize, 1, self.memory_shape[0])),
        (batchsize, self.nb_reads, self.memory_shape[0]))
    _sigma = F.broadcast_to(F.reshape(sigma, (batchsize, 1, 1)), (batchsize, self.nb_reads, self.memory_shape[0]))
    ww_t = _sigma * state['read_weight'] + (1 - _sigma) * _wlu_tm1
    # write to memory
    # The mask zeroes the least-used slots before the new content is added.
    _lu_mask = 1 - F.broadcast_to(
        F.reshape(wlu_tm1, (batchsize, self.memory_shape[0], 1)),
        (batchsize, self.memory_shape[0], self.memory_shape[1]))
    M_t = state['M'] * _lu_mask + F.batch_matmul(ww_t, add, transa=True)
    # read from memory
    K_t = cosine_similarity(key, M_t)
    # read weight, used weight
    # Softmax over memory slots, computed separately for every read head.
    wr_t = F.reshape(F.softmax(F.reshape(
        K_t, (-1, self.memory_shape[0]))), (batchsize, self.nb_reads, self.memory_shape[0]))
    # Usage: decayed previous usage plus this step's read and write weights.
    wu_t = self.gamma * state['used_weight'] + F.sum(wr_t, axis=1) + F.sum(ww_t, axis=1)
    # read memory
    r_t = F.reshape(F.batch_matmul(wr_t, M_t), (batchsize, -1))  # (batchsize, nb_reads * memory_shape[1])
    # set to state
    state_new = {
        'M': M_t,
        'c': c_t,
        'h': h_t,
        'r': r_t,
        'read_weight': wr_t,
        'used_weight': wu_t,
    }
    return state_new
if n < 60: preW[n][0][0] = np.random.rand() if (n < 80) & (n > 20): preW[n][0][-1] = -np.random.rand() preWdict[n] = dict(zip(preW_namelist_dic[n].T,preW[n].T)) #target4 = np.r_[2 * np.ones(datavolume),np.ones(datavolume), np.zeros(datavolume)] target4 = np.r_[np.ones(datavolume), np.zeros(datavolume)] topdocveccategoryMat3_bow = np.concatenate([topdocveccategoryMat3[i] for i in range(200)],axis = 1) preW_bow = np.concatenate([preW[i] for i in range(200)],axis = 1) F.batch_matmul(x_batch,chainer.Variable(preW_bow)) (x_batch * chainer.Variable(preW_bow)).data.shape topdocveccategoryMat3_bow*(np.ones((2000,1))*preW_bow) W_category = np.concatenate([np.r_[np.zeros((l*30,1)),np.ones((30,1)),np.zeros(((DimentionN - 1 - l)*30,1))] for l in range(DimentionN)],axis = 1) F.batch_matmul(x_batch,preW_bow) class IIalgorithm_highspeed(Chain): def __init__(self,preW_bow, initialW, W_category, DimentionN,binary = True): if binary == True: super(IIalgorithm_highspeed, self).__init__( l_output = L.Linear(len(preW), 2,wscale = 0.01, initialW = initialW), l_polarity = yousoseki.Yousoseki(topdocveccategoryMat3_bow.shape[1],initialW= preW_bow) ) else: super(IIalgorithm_simple_gpu_4, self).__init__(
def test_identity_cpu(self):
    """On the CPU, ``x`` times ``batch_inv(x)`` must match the identity."""
    expected = _make_eye(self.x.shape)
    x = chainer.Variable(self.x)
    inverse = functions.batch_inv(x)
    product = functions.batch_matmul(x, inverse)
    gradient_check.assert_allclose(
        product.data, expected, **self.check_forward_options)