def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    mu = functions.matmul(m, u, transb=True)
    p = functions.softmax(mu)
    c = self.encode_output(x_input)
    # transpose c and take the dot product with p -> (2, 50, 1)
    o = functions.matmul(functions.swapaxes(c, 1, 0), p)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    predict = self.W(u + o)
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def propdown(self, hid):
    """This function propagates the hidden units activation downwards to the visible units.

    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
        - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width)
        - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(
            hid, W_flipped, self.conv.a, pad=self.ksize - 1)
        # dense equivalent: F.matmul(hid, self.l.W)
        #     + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
    return v_mean
def __call__(self, src, is_train=False, xp=np):
    # Some namings
    B = len(src)     # batch size
    N = len(src[0])  # length of source
    H = self.H
    src_col = lambda x: Variable(self.xp.array([src[i][x] for i in range(B)], dtype=np.int32))
    embed = lambda e, x: e(self.IE(x), is_train=is_train)
    bi_rnn = lambda x, y: self.AE(F.concat((x[0], y[1]), axis=1))
    concat_source = lambda S, s: s if S is None else F.concat((S, s), axis=2)

    # State reset
    self.EF.reset_state()
    self.EB.reset_state()

    # Forward + backward encoding
    s = []
    for j in range(N):
        s.append((
            embed(self.EF, src_col(j)),
            embed(self.EB, src_col(-j - 1))
        ))

    # Joining the encoding data together
    S = None
    for j in range(N):
        s_j = bi_rnn(s[j], s[-j - 1])
        S = concat_source(S, F.reshape(s_j, (B, H, 1)))
    S = F.swapaxes(S, 1, 2)
    return S, s_j
def check_forward(self, x_data):
    axis1, axis2 = self.axis1, self.axis2
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, axis1, axis2)
    self.assertEqual(y.data.dtype, self.dtype)
    self.assertTrue(
        (self.x.swapaxes(axis1, axis2) == cuda.to_cpu(y.data)).all())
def check_backward(self, x_data, y_grad):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y_grad
    y.backward()
    func = y.creator
    f = lambda: func.forward((x.data.copy(),))
    gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,), eps=1e-5)
    gradient_check.assert_allclose(gx, x.grad, rtol=1e-5)
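# A hedged aside (not part of the test class above): Chainer also ships
# chainer.gradient_check.check_backward, which wraps the manual numerical-gradient
# comparison done in check_backward above. A minimal sketch, assuming a toy
# (2, 3, 4) float32 input and swapping axes 1 and 2:
import numpy
import chainer.functions as functions
from chainer import gradient_check

x_data = numpy.random.uniform(-1, 1, (2, 3, 4)).astype(numpy.float32)
y_grad = numpy.random.uniform(-1, 1, (2, 4, 3)).astype(numpy.float32)
gradient_check.check_backward(
    lambda v: functions.swapaxes(v, 1, 2), x_data, y_grad, eps=1e-3, atol=1e-4)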
def __call__(self, xs, ilens):
    '''VGG2L forward

    :param xs: batch of padded input sequences (utt x frame x dim)
    :param ilens: batch of input sequence lengths
    :return: list of subsampled sequences and updated lengths
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(
        F.reshape(xs, (xs.shape[0], xs.shape[1], self.in_channel,
                       xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly (two 2x subsamplings)
    ilens = self.xp.array(self.xp.ceil(
        self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32)
    ilens = self.xp.array(self.xp.ceil(
        self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zero-padded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
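# A minimal sketch (toy sizes, not the module above) of the channel-splitting
# step in VGG2L.__call__: the padded batch (utt, frame, dim) is reshaped to
# (utt, frame, in_channel, dim // in_channel), and swapaxes(1, 2) moves the
# channel axis ahead of the frame axis, producing the NCHW layout the 2D
# convolutions expect.
import numpy as np
import chainer.functions as F

utt, frame, dim, in_channel = 2, 7, 6, 3
xs = np.random.randn(utt, frame, dim).astype(np.float32)
xs = F.swapaxes(F.reshape(xs, (utt, frame, in_channel, dim // in_channel)), 1, 2)
assert xs.shape == (utt, in_channel, frame, dim // in_channel)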
def query(self, u):
    xp = cuda.get_array_module(u)
    size = self.m.shape[1]
    inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, self.m.shape)
    tc = F.broadcast_to(tc, self.c.shape)
    p = F.softmax(F.batch_matmul(self.m + tm, u))
    o = F.batch_matmul(F.swapaxes(self.c + tc, 2, 1), p)
    o = F.squeeze(o, -1)
    u = o + u
    return u
def forward_cnn(self, h):
    # Check and prepare for 2d convolutions
    h = F.expand_dims(h, 2)
    h = F.swapaxes(h, 1, 2)
    # Apply each CNN layer
    for i, cnn_layer in enumerate(self.cnns):
        # cnn pass
        h = self[cnn_layer](h)
        # Apply batch normalization
        if self.cnn_bn:
            bn_lname = '{0:s}_bn'.format(cnn_layer)
            h = self[bn_lname](h)
        # Apply non-linearity
        h = F.relu(h)
    # Prepare return: batch size * num time frames after pooling * cnn out dim
    h = F.swapaxes(h, 1, 2)
    h = F.reshape(h, h.shape[:2] + tuple([-1]))
    h = F.rollaxis(h, 1)
    return h
def maxpooling(self, xs, neighbor):
    sources = defaultdict(list)
    for ee in neighbor:
        for i in neighbor[ee]:
            sources[i].append(xs[ee])
    # sources: the key is the entity index i, the value is the embeddings (after the
    # transform function) of all entities related to entity i.
    # Sort sources by entity index so that in the end len(result) == len(entities).
    result = []
    for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
        if len(xxs) == 1:
            x = xxs[0]
            x = self.forwardAA(x, xxs)  # attention
            result.append(x)
        else:
            x = F.concat(xxs, axis=0)  # -> (b, d)
            x = F.swapaxes(x, 0, 1)    # -> (d, b)
            x = F.maxout(x, len(xxs))  # -> (d, 1)
            x = F.swapaxes(x, 0, 1)    # -> (1, d)
            x = self.forwardAA(x, xxs)  # attention
            result.append(x)
    return result
def __call__(self, x, pid):
    x = self.bn(x)
    x = F.swapaxes(x, axis1=1, axis2=3)
    y = F.expand_dims(F.expand_dims(pid, axis=-1), axis=-1)
    y = F.tile(y, reps=(1, 1, self.audio_window_size, 1))
    x = F.concat((x, y), axis=1)
    x = self.branch(x)
    x = F.reshape(x, shape=(x.shape[0], -1))
    x = F.concat((x, pid), axis=1)
    x = self.fc1(x)
    x = F.tanh(x)
    x = self.fc2(x)
    return x
def predict(self, combined_x):
    """Forward pass for combined input."""
    # combined_x (..., W, H, E+T)
    in_x = F.reshape(combined_x, (-1,) + combined_x.shape[-3:])  # (N, W, H, E+T)
    in_x = F.swapaxes(in_x, -1, -3)  # (N, E+T, H, W)
    out = F.relu(self.conv1(in_x))  # (N, E, H, W)
    out = F.relu(self.conv2(out))  # (N, E, W', H')
    out = F.max_pooling_2d(out, tuple(GRID))  # (N, E, W', H')
    out = self.fc1(out)  # (N, V)
    out = F.squeeze(out) @ self.embed.W.T  # (N, V)
    out = F.reshape(out, combined_x.shape[:-3] + (VOCAB,))  # (..., V)
    return out
def __call__(self, X, split_into_variables=True, add_noise_to_input=True):
    xp = self.xp
    batchsize = X.shape[0]
    seq_length = X.shape[1]

    embedding = self.embed(X)

    # insert noise at <BLANK> (optional)
    if add_noise_to_input:
        noise = xp.random.normal(0, 1, embedding.shape)
        mask = X == BLANK
        mask = xp.broadcast_to(xp.expand_dims(mask, 2), noise.shape)
        embedding += noise * mask

    embedding = F.swapaxes(embedding, 1, 2)

    in_data = []
    if self.ndim_embedding == self.ndim_h:
        in_data.append(embedding)

    out_data = self._forward_layer(0, embedding)
    in_data.append(out_data)

    for layer_index in xrange(1, self.num_layers):
        out_data = self._forward_layer(
            layer_index,
            sum(in_data) if self.densely_connected else in_data[-1])  # dense conv
        in_data.append(out_data)

    out_data = sum(in_data) if self.densely_connected else out_data  # dense conv

    if self.dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.dense(out_data)

    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)

    return out_data
def attention(self, hs_padded, ht_padded):
    # bt * maxlen_t * demb
    ht_padded_W = self.W(F.concat(ht_padded, axis=0)).reshape(ht_padded.shape)
    hs_swap = F.swapaxes(hs_padded, 1, 2)  # bt * demb * maxlen_s
    attn_matrix = F.matmul(ht_padded_W, hs_swap)  # bt * maxlen_t * maxlen_s
    attn_matrix_sm = F.softmax(attn_matrix, axis=2)  # bt * maxlen_t * maxlen_s
    # (bt * maxlen_t * maxlen_s) * (bt * maxlen_s * demb) = bt * maxlen_t * demb
    context_vector = F.matmul(attn_matrix_sm, hs_padded)
    return context_vector, attn_matrix_sm
def seq_encode(self, xs):
    embed_xs = self.embed(xs)
    embed_xs.unchain_backward()
    batchsize, seq_length, dim = embed_xs.shape
    sum_embed_xs = F.sum(embed_xs, axis=1)
    embed_xs_reshape = F.reshape(embed_xs, (batchsize, 1, seq_length, dim))

    # 1. wide convolution (the original authors may use narrow convolution?)
    xs_conv1 = F.tanh(self.conv1(embed_xs_reshape))
    # (batchsize, 50, seqlen, 1) --> (batchsize, 1, seqlen, 50)
    xs_conv1_swap = F.swapaxes(xs_conv1, 1, 3)

    return sum_embed_xs, xs_conv1, xs_conv1_swap
def __call__(self, xs):
    """Forward pass of a sentence.

    :param xs: a batch of sentences
    :return h: final hidden states
    """
    xs = self.embed(xs)
    xs = F.swapaxes(xs, 0, 1)  # time, batch, embed
    self.rnn.reset_state()
    for x in xs:
        h = self.rnn(x)
    h = F.tanh(self.linear(h))
    return h
def forward_rnn_encode_proj(self, X):
    # Reset rnn state
    self.reset_rnn_state()
    # Get input shape
    in_size, batch_size, in_dim = X.shape
    enc_states = X
    for currL in range(len(self.rnn_enc)):
        for i in range(in_size):
            temp_f = F.expand_dims(
                F.dropout(self[self.rnn_enc[currL]](enc_states[i]),
                          ratio=self.cfg["dropout"]["rnn"]), 0)
            # if bi-directional
            if self.bi_rnn:
                temp_r = F.expand_dims(
                    F.dropout(self[self.rnn_rev_enc[currL]](enc_states[-1]),
                              ratio=self.cfg["dropout"]["rnn"]), 0)
            if i > 0:
                h_fwd = F.concat((h_fwd, temp_f), axis=0)
                if self.bi_rnn:
                    h_rev = F.concat((h_rev, temp_r), axis=0)
            else:
                h_fwd = temp_f
                if self.bi_rnn:
                    h_rev = temp_r
        # end current rnn layer
        if self.bi_rnn:
            h_rev = F.flipud(h_rev)
            rnn_states = F.concat((h_fwd, h_rev), axis=2)
        else:
            rnn_states = h_fwd

        # Apply linear projection on all but the last layer
        if currL < (len(self.rnn_enc) - 1):
            for i in range(0, in_size):
                currH = F.relu(self[f"enc_proj{currL}_bn"](
                    self[f"enc_proj{currL}"](rnn_states[i])))
                if i > 0:
                    enc_states = F.concat(
                        (enc_states, F.expand_dims(currH, 0)), axis=0)
                else:
                    enc_states = F.expand_dims(currH, 0)
            # end for all hidden states
    # end all layers

    # Make the batch size the first dimension
    self.enc_states = F.swapaxes(enc_states, 0, 1)
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad - 1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat(
                (self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def __call__(self, h1s, h2s):
    # A bit messy, but this will do for now
    seq_len, _ = h1s[0].shape
    h2s_len = [x.shape[0] for x in h2s]
    h1s_stack = F.stack(h1s, axis=0)
    h2s_stack = F.pad_sequence(h2s, padding=-1)
    h2s_mask = self.xp.swapaxes((h2s_stack.data != -1)[:, :, :seq_len], 1, 2)
    minfs = self.xp.full(h2s_mask.shape, -np.inf, dtype=np.float32)

    raw_attn_mat = F.batch_matmul(h1s_stack, F.swapaxes(h2s_stack, 1, 2))
    masked_attn_mat = F.where(h2s_mask, raw_attn_mat, minfs)

    # weighted sum along the h1s direction
    # (normalizing here should also work)
    h1s_attn = F.batch_matmul(F.softmax(masked_attn_mat, axis=2), h2s_stack)
    m1 = calc_vector_interactions(h1s_stack, h1s_attn)
    if self.drop_local_inference:
        m1 = F.dropout(m1, 0.5)
    m1s = F.separate(m1, axis=0)

    # weighted sum along the h2s direction
    # (normalizing here should also work)
    h2s_attn_mat = F.softmax(masked_attn_mat, axis=1)
    # in this direction the softmax produces NaNs, so fill them with zeros;
    # otherwise NaN times a real number propagates and kills the whole computation
    masked_h2s_attn_mat = F.where(h2s_mask, h2s_attn_mat,
                                  self.xp.zeros(h2s_mask.shape, dtype='f'))
    h2s_attn = F.swapaxes(
        F.batch_matmul(F.swapaxes(h1s_stack, 1, 2), masked_h2s_attn_mat), 1, 2)
    m2 = calc_vector_interactions(h2s_stack, h2s_attn)
    if self.drop_local_inference:
        m2 = F.dropout(m2, 0.5)
    m2s = [h[:l, :] for h, l in zip(F.separate(m2, axis=0), h2s_len)]

    return m1s, m2s
def __call__(self, x):
    """call

    Args:
        x: [batch, n_global_capsule, caps_dim, n_local_grid, n_local_grid]
            e.g. [?, 32, 8, 6, 6] -> [?, 32, 6, 6, 8] -> [?, 10, 1152, 8, 1]
    """
    # calculating x_hat
    x = F.swapaxes(x, self.caps_dim, -1)
    x = F.reshape(x, (-1, self.in_caps, self.in_dims))
    x = F.expand_dims(x, -1)
    x = F.expand_dims(x, 1)
    x = F.tile(x, (1, self.out_caps, 1, 1, 1))
    Ws = F.tile(self.W, (x.shape[0], 1, 1, 1, 1))
    x_hats = F.matmul(Ws, x)

    # dynamic routing
    x_hats = F.swapaxes(x_hats, 2, 3)
    x_hats = F.reshape(x_hats, x_hats.shape[:-1])
    v_j = routing(x_hats, self.n_iters)
    return v_j
def query(self, u):
    m = self.m
    c = self.c
    batch, size = m.data.shape[:2]
    # xp is assumed to be numpy or cupy, bound at module level
    inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
    p = F.softmax(F.batch_matmul(m + tm, u))
    o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
    o = F.reshape(o, (batch, m.data.shape[2]))
    u = o + u
    return u
def __call__(self, x):
    z = F.relu(self[1](self[0](x)))
    z = F.dropout(
        F.max_pooling_2d(z, ksize=(1, 5), stride=(1, 5), pad=(0, 0)), .1)
    z = F.relu(self[3](self[2](z)))
    z = F.dropout(
        F.max_pooling_2d(z, ksize=(1, 5), stride=(1, 5), pad=(0, 0)), .1)
    z = F.relu(self[5](self[4](z)))
    z = F.dropout(
        F.max_pooling_2d(z, ksize=(1, 7), stride=(1, 7), pad=(0, 0)), .1)
    z = self[6](z)
    z = F.squeeze(z)
    z = F.swapaxes(z, 1, 2)
    return z
def decode_one_step(self, X, encoder_last_hidden_states, test=False):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]

    xt = X[:, -1, None]
    embedding = self.decoder_embed(xt)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_decoder_layer_one_step(
        0, embedding, encoder_last_hidden_states[0], test=test)
    in_data = [out_data]

    for layer_index in xrange(1, self.num_layers):
        out_data = self._forward_decoder_layer_one_step(
            layer_index,
            sum(in_data) if self.densely_connected else in_data[-1],
            encoder_last_hidden_states[layer_index],
            test=test)
        in_data.append(out_data)

    out_data = sum(in_data) if self.densely_connected else out_data  # dense conv
    out_data = out_data[:, :, -1, None]

    if self.dropout:
        out_data = F.dropout(out_data, ratio=self.dropout_ratio, train=not test)

    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.ndim_h))
    Y = self.dense(out_data)

    if test:
        Y.unchain_backward()

    return Y
def decode_one_step(self, X, encoder_last_hidden_states):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.decoder_kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.decode(X, encoder_last_hidden_states, return_last=True)

    xt = X[:, -ksize:]
    embedding = self.decoder_embed(xt)
    embedding = F.swapaxes(embedding, 1, 2)

    out_data = self._forward_decoder_layer_one_step(
        0, embedding, encoder_last_hidden_states[0])
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer_one_step(
            layer_index,
            F.concat(in_data) if self.densely_connected else in_data[-1],
            encoder_last_hidden_states[layer_index])
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv
    out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

    return out_data
def query(self, u):
    xp = cuda.get_array_module(u.data)
    m = self.m
    c = self.c
    batch, size = m.data.shape[:2]
    inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
    p = F.softmax(F.batch_matmul(m + tm, u))
    o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
    o = o[:, :, 0]
    u = o + u
    return u
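# A hedged shape walk-through (toy sizes, independent of the classes above) of
# the memory-read pattern shared by the query methods: swapaxes turns the
# (batch, size, dim) output memory into (batch, dim, size), so batch_matmul with
# the (batch, size, 1) attention weights yields one (batch, dim) read vector.
import numpy as np
import chainer.functions as F

batch, size, dim = 4, 10, 20
m = np.random.randn(batch, size, dim).astype(np.float32)  # input memory
c = np.random.randn(batch, size, dim).astype(np.float32)  # output memory
u = np.random.randn(batch, dim).astype(np.float32)        # query

p = F.softmax(F.batch_matmul(m, u))         # (batch, size, 1)
o = F.batch_matmul(F.swapaxes(c, 2, 1), p)  # (batch, dim, 1)
o = F.reshape(o, (batch, dim))              # (batch, dim)
assert o.shape == (batch, dim)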
def __call__(self, x):
    batch = x.shape[0]
    batch_seq_len = batch * self.seq_len

    x = F.reshape(x, shape=(batch_seq_len, 1, self.audio_window_size,
                            self.audio_features))
    x = F.swapaxes(x, axis1=1, axis2=3)
    x = self.conv_branch(x)
    x = F.reshape(x, shape=(batch_seq_len, 1, -1))
    x = self.fc_branch(x)

    x = F.reshape(x, shape=(batch, self.seq_len, -1))
    x = F.swapaxes(x, axis1=1, axis2=2)
    y = x[:, :, (self.seq_len // 2)]

    w = self.att_conv_branch(x)
    w = F.reshape(w, shape=(batch, self.seq_len))
    w = self.att_fc(w)
    w = F.expand_dims(w, axis=-1)

    x = F.batch_matmul(x, w)
    x = F.squeeze(x, axis=-1)
    return x, y
def __call__(self, x, dur=1):
    x = F.pad(x, [(0, 0), (0, 0), (125 * dur, 125 * dur), (0, 0)], 'constant')
    z = F.relu(self[1](self[0](x)))
    z = F.dropout(
        F.max_pooling_2d(z, ksize=(15, 1), stride=(15, 1), pad=(0, 0)), .1)
    z = F.relu(self[3](self[2](z)))
    z = F.dropout(
        F.max_pooling_2d(z, ksize=(11, 1), stride=(11, 1), pad=(0, 0)), .1)
    z = F.relu(self[5](self[4](z)))
    z = F.relu(self[7](self[6](z)))
    z = self[8](z)
    z = F.squeeze(z)
    z = F.swapaxes(z, 1, 2)
    return z
def __call__(self, x):
    h = self.conv1(x, self.train)
    h = self.conv2(h, self.train)
    h = F.max_pooling_2d(h, (1, 160))
    h = F.swapaxes(h, 1, 2)
    h = self.conv3(h, self.train)
    h = F.max_pooling_2d(h, 3)
    h = self.conv4(h, self.train)
    h = F.max_pooling_2d(h, (1, 3))
    h = F.dropout(F.relu(self.fc5(h)), train=self.train)
    h = F.dropout(F.relu(self.fc6(h)), train=self.train)
    return self.fc7(h)
def benchmark_cuda_ctc(batchsize, label_length, seq_length, vocab_size, repeat=50):
    label_unigram = xp.random.randint(
        1, vocab_size, size=(batchsize, label_length)).astype(xp.int32)
    length_unigram = xp.full((batchsize,), label_length, dtype=np.int32)
    blank_symbol = 0

    x = xp.random.normal(
        0, 1, size=batchsize * vocab_size * seq_length).reshape(
            (batchsize, vocab_size, seq_length)).astype(xp.float32)
    in_data = Variable(x)
    in_data = F.swapaxes(in_data, 1, 2)
    in_data = F.reshape(in_data, (batchsize, -1))
    in_data = F.split_axis(in_data, seq_length, axis=1)
    x_length = Variable(xp.full((batchsize,), seq_length, dtype=np.int32))

    start_time = time.time()
    for i in range(repeat):
        loss_ctc = cuda_ctc.connectionist_temporal_classification(
            in_data, label_unigram, blank_symbol, x_length,
            Variable(length_unigram), reduce="mean")
    forward_time_mean = (time.time() - start_time) / repeat

    start_time = time.time()
    for i in range(repeat):
        loss_ctc = cuda_ctc.connectionist_temporal_classification(
            in_data, label_unigram, blank_symbol, x_length,
            Variable(length_unigram), reduce="mean")
        loss_ctc.backward()
    backward_time_mean = (time.time() - start_time) / repeat

    return forward_time_mean, backward_time_mean
def wsd_with_tc(self, sent, trf_encoded_matrix, labels):
    ### WSD ###
    if self.model_type == "TRF-Multi" or self.model_type == "TRF-Delay-Multi":
        y_wsd = self.wsd_only(trf_encoded_matrix, labels)
    elif self.model_type == "TRF-Sequential":
        y_wsd, task_type = self.wsd_model(sent, None, None, True)  # load the sequential model

    y_wsd_soft = F.softmax(y_wsd)              # apply softmax to the WSD predictions
    argmax_wsd = F.argmax(y_wsd_soft, axis=1)  # take the index of the maximum

    # condition used to ignore words that carry no sense label
    cond = chainer.Variable(
        self.xp.array([
            True if i != "<PAD>" else False for i in list(chain(*labels))
        ]))
    pad_array = chainer.Variable(
        -1 * self.xp.ones(argmax_wsd.shape, dtype=argmax_wsd.dtype))
    pad_array_argmax_wsd = F.where(cond, argmax_wsd, pad_array)

    sense_label_embed = F.embed_id(
        x=pad_array_argmax_wsd,
        W=self.xp.array(self.lookup_table_sense_fixed),
        ignore_label=-1)  # fixed lookup table (not updated)

    sense_label_embed = sense_label_embed.reshape(
        trf_encoded_matrix.shape[0], trf_encoded_matrix.shape[-1], -1)
    origin_shape = sense_label_embed.shape
    sense_label_embed = F.moveaxis(sense_label_embed, 1, 2)

    ## replacement ##
    cond_reshape = cond.reshape(cond.shape[0], -1)
    cond_reshape = F.broadcast_to(
        cond_reshape, (cond_reshape.shape[0], trf_encoded_matrix.shape[1]))
    cond_reshape = cond_reshape.reshape(origin_shape)
    cond_reshape = F.swapaxes(cond_reshape, 1, 2)
    replaced_trf_matrix = F.where(cond_reshape, sense_label_embed,
                                  trf_encoded_matrix)

    ### feed the WSD predictions into TC ###
    tc = replaced_trf_matrix  # document matrix after replacement

    ### TC ###
    tc_features = F.sum(tc, axis=2)  # TC features
    y_tc = self.fc2(tc_features)     # TC predictions

    return (y_tc, y_wsd) if (self.model_type == "TRF-Multi") or (
        self.model_type == "TRF-Delay-Multi") else y_tc
def eval(self, **dataset):
    """Calculate loss function from given datasets and model.

    Args:
        **dataset (~numpy.ndarray): Datasets passed as kwargs.
            Name of each key is in the format 'inputs/N' or 'labels/N'.
            'N' is the order of the dataset.

    Returns:
        ~chainer.Variable: A scalar value calculated with loss function.
    """
    inputs = [dataset[f'inputs/{i}']
              for i in range(self.order['descriptor'] + 1)]
    labels = [dataset[f'labels/{i}']
              for i in range(self.order['property'] + 1)]
    predictions = self._model.predict(inputs, self.order['descriptor'])

    loss0 = F.mean_squared_error(predictions[0], labels[0])
    loss1 = F.mean_squared_error(predictions[1], labels[1])
    loss_sum1 = F.mean(predictions[1])
    transverse = F.swapaxes(predictions[2], 2, 3)
    loss_rot = F.mean(
        F.square((predictions[2] - transverse) / (predictions[2] + transverse)))
    total_loss = ((1.0 - self._mixing_beta) * loss0
                  + self._mixing_beta * loss1
                  + self._summation * loss_sum1
                  + self._rotation * loss_rot)

    RMSE0 = F.sqrt(loss0)
    RMSE1 = F.sqrt(loss1)
    AbsMean1 = F.absolute(loss_sum1)
    RMS_rot = F.sqrt(loss_rot)
    total = ((1.0 - self._mixing_beta) * RMSE0
             + self._mixing_beta * RMSE1
             + self._summation * AbsMean1
             + self._rotation * RMS_rot)

    observation = {
        self._observation_keys[0]: RMSE0,
        self._observation_keys[1]: RMSE1,
        self._observation_keys[2]: AbsMean1,
        self._observation_keys[3]: RMS_rot,
        self._observation_keys[4]: total,
    }
    chainer.report(observation, observer=self._model)
    return total_loss
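# A small hedged sketch (toy data, not the evaluator above) of the loss_rot
# term: assuming predictions[2] holds a batch of square matrices shaped like
# (batch, n, 3, 3), F.swapaxes(x, 2, 3) is the batched matrix transpose, so
# (x - transverse) measures how far each predicted tensor is from being symmetric.
import numpy as np
import chainer.functions as F

x = np.random.randn(2, 5, 3, 3).astype(np.float32)
transverse = F.swapaxes(x, 2, 3)
assert np.allclose(transverse.data, np.swapaxes(x, 2, 3))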
def inverse(self, y):
    scale_sqr = self.scale * self.scale
    batch, y_channels, y_height, y_width = y.shape
    assert (y_channels % scale_sqr == 0)
    x_channels = y_channels // scale_sqr
    x_height = y_height * self.scale
    x_width = y_width * self.scale

    x = F.transpose(y, axes=(0, 2, 3, 1))
    x = x.reshape(batch, y_height, y_width, scale_sqr, x_channels)
    d3_split_seq = F.split_axis(
        x, indices_or_sections=(x.shape[3] // self.scale), axis=3)
    d3_split_seq = [t.reshape(batch, y_height, x_width, x_channels)
                    for t in d3_split_seq]
    x = F.stack(d3_split_seq, axis=0)
    x = F.transpose(
        F.swapaxes(x, axis1=0, axis2=1),
        axes=(0, 2, 1, 3, 4)).reshape(batch, x_height, x_width, x_channels)
    x = F.transpose(x, axes=(0, 3, 1, 2))
    return x
def forward(self, xs, ilens):
    """Subsample x.

    :param xs: batch of padded input sequences
    :param ilens: batch of input sequence lengths
    :return: subsampled sequences and updated lengths
    """
    xs = self.xp.array(xs[:, None])
    xs = F.relu(self.conv1(xs))
    xs = F.relu(self.conv2(xs))
    batch, _, length, _ = xs.shape
    xs = self.out(F.swapaxes(xs, 1, 2).reshape(batch * length, -1))
    xs = self.pe(xs.reshape(batch, length, -1))
    # change ilens accordingly (two 2x subsamplings)
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    return xs, ilens
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = \
        self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h = self.pos_encoder(pos_x)
    h = self.inter(h)
    h = self.pos_decoder(h)
    pred_y = self.last(h)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]

    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # dense equivalent: F.sigmoid(F.matmul(v, self.l.W, transb=True)
    #     + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(
        F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize - 1))
    # dense equivalent: F.sigmoid(F.matmul(h, self.l.W)
    #     + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
def c():
    label_unigram = np.asarray([
        [1, 2, 4, 3, 5],
        [2, 4, 3, 0, 0],
    ], dtype=np.int32)
    label_bigram = np.asarray([
        [-1, 6, -1, 7, 8],
        [-1, 6, 9, 0, 0],
    ], dtype=np.int32)
    blank_symbol = 0
    path = gram_ctc._label_to_path(label_unigram, label_bigram, blank_symbol, np)

    length_unigram = np.asarray([5, 3])
    length_bigram = length_unigram - 1
    path_length = length_unigram * 2 + 1 + length_bigram
    print("path_length", path_length)

    vocab_size = 10
    seq_length = 5
    batchsize = 2

    xs = np.random.normal(
        0, 1, size=batchsize * vocab_size * seq_length).reshape(
            (batchsize, vocab_size, 1, seq_length)).astype(np.float32)
    xs = Variable(xs)
    xs = functions.swapaxes(xs, 1, 3)
    xs = functions.reshape(xs, (batchsize, -1))
    xs = functions.split_axis(xs, seq_length, axis=1)
    xs = [x.data for x in xs]
    x_length = np.asarray([seq_length, seq_length // 2], dtype=np.int32)

    yseq_shape = (len(xs),) + xs[0].shape
    print(yseq_shape)
    yseq = gram_ctc._softmax(np.vstack(xs).reshape(yseq_shape), np)
    print(yseq)

    zero_padding = -100
    log_yseq = gram_ctc._log_matrix(yseq, np, zero_padding)
    prob_trans = gram_ctc._compute_transition_probability(
        log_yseq, x_length, label_unigram, length_unigram, label_bigram,
        length_bigram, path, path_length, np, zero_padding)
def __call__(self, x):
    return functions.swapaxes(x, self.axis1, self.axis2)
def check_backward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y.data
    y.backward()
    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def f(x):
    return functions.swapaxes(x, self.axis1, self.axis2)
def f(x):
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y * y
def forward(self, inputs, devices):
    x, = inputs
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y,
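# A standalone sketch (assumed toy input, unrelated to the snippets above)
# showing that chainer.functions.swapaxes follows numpy.swapaxes semantics in
# the forward pass and works on a plain Variable.
import numpy as np
import chainer
import chainer.functions as functions

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
y = functions.swapaxes(chainer.Variable(x), 1, 2)
assert y.shape == (2, 4, 3)
assert np.array_equal(y.data, np.swapaxes(x, 1, 2))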