def __call__(self, xs, ys):
    # Before transposing, sort both lists in descending order of length.
    inds = numpy.argsort([-len(x) for x in xs]).astype('i')
    xs = [xs[i] for i in inds]
    ys = [ys[i] for i in inds]

    # Make transposed sequences.
    # Now xs[t] is a batch of words at time t.
    xs = F.transpose_sequence(xs)
    ys = F.transpose_sequence(ys)

    # hs[i] is the feature vector for the batch of words at time i.
    hs = [self.feature(x) for x in xs]
    loss = self.crf(hs, ys)
    reporter.report({'loss': loss.data}, self)

    # To predict labels, call the argmax method.
    _, predict = self.crf.argmax(hs)
    correct = 0
    total = 0
    for y, p in six.moves.zip(ys, predict):
        correct += self.xp.sum(y.data == p)
        total += len(y.data)
    reporter.report({'correct': correct}, self)
    reporter.report({'total': total}, self)

    return loss
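# A minimal standalone sketch (toy data, not from the original sources) of the
# sort-then-transpose pattern used above. transpose_sequence requires its
# inputs in non-increasing order of length; after the call, element t is the
# batch of tokens at time step t.
import numpy
import chainer.functions as F

xs = [numpy.array([1, 2, 3], 'i'),
      numpy.array([4, 5], 'i')]          # already sorted by length, descending
ts = F.transpose_sequence(xs)
# ts[0].data == [1, 4]  (all first tokens)
# ts[1].data == [2, 5]  (all second tokens)
# ts[2].data == [3]     (only the longer sequence remains)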
def check_forward(self, xs_data):
    xs = [chainer.Variable(x) for x in xs_data]
    ys = functions.transpose_sequence(xs)
    self.assertEqual(len(ys), len(self.trans_lengths))
    for y, l in zip(ys, self.trans_lengths):
        self.assertEqual(len(y.data), l)
    for i, l in enumerate(self.trans_lengths):
        for j in six.moves.range(l):
            gradient_check.assert_allclose(ys[i].data[j], self.xs[j][i])
def forward(self, *inputs):
    batch = len(inputs) // 6
    lefts = inputs[0: batch]
    rights = inputs[batch: batch * 2]
    dests = inputs[batch * 2: batch * 3]
    labels = inputs[batch * 3: batch * 4]
    sequences = inputs[batch * 4: batch * 5]
    leaf_labels = inputs[batch * 5: batch * 6]

    inds = numpy.argsort([-len(l) for l in lefts])
    # Sort all arrays in descending order and transpose them.
    lefts = F.transpose_sequence([lefts[i] for i in inds])
    rights = F.transpose_sequence([rights[i] for i in inds])
    dests = F.transpose_sequence([dests[i] for i in inds])
    labels = F.transpose_sequence([labels[i] for i in inds])
    sequences = F.transpose_sequence([sequences[i] for i in inds])
    leaf_labels = F.transpose_sequence(
        [leaf_labels[i] for i in inds])

    batch = len(inds)
    maxlen = len(sequences)

    loss = 0
    count = 0
    correct = 0

    stack = self.xp.zeros(
        (batch, maxlen * 2, self.n_units), self.xp.float32)
    for i, (word, label) in enumerate(zip(sequences, leaf_labels)):
        batch = word.shape[0]
        es = self.leaf(word)
        ds = self.xp.full((batch,), i, self.xp.int32)
        y = self.label(es)
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()

        stack = thin_stack.thin_stack_set(stack, ds, es)

    for left, right, dest, label in zip(lefts, rights, dests, labels):
        l, stack = thin_stack.thin_stack_get(stack, left)
        r, stack = thin_stack.thin_stack_get(stack, right)
        o = self.node(l, r)
        y = self.label(o)
        batch = l.shape[0]
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()

        stack = thin_stack.thin_stack_set(stack, dest, o)

    loss /= count
    reporter.report({'loss': loss}, self)
    reporter.report({'total': count}, self)
    reporter.report({'correct': correct}, self)
    return loss
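# A tiny numpy emulation (an assumption inferred from the usage above, not
# Chainer's differentiable implementation) of the thin-stack operations:
# "set" writes one row per example at a given per-example position, and
# "get" reads those rows back, returning both the values and the stack.
import numpy as np

def thin_stack_set(stack, ids, values):
    # stack: (batch, positions, units); ids: (batch,); values: (batch, units)
    stack[np.arange(len(ids)), ids] = values
    return stack

def thin_stack_get(stack, ids):
    return stack[np.arange(len(ids)), ids], stack

stack = np.zeros((2, 4, 3), np.float32)
stack = thin_stack_set(stack, np.array([0, 1]), np.ones((2, 3), np.float32))
vals, stack = thin_stack_get(stack, np.array([0, 1]))  # vals: (2, 3) of ones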
def __call__(self, x, t):
    # Returns the loss between the network output for input x and the
    # target t.
    x = F.transpose_sequence(x)
    self.eh.reset_state()
    # Run the LSTM over the sequence; keep the last hidden state.
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
    y = self.hy(h)
    t = xp.reshape(t, (len(t), 1))
    loss = F.mean_squared_error(y, t)
    chainer.reporter.report({'loss': loss}, self)
    return loss
def __call__(self, text, label, feature):
    # Returns the RMSE between the network output for the input text and
    # the true label.
    x = F.transpose_sequence(text)
    label = xp.reshape(label, (len(label), 1))
    feature = xp.reshape(feature, (len(feature), 1))
    self.eh.reset_state()

    # Forward pass over the sequence; keep the last hidden state.
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
    cel = h  # cel = [10, 200]

    # Backward pass over the reversed sequence (x[0] is skipped). Note the
    # same LSTM self.eh is reused without resetting its state.
    for word in range(1, len(x)):
        ee = self.xe(x[len(x) - word])
        hh = self.eh(ee)
    cel_back = hh  # cel_back = [10, 200]

    blstm = F.concat((cel, cel_back))     # blstm = [10, 400]
    blstm_f = F.concat((blstm, feature))  # blstm_f = [10, 401]
    predict = self.hy(blstm_f)            # predict = [10, 1]

    mse = F.mean_squared_error(predict, label)
    rmse = F.sqrt(mse)
    chainer.reporter.report({'loss': rmse}, self)
    return rmse
def __call__(self, x):
    # Computes the forward pass.
    # :param x: input sequences
    # Encode
    x = F.transpose_sequence(x)
    self.eh.reset_state()
    for word in x:
        e = self.xe(word)
        e = F.dropout(e, ratio=0.1)
        h = self.eh(e)
        h = F.dropout(h, ratio=0.1)
    y = self.hy(h)
    return y
def __call__(self, x):
    # Computes the forward pass.
    # :param x: input sequences
    # Encode
    x = F.transpose_sequence(x)
    self.eh.reset_state()
    cel = []
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
        i = self.ii(h)
        cel.append(i)
    # Classify. The per-step features are concatenated before the output
    # layer; the original passed the raw Python list to self.hy, which a
    # Linear link cannot consume.
    y = self.hy(F.concat(cel))
    return y
def predictor2(self, text, feature):
    # 'feature' was undefined in the original; it is taken as a parameter
    # here, mirroring __call__ above.
    x = F.transpose_sequence(text)
    self.eh.reset_state()

    # Forward pass over the sequence; keep the last hidden state.
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
    cel = h  # cel = [10, 200]

    # Backward pass over the reversed sequence (x[0] is skipped).
    for word in range(1, len(x)):
        ee = self.xe(x[len(x) - word])
        hh = self.eh(ee)
    cel_back = hh  # cel_back = [10, 200]

    blstm = F.concat((cel, cel_back))     # blstm = [10, 400]
    blstm_f = F.concat((blstm, feature))  # blstm_f = [10, 401]
    predict = self.hy(blstm_f)
    return predict
def __call__(self, x, Label):
    # Computes the forward pass.
    # :param x: input sequences
    # :param Label: target labels
    # Encode
    x = F.transpose_sequence(x)
    self.eh.reset_state()
    cel = []
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
        i = self.ii(h)
        cel.append(i)

    # Concatenate the per-step features. Note the range stops at
    # len(cel) - 1, so the final time step is dropped; this looks inherited
    # from the bidirectional variant, where the backward list is one shorter.
    zz = F.concat((cel[0], cel[1]))
    for i in range(2, len(cel) - 1):
        zz = F.concat((zz, cel[i]))

    y = self.hy(zz)
    score = F.sigmoid(y) * 6

    # RMSE over the minibatch.
    rmse = 0
    for i in range(BATCH_SIZE):
        rmse += (score[i] - Label[i]) ** 2
    rmse = F.sqrt(rmse / BATCH_SIZE)
    print("RMSE = ", rmse.data)
    return rmse
def f(*xs):
    return functions.transpose_sequence(xs)
def __call__(self, xs, ys):
    xs = permutate_list(xs, argsort_list_descent(xs), inv=False)
    xs = F.transpose_sequence(xs)
    ys = permutate_list(ys, argsort_list_descent(ys), inv=False)
    ys = F.transpose_sequence(ys)
    return super(CRF, self).__call__(xs, ys)
def argmax(self, xs):
    xs = permutate_list(xs, argsort_list_descent(xs), inv=False)
    xs = F.transpose_sequence(xs)
    score, path = super(CRF, self).argmax(xs)
    path = F.transpose_sequence(path)
    return score, path
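# A minimal usage sketch (toy data, hypothetical setup) for the sorting CRF
# wrapper above, with its body inlined around a plain chainer.links.CRF1d.
# It assumes argsort_list_descent and permutate_list come from
# chainer.links.connection.n_step_rnn, as in Chainer's NStep RNN code.
import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer.links.connection.n_step_rnn import (
    argsort_list_descent, permutate_list)

n_label = 3
crf = L.CRF1d(n_label)
# Per-example unary scores and gold labels, in arbitrary length order.
xs = [np.random.randn(4, n_label).astype(np.float32),
      np.random.randn(2, n_label).astype(np.float32)]
ys = [np.zeros(4, np.int32), np.zeros(2, np.int32)]
order = argsort_list_descent(xs)
xs_t = F.transpose_sequence(permutate_list(xs, order, inv=False))
ys_t = F.transpose_sequence(permutate_list(ys, order, inv=False))
loss = crf(xs_t, ys_t)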
def _test_mask_recurrent_state_at(self, gpu):
    in_size = 2
    out_size = 4
    rseq = StatelessRecurrentSequential(
        L.Linear(in_size, 3),
        F.elu,
        L.NStepGRU(1, 3, out_size, 0),
        F.softmax,
    )
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        rseq.to_gpu()
    xp = rseq.xp
    seqs_x = [
        xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
    ]
    transposed_x = F.transpose_sequence(seqs_x)
    print('transposed_x[0]', transposed_x[0])

    def no_mask_n_step_forward():
        nomask_nstep_out, nstep_rs = rseq.n_step_forward(
            seqs_x, None, output_mode='concat')
        return F.reshape(nomask_nstep_out, (2, 2, out_size)), nstep_rs

    nstep_out, nstep_rs = no_mask_n_step_forward()

    # Check if n_step_forward and forward twice results are same
    def no_mask_forward_twice():
        _, rs = rseq(transposed_x[0], None)
        return rseq(transposed_x[1], rs)

    nomask_out, nomask_rs = no_mask_forward_twice()
    xp.testing.assert_allclose(
        nstep_out.array[:, 1],
        nomask_out.array,
    )
    xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)

    # 1st-only mask forward twice: only 2nd should be the same
    def mask0_forward_twice():
        _, rs = rseq(transposed_x[0], None)
        rs = rseq.mask_recurrent_state_at(rs, 0)
        return rseq(transposed_x[1], rs)

    mask0_out, mask0_rs = mask0_forward_twice()
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out.array[0, 1],
            mask0_out.array[0],
        )
    xp.testing.assert_allclose(
        nstep_out.array[1, 1],
        mask0_out.array[1],
    )

    # 2nd-only mask forward twice: only 1st should be the same
    def mask1_forward_twice():
        _, rs = rseq(transposed_x[0], None)
        rs = rseq.mask_recurrent_state_at(rs, 1)
        return rseq(transposed_x[1], rs)

    mask1_out, mask1_rs = mask1_forward_twice()
    xp.testing.assert_allclose(
        nstep_out.array[0, 1],
        mask1_out.array[0],
    )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out.array[1, 1],
            mask1_out.array[1],
        )

    # both 1st and 2nd mask forward twice: both should be different
    def mask01_forward_twice():
        _, rs = rseq(transposed_x[0], None)
        rs = rseq.mask_recurrent_state_at(rs, [0, 1])
        return rseq(transposed_x[1], rs)

    mask01_out, mask01_rs = mask01_forward_twice()
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out.array[0, 1],
            mask01_out.array[0],
        )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out.array[1, 1],
            mask01_out.array[1],
        )

    # get and concat recurrent states and resume forward
    def get_and_concat_rs_forward():
        _, rs = rseq(transposed_x[0], None)
        rs0 = rseq.get_recurrent_state_at(rs, 0, unwrap_variable=True)
        rs1 = rseq.get_recurrent_state_at(rs, 1, unwrap_variable=True)
        concat_rs = rseq.concatenate_recurrent_states([rs0, rs1])
        return rseq(transposed_x[1], concat_rs)

    getcon_out, getcon_rs = get_and_concat_rs_forward()
    xp.testing.assert_allclose(getcon_rs[0].array, nomask_rs[0].array)
    xp.testing.assert_allclose(
        nstep_out.array[0, 1], getcon_out.array[0])
    xp.testing.assert_allclose(
        nstep_out.array[1, 1], getcon_out.array[1])
def __call__(self, x):
    x = F.transpose_sequence(x)
    for x_ in x:
        self.lstm(F.dropout(self.embed(x_), train=self.train))
    h = self.out(F.dropout(self.lstm.h, train=self.train))
    return h
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of LSTM units in each layer')
    parser.add_argument('--glove', type=str, default="",
                        help='path to glove vector')
    parser.add_argument('--model-type', dest='model_type', type=str,
                        required=True, help='bilstm / lstm / charlstm')
    parser.add_argument('--model', type=str, required=True,
                        help='path to model file')
    parser.add_argument('--dev', action='store_true',
                        help='If true, use validation data')
    parser.set_defaults(dev=False)
    args = parser.parse_args()

    data = DataProcessor(data_path="../work/", use_gpu=-1, test=False)
    data.prepare()
    if args.dev:
        test = data.dev_data
    else:
        test = data.test_data

    if args.model_type == "lstm":
        model = CRFNERTagger(n_vocab=len(data.vocab), embed_dim=100,
                             hidden_dim=args.unit, n_tag=len(data.tag),
                             dropout=None)
    elif args.model_type == 'bilstm':
        model = CRFBiNERTagger(n_vocab=len(data.vocab), embed_dim=100,
                               hidden_dim=args.unit, n_tag=len(data.tag),
                               dropout=None)
    elif args.model_type == 'charlstm':
        model = CRFBiCharNERTagger(n_vocab=len(data.vocab),
                                   n_char=len(data.char), embed_dim=100,
                                   hidden_dim=args.unit,
                                   n_tag=len(data.tag), dropout=None)

    # load glove vector
    if args.glove:
        sys.stderr.write("loading GloVe...")
        model.load_glove(args.glove, data.vocab)
        sys.stderr.write("done.\n")

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    serializers.load_npz(args.model, model)

    test_iter = chainer.iterators.SerialIterator(test, repeat=False,
                                                 shuffle=False,
                                                 batch_size=10)
    id2tag = data.id2tag
    id2vocab = data.id2vocab

    for ys, ts in tqdm(predict(test_iter, args.model_type, model,
                               args.unit)):
        # minibatch-unit loop
        ys = [[id2tag[i] for i in y.data]
              for y in F.transpose_sequence(ys)]
        ts = [[id2tag[i] for i in t.data]
              for t in F.transpose_sequence(ts)]
        # instance loop
        for predict_seq, target_seq in zip(ys, ts):
            for p, t in zip(predict_seq, target_seq):
                print("{}\t{}".format(p, t))
            print()
def __call__(self, x, Label, feature):
    # Computes the forward pass.
    # :param x: input sequences
    # :param Label: target labels
    # :param feature: extra per-example features
    # Encode
    x = F.transpose_sequence(x)
    self.eh.reset_state()

    # Forward LSTM features.
    cel = []
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
        i = self.ii(h)
        cel.append(i)

    # Backward LSTM features over the reversed sequence. x[0] is skipped,
    # so cel_back has one element fewer than cel.
    cel_back = []
    self.eh2.reset_state()
    for word in range(1, len(x)):
        ee = self.xe(x[len(x) - word])
        hh = self.eh2(ee)
        i = self.ii(hh)
        cel_back.append(i)

    # Concatenate forward and backward features step by step; the range
    # stops at len(cel) - 1 because cel_back is one element shorter.
    zz = F.concat((cel[0], cel_back[0]))
    for con in range(1, len(cel) - 1):
        kkk = F.concat((cel[con], cel_back[con]))
        zz = F.concat((zz, kkk))
    zzz = F.concat((zz, feature), axis=1)

    # Classify
    y = self.hy(zzz)

    # RMSE over the minibatch.
    rmse = 0
    for i in range(BATCH_SIZE):
        rmse += (y[i] - Label[i]) ** 2
    rmse = F.sqrt(rmse / BATCH_SIZE)
    print("RMSE = ", rmse.data)
    return rmse, y
def translate(self, xs, max_length=100):
    print("Now translating")
    batch = len(xs)
    print("batch", batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        wxs = [
            np.array([source_word_ids.get(w, UNK) for w in x],
                     dtype=np.int32) for x in xs
        ]
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        cxs = [
            np.array(
                [source_char_ids.get(c, UNK) for c in list("".join(x))],
                dtype=np.int32) for x in xs
        ]

        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)

        wexs_f = wexs
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_f = cexs
        cexs_b = [cex[::-1] for cex in cexs]

        _, hfw = self.encoder_fw(None, wexs_f)
        h1, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        h2, hbc = self.encoder_bc(None, cexs_b)

        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc))

        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h = F.concat([h1, h2], axis=2)
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(
                h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def __call__(self, *xs):
    return F.transpose_sequence(xs)
def translate(self, xs, max_length=100):
    print("Now translating")
    batch = len(xs)
    print("batch", batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        char_hidden = []
        wxs = [
            np.array([source_word_ids.get(w, UNK) for w in x],
                     dtype=np.int32) for x in xs
        ]
        unk_words = list(map(lambda x, y: np.array(y)[x == UNK], wxs, xs))
        unk_xs = list(
            map(
                lambda x: np.array([
                    np.array(
                        [source_char_ids.get(c, UNK) for c in list(w)],
                        dtype=np.int32) for w in x
                ]), unk_words))
        unk_pos = [np.where(x == UNK)[0] for x in wxs]
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        concat_wxs = np.concatenate(wxs)

        # The training-time target preparation is commented out here:
        # wys = [np.array([target_word_ids.get(w, UNK) for w in y],
        #                 dtype=np.int32) for y in ys]
        # eos = self.xp.array([EOS], 'i')
        # ys_out = [F.concat([y, eos], axis=0) for y in wys]
        # concat_ys_out = F.concat(ys_out, axis=0)

        exs = sequence_embed(self.embed_x, wxs)
        exs = list(
            map(
                lambda s, t, u: get_unk_hidden_vector(
                    s, t, u, self.embed_xc, self.char_encoder,
                    char_hidden),
                exs, unk_pos, unk_xs))
        exs_f = exs
        exs_b = [ex[::-1] for ex in exs]
        _, hf = self.encoder_f(None, exs_f)
        _, hb = self.encoder_b(None, exs_b)
        ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h_list = None
        for a in range(max_length):
            eys = self.embed_y(ys)
            eys = F.split_axis(eys, batch, 0)
            if h_list is None:
                h0 = None
            else:
                h0 = F.transpose_sequence(h_list)[-1]
                h0 = F.reshape(
                    h0, (self.n_layers, h0.shape[0], h0.shape[1]))
            # h0: Variable of shape (n_layers, batch, dim), or None.
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(
                h0, ht, eys)
            os = h_list
            concat_os = F.concat(os, axis=0)
            concat_os_out = self.W(concat_os)
            concat_pred_w = self.xp.argmax(concat_os_out.data,
                                           axis=1).astype('i')
            is_unk = concat_pred_w == UNK
            if UNK in concat_pred_w:
                N = np.sum(is_unk)
                # NOTE: concat_ys_out comes from the commented-out block
                # above, so this line raises NameError as written; it needs
                # gold targets, which are unavailable at translation time.
                true_wys = concat_ys_out[is_unk]
                concat_c_s = F.concat(c_s_list, axis=0)
                concat_h_bar = F.concat(h_bar_list, axis=0)
                c_ss = concat_c_s[is_unk]
                h_bars = concat_h_bar[is_unk]
                c = F.concat([c_ss, h_bars], axis=1)
                ds_hats = F.relu(self.W_hat(c))
                abs_z_s_list = [
                    z_s_list[i] + valid_wx_section[i]
                    for i in range(len(z_s_list))
                ]
                concat_z_s = F.concat(abs_z_s_list, axis=0)
                z_ss = concat_z_s[is_unk]

                # For each UNK word:
                results_c = []
                bow = self.xp.array([BOW], 'i')
                for i in range(N):
                    wy = true_wys[i]
                    if wy != UNK and wy != EOS:
                        cys = np.array([[
                            target_char_ids[c]
                            for c in list(target_words[wy])
                        ]], np.int32)
                    elif wy == UNK:  # should never happen
                        cys = np.array([[target_char_ids['UNK']]],
                                       np.int32)
                    elif wy == EOS:
                        cys = np.array([[target_char_ids['BOW']]],
                                       np.int32)
                    result_c = []
                    cy = self.xp.full(1, BOW, 'i')
                    cy = F.split_axis(cy, 1, 0)
                    cey = sequence_embed(self.embed_yc, cy)
                    z_s = int(z_ss[i].data)
                    ds_hat = F.reshape(ds_hats[i],
                                       (1, 1, ds_hats[i].shape[0]))
                    if concat_wxs[z_s] != UNK:
                        # Character-based decoder without attention.
                        for b in range(10):
                            ds_hat, cos = self.char_decoder(ds_hat, cey)
                            cos_out = self.W_char(cos[0])
                            pred_cos = self.xp.argmax(
                                cos_out.data, axis=1).astype('i')
                            cey = self.embed_yc(pred_cos)
                            # Store plain ints so the dict lookups below
                            # work.
                            result_c.append(int(pred_cos[0]))
                    else:
                        # Character-based decoder with attention.
                        c_ht = char_hidden[z_s]
                        for b in range(10):
                            if b == 0:
                                c_h0 = ds_hat
                            else:
                                # The original referenced h_list here;
                                # c_h_list, the character decoder's own
                                # state, appears intended.
                                c_h0 = F.transpose_sequence(c_h_list)[-1]
                                c_h0 = F.reshape(
                                    c_h0, (self.n_layers, c_h0.shape[0],
                                           c_h0.shape[1]))
                            c_h_list, c_h_bar_list, c_c_s_list, \
                                c_z_s_list = self.char_att_decoder(
                                    c_h0, c_ht, cey)
                            cos_out = self.W_char(c_h_list[-1])
                            pred_cos = self.xp.argmax(
                                cos_out.data, axis=1).astype('i')
                            cey = self.embed_yc(pred_cos)
                            result_c.append(int(pred_cos[0]))
                    r = ""
                    for c in result_c:
                        if c == BOW:
                            break
                        r += target_chars.get(c, UNK)
                    print(r)
                    pred_w = target_word_ids.get(r, UNK)
                    results_c.append(pred_w)
                concat_pred_w[is_unk] = results_c
            result.append(concat_pred_w)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def __call__(self, source, bio, tag, compute_loss=True):
    """
    Conduct forward propagation and acquire the loss value
    :return: loss (a chainer variable)
    """
    # Order by sequence length; remember the original order
    self.inds = np.argsort([-len(x) for x in source]).astype('i')
    xs_src = [source[i] for i in self.inds]
    self.xs_src_len = [len(x) for x in xs_src]  # Remember the batch lengths

    # Forward propagation
    pred_list_bio, pred_list_tag = self.forward(
        source=xs_src,
    )  # batch_size x (sequence_length, 2 * n_units)

    # Calculate the loss
    loss_bio = chainer.Variable(self.xp.array(0, dtype='f'))
    loss_tag = chainer.Variable(self.xp.array(0, dtype='f'))

    # Predict the outputs
    predicts_bio = []
    predicts_tag = []

    # If we use CRFs as output layers
    if self.lossfun == 'crf':
        # ------------------
        # bio
        # ------------------
        hs_bio = F.transpose_sequence(pred_list_bio)  # sequence_length x (batch_size)
        # Loop over each batch and accumulate loss values
        if compute_loss:
            ys_bio = [bio[i] for i in self.inds]
            ts_bio = F.transpose_sequence(ys_bio)  # sequence_length x (batch_size)
            loss_bio = self.crf_bio(hs_bio, ts_bio)
        # Add prediction results
        _, predicts_trans_bio = self.crf_bio.argmax(hs_bio)
        predicts_bio = F.transpose_sequence(predicts_trans_bio)

        # ------------------
        # tag
        # ------------------
        hs_tag = F.transpose_sequence(pred_list_tag)  # sequence_length x (batch_size)
        # Loop over each batch and accumulate loss values
        if compute_loss:
            ys_tag = [tag[i] for i in self.inds]
            ts_tag = F.transpose_sequence(ys_tag)  # sequence_length x (batch_size)
            loss_tag = self.crf_tag(hs_tag, ts_tag)
        # Add prediction results
        _, predicts_trans_tag = self.crf_tag.argmax(hs_tag)
        predicts_tag = F.transpose_sequence(predicts_trans_tag)

    elif self.lossfun == 'softmax':
        # ------------------
        # bio
        # ------------------
        if compute_loss:
            ys_bio = [bio[i] for i in self.inds]
            # Loop over each batch and accumulate loss values
            for p_lst, y_lst in zip(pred_list_bio, ys_bio):
                loss_bio += F.softmax_cross_entropy(p_lst, y_lst)
            loss_bio /= len(bio)
        # Add prediction results
        for p_lst in pred_list_bio:
            y_arg_bio = F.argmax(p_lst, axis=1)
            predicts_bio.append(y_arg_bio)

        # ------------------
        # tag
        # ------------------
        if compute_loss:
            ys_tag = [tag[i] for i in self.inds]
            # Loop over each batch and accumulate loss values
            for p_lst, y_lst in zip(pred_list_tag, ys_tag):
                loss_tag += F.softmax_cross_entropy(p_lst, y_lst)
            loss_tag /= len(tag)
        # Add prediction results
        for p_lst in pred_list_tag:
            y_arg_tag = F.argmax(p_lst, axis=1)
            predicts_tag.append(y_arg_tag)

    # Move variables from GPU to CPU
    cpu_predicts_bio = []
    cpu_predicts_tag = []
    for pred_bio, pred_tag in zip(predicts_bio, predicts_tag):
        cpu_predicts_bio.append(chainer.cuda.to_cpu(pred_bio.data).tolist())
        cpu_predicts_tag.append(chainer.cuda.to_cpu(pred_tag.data).tolist())

    # Restore the original order
    inds_rev = sorted([(i, ind) for i, ind in enumerate(self.inds)],
                      key=lambda x: x[1])
    cpu_predicts_bio = [cpu_predicts_bio[e_i] for e_i, _ in inds_rev]
    cpu_predicts_tag = [cpu_predicts_tag[e_i] for e_i, _ in inds_rev]

    if compute_loss:
        loss = self.weight_bio * loss_bio + self.weight_tag * loss_tag
        return loss, cpu_predicts_bio, cpu_predicts_tag
    else:
        return cpu_predicts_bio, cpu_predicts_tag
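# A small self-contained sketch (toy data, not from the original sources) of
# the reordering trick above: sort by descending length for transpose_sequence,
# then invert the permutation to restore the caller's original order.
import numpy as np

lengths = [2, 5, 3]
inds = np.argsort([-l for l in lengths]).astype('i')  # e.g. [1, 2, 0]
sorted_lengths = [lengths[i] for i in inds]           # [5, 3, 2]
# Invert: pair each new position with its original index, sort by the latter.
inds_rev = sorted(enumerate(inds), key=lambda x: x[1])
restored = [sorted_lengths[e_i] for e_i, _ in inds_rev]
assert restored == lengths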
def _wrapper(self, batch):
    xp = self.xp
    return F.transpose_sequence(
        xp.asarray(self.pad(batch), dtype=self.dtype))
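# A minimal sketch of the pad-then-transpose pattern above, with a
# hypothetical pad helper standing in for self.pad (an assumption, toy data):
# right-pad the variable-length rows into a rectangle, then transpose so each
# element is one time step across the batch, mirroring the wrapper above.
import numpy as np
import chainer.functions as F

def pad(batch, pad_value=0):
    # Hypothetical stand-in for self.pad: right-pads with pad_value.
    max_len = max(len(b) for b in batch)
    return [list(b) + [pad_value] * (max_len - len(b)) for b in batch]

batch = [[1, 2, 3], [4, 5]]
padded = np.asarray(pad(batch), dtype=np.int32)  # shape (2, 3)
steps = F.transpose_sequence(padded)             # 3 steps, each of shape (2,)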
def argmax(self, x):
    ys = self.blstm.GetFeat([Variable(x)])
    # y_t = F.transpose_sequence([self.li(y) for y in ys])
    y_t = F.transpose_sequence(ys)
    _, path = self.crf.argmax(y_t)
    return utils.force_numpy(path).flatten().astype(np.int32)
def _test_mask_recurrent_state_at(self, gpu):
    in_size = 2
    out0_size = 2
    out1_size = 3
    par = StatelessRecurrentBranched(
        L.NStepGRU(1, in_size, out0_size, 0),
        StatelessRecurrentSequential(
            L.NStepLSTM(1, in_size, out1_size, 0),
        ),
    )
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        par.to_gpu()
    xp = par.xp
    seqs_x = [
        xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
    ]
    transposed_x = F.transpose_sequence(seqs_x)

    nstep_out, nstep_rs = par.n_step_forward(
        seqs_x, None, output_mode='concat')

    # Check if n_step_forward and forward twice results are same
    def no_mask_forward_twice():
        _, rs = par(transposed_x[0], None)
        return par(transposed_x[1], rs)

    nomask_out, nomask_rs = no_mask_forward_twice()
    # GRU
    xp.testing.assert_allclose(
        nstep_out[0].array[[1, 3]],
        nomask_out[0].array,
    )
    # LSTM
    xp.testing.assert_allclose(
        nstep_out[1].array[[1, 3]],
        nomask_out[1].array,
    )
    xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)
    self.assertIsInstance(nomask_rs[1], tuple)
    self.assertEqual(len(nomask_rs[1]), 1)
    self.assertEqual(len(nomask_rs[1][0]), 2)
    xp.testing.assert_allclose(
        nstep_rs[1][0][0].array, nomask_rs[1][0][0].array)
    xp.testing.assert_allclose(
        nstep_rs[1][0][1].array, nomask_rs[1][0][1].array)

    # 1st-only mask forward twice: only 2nd should be the same
    def mask0_forward_twice():
        _, rs = par(transposed_x[0], None)
        rs = par.mask_recurrent_state_at(rs, 0)
        return par(transposed_x[1], rs)

    mask0_out, mask0_rs = mask0_forward_twice()
    # GRU
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            mask0_out[0].array[0],
        )
    xp.testing.assert_allclose(
        nstep_out[0].array[3],
        mask0_out[0].array[1],
    )
    # LSTM
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            mask0_out[1].array[0],
        )
    xp.testing.assert_allclose(
        nstep_out[1].array[3],
        mask0_out[1].array[1],
    )

    # 2nd-only mask forward twice: only 1st should be the same
    def mask1_forward_twice():
        _, rs = par(transposed_x[0], None)
        rs = par.mask_recurrent_state_at(rs, 1)
        return par(transposed_x[1], rs)

    mask1_out, mask1_rs = mask1_forward_twice()
    # GRU
    xp.testing.assert_allclose(
        nstep_out[0].array[1],
        mask1_out[0].array[0],
    )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            mask1_out[0].array[1],
        )
    # LSTM
    xp.testing.assert_allclose(
        nstep_out[1].array[1],
        mask1_out[1].array[0],
    )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            mask1_out[1].array[1],
        )

    # both 1st and 2nd mask forward twice: both should be different
    def mask01_forward_twice():
        _, rs = par(transposed_x[0], None)
        rs = par.mask_recurrent_state_at(rs, [0, 1])
        return par(transposed_x[1], rs)

    mask01_out, mask01_rs = mask01_forward_twice()
    # GRU
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            mask01_out[0].array[0],
        )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            mask01_out[0].array[1],
        )
    # LSTM
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            mask01_out[1].array[0],
        )
    with self.assertRaises(AssertionError):
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            mask01_out[1].array[1],
        )

    # get and concat recurrent states and resume forward
    def get_and_concat_rs_forward():
        _, rs = par(transposed_x[0], None)
        rs0 = par.get_recurrent_state_at(rs, 0, unwrap_variable=True)
        rs1 = par.get_recurrent_state_at(rs, 1, unwrap_variable=True)
        concat_rs = par.concatenate_recurrent_states([rs0, rs1])
        return par(transposed_x[1], concat_rs)

    getcon_out, getcon_rs = get_and_concat_rs_forward()
    # GRU
    xp.testing.assert_allclose(
        nstep_out[0].array[1],
        getcon_out[0].array[0],
    )
    xp.testing.assert_allclose(
        nstep_out[0].array[3],
        getcon_out[0].array[1],
    )
    # LSTM
    xp.testing.assert_allclose(
        nstep_out[1].array[1],
        getcon_out[1].array[0],
    )
    xp.testing.assert_allclose(
        nstep_out[1].array[3],
        getcon_out[1].array[1],
    )