def test_no_backprop_mode(self):
    """Run the CTC loss inside ``chainer.no_backprop_mode()``.

    The test makes no assertion; it passes as long as the forward call
    does not raise.
    """
    raw_inputs = numpy.random.uniform(-1, 1, (4, 2, 3))
    raw_inputs = raw_inputs.astype(numpy.float32)
    raw_labels = numpy.array([[0, 1], [1, 0]]).astype(numpy.int32)
    with chainer.no_backprop_mode():
        # Iterating over the first axis yields four (2, 3) arrays.
        x = []
        for step in raw_inputs:
            x.append(chainer.Variable(step))
        t = chainer.Variable(raw_labels)
        # 2 is passed as the blank symbol.
        functions.connectionist_temporal_classification(x, t, 2)
def forward_one_sample(model, wavfile, label, SIL_idx, useGPU):
    """Run the model on one wav file and compute its CTC loss.

    Args:
        model: Callable taking a list of per-frame ``Variable`` objects;
            ``reset_state()`` is called on it before the forward pass.
        wavfile: Path handed to ``wav.read``.
        label: Label sequence; converted to an int32 array of shape (1, -1).
        SIL_idx: Index passed to the CTC loss as the blank symbol.
        useGPU: When true, frames and labels are moved with ``cuda.to_gpu``.

    Returns:
        ``(y, loss)``, or ``(None, None)`` when reading the wav file raises
        ``IOError``.
    """
    try:
        samplerate, wavdata = wav.read(wavfile)
    except IOError:
        return None, None

    feats = mfcc(wavdata, samplerate).astype(np.float32)
    # `mean` and `std` are read from the enclosing module scope.
    feats = (feats - mean) / std

    model.reset_state()
    input_seq = []
    if useGPU:
        for i in range(feats.shape[0]):
            frame = feats[i, :].reshape((1, -1))
            input_seq.append(Variable(cuda.to_gpu(frame)))
        y = model(input_seq)
        label_row = xp.array(label, dtype=xp.int32).reshape((1, -1))
        label = Variable(cuda.to_gpu(label_row))
    else:
        for i in range(feats.shape[0]):
            frame = feats[i, :][np.newaxis, :]
            input_seq.append(Variable(frame))
        y = model(input_seq)
        label_row = xp.array(label, dtype=xp.int32).reshape((1, -1))
        label = Variable(label_row)

    loss = F.connectionist_temporal_classification(y, label, SIL_idx)
    return y, loss
def check_backward(self, t_data, xs_data, l_length, x_length, grad, gx):
    """Compare backpropagated CTC gradients with numerical estimates.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays (four are expected, see below).
        l_length: Label lengths, passed as ``label_length``.
        x_length: Input lengths, passed as ``input_length``.
        grad: Value assigned to ``loss.grad`` before ``backward()``.
        gx: Upstream gradient handed to ``numerical_grad``.
    """
    xs = tuple(chainer.Variable(x_data) for x_data in xs_data)
    t = chainer.Variable(t_data)
    input_length = chainer.Variable(x_length)
    label_length = chainer.Variable(l_length)
    loss = functions.connectionist_temporal_classification(
        xs, t, 2, input_length=input_length, label_length=label_length)
    loss.grad = grad
    loss.backward()

    func = loss.creator
    xs_data = tuple(x.data for x in xs)

    def f():
        # Re-run the forward pass of the created function on raw arrays.
        return func.forward((x_length, l_length, t.data) + xs_data)

    # Exactly four gradients are unpacked, one per input.
    gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(
        f, xs_data, (gx,))
    numeric = (gx_0, gx_1, gx_2, gx_3)
    for index, expected in enumerate(numeric):
        gradient_check.assert_allclose(xs[index].grad, expected, atol=1e-04)
def check_forward(self, t_data, xs_data, l_length, x_length):
    """Check the CTC loss against a reference built from ``self.alpha``.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays, one ``Variable`` is made per item.
        l_length: Label lengths; ``2 * l_length + 1`` is used as the path
            length in the reference computation.
        x_length: Input lengths.

    The length arguments are forwarded to the loss only when
    ``self.use_length`` is true. The reference value is the batch mean of
    the negative log of the two final ``alpha`` terms and must match the
    computed loss to 5 decimal places.
    """
    x = tuple(chainer.Variable(x_data) for x_data in xs_data)
    t = chainer.Variable(t_data)
    args = (x, t, self.blank_symbol)
    if self.use_length:
        args += (chainer.Variable(x_length), chainer.Variable(l_length))
    loss = functions.connectionist_temporal_classification(*args)
    loss_value = float(loss.data)

    # compute expected value by recursive computation.
    xp = cuda.get_array_module(self.x)
    # swapaxes returns a view of self.x; copy it so the in-place softmax
    # below does not overwrite the fixture.
    xt = xp.swapaxes(self.x, 0, 1).copy()
    for b in range(xt.shape[0]):
        # `step` (not `t`) so the label Variable above is not shadowed.
        for step in range(xt.shape[1]):
            exp_row = numpy.exp(xt[b][step])
            xt[b][step] = exp_row / numpy.sum(exp_row)

    loss_expect = 0
    batch_size = xt.shape[0]
    path_length = 2 * l_length + 1
    for xtb, lb, xlb, plb in zip(xt, self.l, x_length, path_length):
        loss_expect += -math.log(
            self.alpha(xtb, lb, int(xlb - 1), int(plb - 1)) +
            self.alpha(xtb, lb, int(xlb - 1), int(plb - 2)))
    loss_expect /= batch_size
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def check_forward(self, t_data, xs_data, l_length, x_length):
    """Check the CTC loss against a per-sample reference from ``self.alpha``.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays, one ``Variable`` is made per item.
        l_length: Label lengths; ``2 * l_length + 1`` is used as the path
            length in the reference computation.
        x_length: Input lengths.

    The length arguments are forwarded to the loss only when
    ``self.use_length`` is true. The loss is computed with
    ``reduce=self.reduce``; the per-sample reference is averaged when
    ``self.reduce == 'mean'`` before the comparison.
    """
    x = tuple(chainer.Variable(x_data) for x_data in xs_data)
    t = chainer.Variable(t_data)
    args = (x, t, self.blank_symbol)
    if self.use_length:
        args += (chainer.Variable(x_length), chainer.Variable(l_length))
    loss = functions.connectionist_temporal_classification(
        *args, reduce=self.reduce).data

    # compute expected value by recursive computation.
    xp = cuda.get_array_module(self.x)
    # swapaxes returns a view of self.x; copy it so the in-place softmax
    # below does not overwrite the fixture.
    xt = xp.swapaxes(self.x, 0, 1).copy()
    for b in range(xt.shape[0]):
        # `step` (not `t`) so the label Variable above is not shadowed.
        for step in range(xt.shape[1]):
            exp_row = numpy.exp(xt[b][step])
            xt[b][step] = exp_row / numpy.sum(exp_row)

    batch_size = xt.shape[0]
    path_length = 2 * l_length + 1
    loss_expect = xp.zeros((batch_size,), dtype=xp.float32)
    for i in range(batch_size):
        xtb, lb, xlb, plb = xt[i], self.l[i], x_length[i], path_length[i]
        loss_expect[i] = -math.log(
            self.alpha(xtb, lb, int(xlb - 1), int(plb - 1)) +
            self.alpha(xtb, lb, int(xlb - 1), int(plb - 2)))
    if self.reduce == 'mean':
        loss_expect = xp.mean(loss_expect)
    testing.assert_allclose(loss_expect, loss)
def test_ctc_loss():
    """Compare Chainer's CTC loss with ``warpctc_pytorch.CTCLoss``.

    Skipped when ``torch`` or ``warpctc_pytorch`` cannot be imported. The two
    losses must agree within a relative tolerance of 0.05.
    """
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    import warpctc_pytorch

    from espnet.nets.e2e_asr_th import pad_list

    n_out = 7
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)

    # Predictions are drawn before targets, one array per utterance.
    np_pred = []
    for n_frames in input_length:
        np_pred.append(numpy.random.rand(n_frames, n_out).astype(numpy.float32))
    np_target = []
    for n_labels in label_length:
        np_target.append(
            numpy.random.randint(0, n_out, size=n_labels, dtype=numpy.int32))

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_result = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length)
    ch_loss = ch_result.data

    th_inputs = [torch.from_numpy(pred) for pred in np_pred]
    th_pred = pad_list(th_inputs, 0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    criterion = warpctc_pytorch.CTCLoss(size_average=True)
    th_loss = criterion(th_pred, th_target, th_ilen, th_olen).data.numpy()[0]

    numpy.testing.assert_allclose(th_loss, ch_loss, rtol=0.05)
def __call__(self, hs, ys):
    """CTC forward.

    Args:
        hs (list of chainer.Variable | N-dimension array):
            Input variable from encoder.
        ys (list of chainer.Variable | N-dimension array):
            Input variable of decoder.

    Returns:
        chainer.Variable: A variable holding a scalar value of the CTC loss.

    """
    self.loss = None
    # The first dimension of every element is taken as its length.
    frame_counts = [h.shape[0] for h in hs]
    label_counts = [y.shape[0] for y in ys]

    # Pad the encoder outputs, apply dropout, then project with ctc_lo.
    padded_hs = F.pad_sequence(hs)
    dropped_hs = F.dropout(padded_hs, ratio=self.dropout_rate)
    projected = linear_tensor(self.ctc_lo, dropped_hs)
    y_hat = F.separate(projected, axis=1)  # ilen list of batch x hdim

    # Labels are padded with -1.
    y_true = F.pad_sequence(ys, padding=-1)  # batch x olen

    # Length information as int32 Variables.
    input_length = chainer.Variable(
        self.xp.array(frame_counts, dtype=np.int32))
    label_length = chainer.Variable(
        self.xp.array(label_counts, dtype=np.int32))
    logging.info(
        self.__class__.__name__ + ' input lengths: ' + str(input_length.data))
    logging.info(
        self.__class__.__name__ + ' output lengths: ' + str(label_length.data))

    # CTC loss with 0 as the blank symbol; the result is kept on self.loss.
    ctc = F.connectionist_temporal_classification(
        y_hat, y_true, 0, input_length, label_length)
    self.loss = ctc
    logging.info('ctc loss:' + str(self.loss.data))

    return self.loss
def __call__(self, hs, ys):
    '''Compute the CTC loss for a batch and store it on ``self.loss``.

    :param hs: sequence of encoder outputs; ``shape[0]`` of each element is
        used as its input length
    :param ys: sequence of label sequences; ``shape[0]`` of each element is
        used as its label length
    :return: the value returned by
        ``F.connectionist_temporal_classification`` (also kept in
        ``self.loss``)
    '''
    self.loss = None
    hs_sizes = [item.shape[0] for item in hs]
    ys_sizes = [item.shape[0] for item in ys]

    # Pad hs, apply dropout, project through ctc_lo, then split on axis 1.
    hs_padded = F.pad_sequence(hs)
    hs_dropped = F.dropout(hs_padded, ratio=self.dropout_rate)
    y_hat = F.separate(linear_tensor(self.ctc_lo, hs_dropped), axis=1)

    # Pad ys with -1.
    y_true = F.pad_sequence(ys, padding=-1)

    # Lengths are passed to the loss as int32 Variables.
    input_length = chainer.Variable(self.xp.array(hs_sizes, dtype=np.int32))
    label_length = chainer.Variable(self.xp.array(ys_sizes, dtype=np.int32))
    input_msg = (self.__class__.__name__ + ' input lengths: '
                 + str(input_length.data))
    logging.info(input_msg)
    output_msg = (self.__class__.__name__ + ' output lengths: '
                  + str(label_length.data))
    logging.info(output_msg)

    # 0 is passed as the blank symbol.
    self.loss = F.connectionist_temporal_classification(
        y_hat, y_true, 0, input_length, label_length)
    logging.info('ctc loss:' + str(self.loss.data))

    return self.loss
def ctc_loss(self, ys, lable_batch):
    """Compute, print and return the word-level CTC loss.

    Args:
        ys: Pair ``(out_ys, input_length)``.
        lable_batch: Sequence of label sequences; ``len()`` of each one is
            used as its label length.

    Returns:
        The loss returned by ``F.connectionist_temporal_classification``.
    """
    out_ys, input_length = ys

    label_length = self.xp.asarray(
        [len(seq) for seq in lable_batch], dtype=self.xp.int32)
    input_length = self.xp.asarray(input_length, dtype=self.xp.int32)

    # Labels are batched with self.blank as the padding value.
    word_lables = concat_examples(lable_batch, self.device, padding=self.blank)
    word_loss = F.connectionist_temporal_classification(
        out_ys, word_lables, self.blank, input_length, label_length)

    # A confidence-penalty term and a character-level loss were sketched
    # here in commented-out code; both are disabled.
    print(word_loss)
    return word_loss
def test_ctc_loss():
    """Compare Chainer's CTC loss with ``warpctc_pytorch.CTCLoss``.

    Skipped when ``torch`` or ``warpctc_pytorch`` cannot be imported. The
    torch loss is divided by the batch size before the comparison, which
    uses a relative tolerance of 0.05.
    """
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    from warpctc_pytorch import CTCLoss

    from e2e_asr_attctc_th import pad_list

    n_out = 7
    n_batch = 3
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)

    # Predictions are drawn before targets, one array per utterance.
    np_pred = []
    for n_frames in input_length:
        np_pred.append(numpy.random.rand(n_frames, n_out).astype(numpy.float32))
    np_target = []
    for n_labels in label_length:
        np_target.append(
            numpy.random.randint(0, n_out, size=n_labels, dtype=numpy.int32))

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr_attctc.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_result = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length)
    ch_loss = ch_result.data

    wrap = torch.autograd.Variable
    th_inputs = [wrap(torch.from_numpy(pred)) for pred in np_pred]
    th_pred = pad_list(th_inputs).transpose(0, 1)
    th_target = wrap(torch.from_numpy(numpy.concatenate(np_target)))
    th_ilen = wrap(torch.from_numpy(input_length))
    th_olen = wrap(torch.from_numpy(label_length))

    # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size
    # while chainer's default setting does
    th_total = CTCLoss()(th_pred, th_target, th_ilen, th_olen)
    th_loss = (th_total / n_batch).data.numpy()[0]

    numpy.testing.assert_allclose(th_loss, ch_loss, rtol=0.05)
def ctc_loss(self, ys, lable_batch):
    """Compute, print and return the word-level CTC loss.

    Args:
        ys: Pair ``(word_ys, input_length)``.
        lable_batch: Pair ``(word_label, char_lable)``; only the word labels
            are used here.

    Returns:
        The loss returned by ``F.connectionist_temporal_classification``.
    """
    # Both inputs must unpack into exactly two items.
    word_label, _char_lable = lable_batch
    word_ys, input_length = ys

    word_label_length = self.xp.asarray(
        [len(seq) for seq in word_label], dtype=self.xp.int32)
    input_length = self.xp.asarray(input_length, dtype=self.xp.int32)

    # Labels are batched with self.blank as the padding value.
    word_lables = concat_examples(word_label, self.device, padding=self.blank)
    word_loss = F.connectionist_temporal_classification(
        word_ys, word_lables, self.blank, input_length, word_label_length)

    # A character-level loss was sketched here in commented-out code; it is
    # disabled.
    print(word_loss)
    return word_loss
def check_forward(self, t_data, xs_data):
    """Check the CTC loss against a reference built from ``self.alpha``.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays, one ``Variable`` is made per item.

    The loss is computed with 2 as the blank symbol. The reference value is
    the batch mean of the negative log of the two final ``alpha`` terms and
    must match the computed loss to 5 decimal places.
    """
    x = tuple(chainer.Variable(x_data) for x_data in xs_data)
    t = chainer.Variable(t_data)
    loss = functions.connectionist_temporal_classification(x, t, 2)
    loss_value = float(loss.data)

    # compute expected value by recursive computation.
    xp = cuda.get_array_module(self.x)
    # swapaxes returns a view of self.x; copy it so the in-place softmax
    # below does not overwrite the fixture.
    xt = xp.swapaxes(self.x, 0, 1).copy()
    for b in range(xt.shape[0]):
        # `step` (not `t`) so the label Variable above is not shadowed.
        for step in range(xt.shape[1]):
            exp_row = numpy.exp(xt[b][step])
            xt[b][step] = exp_row / numpy.sum(exp_row)

    loss_expect = 0
    batch_size = xt.shape[0]
    last_step = self.x.shape[0] - 1
    for b in range(batch_size):
        path = self.l[b]
        loss_expect += -math.log(
            self.alpha(xt[b], path, last_step, path.shape[0] - 1) +
            self.alpha(xt[b], path, last_step, path.shape[0] - 2))
    loss_expect /= batch_size
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def f(input_length, label_length, t, *x):
    """Return the CTC loss for labels ``t`` and the inputs ``x``.

    ``self``, ``x_length`` and ``l_length`` are read from the enclosing
    scope.
    """
    # NOTE(review): the `input_length` / `label_length` parameters are not
    # used; the lengths come from `x_length` / `l_length` in the enclosing
    # scope instead - confirm this is intended.
    loss = functions.connectionist_temporal_classification(
        x, t, self.blank_symbol, x_length, l_length, reduce=self.reduce)
    return loss
def run_ctc():
    """Run one CTC forward and backward pass on ``x_data``.

    All inputs (``x_data``, ``vocab_size_ctc``, ``batchsize``,
    ``seq_length``, ``label_unigram``, ``length_unigram``, ``blank_symbol``)
    are read from the enclosing scope. Nothing is returned.
    """
    source = Variable(x_data)
    clipped = source[:, :vocab_size_ctc]
    swapped = functions.swapaxes(clipped, 1, 3)
    flat = functions.reshape(swapped, (batchsize, -1))
    x = functions.split_axis(flat, seq_length, axis=1)

    # The input sequence lengths are arbitrary.
    lengths = xp.asarray([seq_length, seq_length // 2], dtype=xp.int32)
    x_length = Variable(lengths)

    loss_ctc = functions.connectionist_temporal_classification(
        x,
        label_unigram,
        blank_symbol,
        x_length,
        Variable(length_unigram),
        reduce="mean",
    )
    loss_ctc.backward()
def check_backward(self, t_data, xs_data):
    """Compare backpropagated CTC gradients with numerical estimates.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays (four are expected, see below).

    ``self.g`` is used as the loss gradient and ``self.gx`` as the upstream
    gradient for ``numerical_grad``.
    """
    xs = tuple(chainer.Variable(x_data) for x_data in xs_data)
    t = chainer.Variable(t_data)
    loss = functions.connectionist_temporal_classification(xs, t, 2)
    loss.grad = self.g
    loss.backward()

    func = loss.creator
    xs_data = tuple(x.data for x in xs)
    forward_inputs = (t.data,) + xs_data

    def f():
        # Re-run the forward pass of the created function on raw arrays.
        return func.forward((t.data,) + xs_data)

    # Five gradients are unpacked: one for the labels (not checked) and one
    # per input.
    gl_0, gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(
        f, forward_inputs, (self.gx,))
    numeric = (gx_0, gx_1, gx_2, gx_3)
    for index, expected in enumerate(numeric):
        gradient_check.assert_allclose(xs[index].grad, expected, atol=1e-04)
def test_ctc_loss(in_length, out_length, use_warpctc):
    """Compare Chainer's CTC loss with a torch CTC implementation.

    Args:
        in_length: Input lengths, one per utterance.
        out_length: Label lengths, one per utterance.
        use_warpctc: When true use ``warpctc_pytorch.CTCLoss``; otherwise
            use ``torch.nn.CTCLoss`` summed and divided by the batch size.

    The two losses must agree within a relative tolerance of 0.05.
    """
    pytest.importorskip("torch")
    if use_warpctc:
        pytest.importorskip("warpctc_pytorch")
        import warpctc_pytorch

        torch_ctcloss = warpctc_pytorch.CTCLoss(size_average=True)
    else:
        if LooseVersion(torch.__version__) < LooseVersion("1.0"):
            pytest.skip("pytorch < 1.0 doesn't support CTCLoss")
        _ctcloss_sum = torch.nn.CTCLoss(reduction="sum")

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            log_probs = th_pred.log_softmax(2)
            total = _ctcloss_sum(log_probs, th_target, th_ilen, th_olen)
            # Batch-size average
            return total / log_probs.size(1)

    n_out = 7
    input_length = numpy.array(in_length, dtype=numpy.int32)
    label_length = numpy.array(out_length, dtype=numpy.int32)

    # Predictions are drawn before targets, one array per utterance.
    np_pred = []
    for n_frames in input_length:
        np_pred.append(numpy.random.rand(n_frames, n_out).astype(numpy.float32))
    np_target = []
    for n_labels in label_length:
        np_target.append(
            numpy.random.randint(0, n_out, size=n_labels, dtype=numpy.int32))

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_result = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length)
    ch_loss = ch_result.data

    th_inputs = [torch.from_numpy(pred) for pred in np_pred]
    th_pred = pad_list(th_inputs, 0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = torch_ctcloss(th_pred, th_target, th_ilen, th_olen).numpy()

    numpy.testing.assert_allclose(th_loss, ch_loss, rtol=0.05)
def __call__(self, x, phonemes, lengths):
    """Compute the CTC negative log-likelihood and report it.

    Args:
        x: Input handed to ``self.forward``.
        phonemes: Label sequences; padded with ``self.n_category - 1``.
        lengths: Passed to the loss as ``label_length``.

    Returns:
        The loss ``nll``. ``nll`` and ``exp(-nll)`` are also reported
        through ``chainer.reporter`` under 'nll' and 'likelihood'.
    """
    y = self.forward(x)

    # The input of ctc must be list or tuple: slice along the third axis.
    ys = []
    for i in range(y.shape[2]):
        ys.append(y[:, :, i])

    # The input label of ctc must be variable or array.
    phonemes = F.pad_sequence(phonemes, padding=self.n_category - 1)

    # The last category index doubles as the blank symbol.
    nll = F.connectionist_temporal_classification(
        ys,
        phonemes,
        blank_symbol=self.n_category - 1,
        label_length=lengths,
    )
    likelihood = F.exp(-nll)

    observation = {'nll': nll, 'likelihood': likelihood}
    chainer.reporter.report(observation, self)
    return nll
def check_backward(self, t_data, xs_data, l_length, x_length, grad, gx):
    """Verify CTC input gradients against ``numerical_grad``.

    Args:
        t_data: Label array.
        xs_data: Iterable of input arrays (four are expected, see below).
        l_length: Label lengths, passed as ``label_length``.
        x_length: Input lengths, passed as ``input_length``.
        grad: Value assigned to ``loss.grad`` before ``backward()``.
        gx: Upstream gradient handed to ``numerical_grad``.
    """
    variables = tuple(chainer.Variable(x_data) for x_data in xs_data)
    labels = chainer.Variable(t_data)
    loss = functions.connectionist_temporal_classification(
        variables,
        labels,
        2,
        input_length=chainer.Variable(x_length),
        label_length=chainer.Variable(l_length),
    )
    loss.grad = grad
    loss.backward()

    func = loss.creator
    xs_data = tuple(v.data for v in variables)

    def forward_again():
        # Same forward pass, driven directly with raw arrays.
        return func.forward((x_length, l_length, labels.data) + xs_data)

    # Exactly four gradients are unpacked, one per input.
    gx_0, gx_1, gx_2, gx_3 = gradient_check.numerical_grad(
        forward_again, xs_data, (gx,))
    for variable, expected in zip(variables, (gx_0, gx_1, gx_2, gx_3)):
        gradient_check.assert_allclose(variable.grad, expected, atol=1e-04)
def calc_actual_loss(self, predictions, grid, labels):
    """Return the CTC loss of ``predictions`` against ``labels``.

    ``predictions`` is split along axis 0 before being passed to the loss.
    ``grid`` is accepted but not used here.
    """
    separated = F.separate(predictions, axis=0)
    loss = F.connectionist_temporal_classification(
        separated, labels, blank_symbol=self.blank_symbol)
    return loss
def test_not_iterable(self):
    """Expect ``ValueError`` when ``reduce`` is given an unknown option."""
    inputs = numpy.zeros((4, 2, 3), dtype=numpy.float32)
    labels = numpy.zeros((2, 2), dtype=numpy.int32)
    x = chainer.Variable(inputs)
    t = chainer.Variable(labels)
    with self.assertRaises(ValueError):
        functions.connectionist_temporal_classification(
            tuple(x), t, 0, reduce='invalid_option')
def test_not_iterable(self):
    """Expect ``TypeError`` when a single Variable is passed as the input."""
    inputs = numpy.zeros((4, 2, 3), dtype=numpy.float32)
    labels = numpy.zeros((2, 2), dtype=numpy.int32)
    x = chainer.Variable(inputs)
    t = chainer.Variable(labels)
    with self.assertRaises(TypeError):
        # x is passed as-is, not wrapped in a list or tuple.
        functions.connectionist_temporal_classification(x, t, 0)
def loss_function(predict, label):
    """Return the CTC loss of ``predict`` against ``label``, blank symbol 0."""
    blank_symbol = 0
    loss = F.connectionist_temporal_classification(predict, label, blank_symbol)
    return loss
def test_volatile(self):
    """Run the CTC loss on Variables created with ``volatile=True``.

    The test makes no assertion; it passes as long as the forward call
    does not raise.
    """
    raw_inputs = numpy.random.uniform(-1, 1, (4, 2, 3))
    raw_inputs = raw_inputs.astype(numpy.float32)
    raw_labels = numpy.array([[0, 1], [1, 0]]).astype(numpy.int32)

    x = []
    for step in raw_inputs:
        x.append(chainer.Variable(step, volatile=True))
    t = chainer.Variable(raw_labels, volatile=True)
    # 2 is passed as the blank symbol.
    functions.connectionist_temporal_classification(x, t, 2)
def calc_actual_loss(self, predictions, grid, labels):
    """Return the CTC loss of ``predictions`` against ``labels``.

    ``self.blank_symbol`` is used as the blank symbol. ``grid`` is accepted
    but not used here.
    """
    return F.connectionist_temporal_classification(
        predictions, labels, self.blank_symbol)
sum_accuracy = 0 sum_loss = 0 for i in range(0, N, batchsize): x = img_train[perm[i:i + batchsize]] y = label_train[perm[i:i + batchsize]] padded_y = np.zeros((batchsize, max([len(t) for t in y]))) for index, item in enumerate(y): padded_y[index, :len(item)] = item x = Variable(xp.asarray(x).astype(xp.float32)) output = model(x) print(output[0].shape) print(output[0][0]) model.cleargrads() loss = F.connectionist_temporal_classification( output, xp.asarray(padded_y).astype(xp.int32), 0, xp.full((len(y), ), 63, dtype=xp.int32), xp.asarray([len(t) for t in y]).astype(xp.int32)) loss.backward() optimizer.update() print(loss.data) """ print('train mean loss={}, accuracy={}'.format( sum_loss / N, sum_accuracy / N)) # evaluation with configuration.using_config('train', False): sum_accuracy = 0 sum_loss = 0 for i in range(0, N_test, batchsize): x = x_test[i:i + batchsize]
def main():
    """Train the model with the CTC loss and report an error rate per epoch.

    Configuration comes from the module-level ``args``. After each epoch's
    training loop one minibatch is decoded by arg-max, printed next to its
    true sequence, and a "CER / loss / lr" summary line is printed.
    """
    model = Model(args.vocab_size, args.ndim_embedding, args.num_layers, args.ndim_h)
    if args.gpu_device >= 0:
        chainer.cuda.get_device(args.gpu_device).use()
        model.to_gpu()

    train_data, train_labels = generate_data()
    total_loop = int(math.ceil(len(train_data) / args.batchsize))
    train_indices = np.arange(len(train_data), dtype=int)

    xp = model.xp
    # Every sample in a batch is given the same input and label length.
    x_length_batch = xp.full((args.batchsize,), args.sequence_length, dtype=xp.int32)
    t_length_batch = xp.full((args.batchsize,), args.true_sequence_length, dtype=xp.int32)

    # optimizer
    optimizer = optimizers.Adam(args.learning_rate, 0.9)
    optimizer.setup(model)

    def draw_minibatch():
        # Shuffle the index array in place and take its first `batchsize`
        # entries; move the batch to the GPU when the model uses cupy.
        np.random.shuffle(train_indices)
        inputs = train_data[train_indices[:args.batchsize]]
        targets = train_labels[train_indices[:args.batchsize]]
        if xp is cupy:
            inputs = cuda.to_gpu(inputs.astype(xp.int32))
            targets = cuda.to_gpu(targets.astype(xp.int32))
        return inputs, targets

    for epoch in xrange(1, args.total_epoch + 1):
        # train loop
        sum_loss = 0
        with chainer.using_config("train", True):
            for itr in xrange(1, total_loop + 1):
                x_batch, t_batch = draw_minibatch()

                # forward
                model.reset_state()
                y_batch = model(x_batch)  # list of variables

                # compute loss
                loss = F.connectionist_temporal_classification(
                    y_batch, t_batch, BLANK, x_length_batch, t_length_batch)
                optimizer.update(lossfun=lambda: loss)

                sum_loss += float(loss.data)

        # evaluate
        with chainer.using_config("train", False):
            x_batch, t_batch = draw_minibatch()

            # forward
            model.reset_state()
            y_batch = model(x_batch, split_into_variables=False)
            y_batch = xp.argmax(y_batch.data, axis=2)

            average_error = 0
            for _input_sequence, argmax_sequence, true_sequence in zip(x_batch, y_batch, t_batch):
                # Drop blanks from the arg-max path.
                pred_seqence = [int(token) for token in argmax_sequence if token != BLANK]
                print("true:", true_sequence, "pred:", pred_seqence)
                average_error += compute_character_error_rate(true_sequence.tolist(), pred_seqence)

            cer_percent = int(average_error / args.batchsize * 100)
            mean_loss = sum_loss / total_loop
            print("CER: {} - loss: {} - lr: {:.4e}".format(cer_percent, mean_loss, optimizer.alpha))
def test_ctc_loss(in_length, out_length, ctc_type):
    """Compare Chainer's CTC loss with a torch-side CTC implementation.

    Args:
        in_length: Input lengths, one per utterance.
        out_length: Label lengths, one per utterance.
        ctc_type: One of "warpctc", "builtin", "cudnnctc" or "gtnctc";
            selects which torch-side loss is built.

    The two losses must agree within a relative tolerance of 0.05.
    """
    pytest.importorskip("torch")
    if ctc_type == "warpctc":
        pytest.importorskip("warpctc_pytorch")
        import warpctc_pytorch

        torch_ctcloss = warpctc_pytorch.CTCLoss(size_average=True)
    elif ctc_type in ("builtin", "cudnnctc"):
        if LooseVersion(torch.__version__) < LooseVersion("1.0"):
            pytest.skip("pytorch < 1.0 doesn't support CTCLoss")
        _ctcloss_sum = torch.nn.CTCLoss(reduction="sum")

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            log_probs = th_pred.log_softmax(2)
            total = _ctcloss_sum(log_probs, th_target, th_ilen, th_olen)
            # Batch-size average
            return total / log_probs.size(1)

    elif ctc_type == "gtnctc":
        pytest.importorskip("gtn")
        from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction

        _ctcloss_sum = GTNCTCLossFunction.apply

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            targets = [t.tolist() for t in th_target]
            log_probs = torch.nn.functional.log_softmax(th_pred, dim=2)
            return _ctcloss_sum(log_probs, targets, th_ilen, 0, "none")

    n_out = 7
    input_length = numpy.array(in_length, dtype=numpy.int32)
    label_length = numpy.array(out_length, dtype=numpy.int32)

    # Predictions are drawn before targets, one array per utterance.
    np_pred = []
    for n_frames in input_length:
        np_pred.append(numpy.random.rand(n_frames, n_out).astype(numpy.float32))
    np_target = []
    for n_labels in label_length:
        np_target.append(
            numpy.random.randint(0, n_out, size=n_labels, dtype=numpy.int32))

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_result = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length)
    ch_loss = ch_result.data

    th_inputs = [torch.from_numpy(pred) for pred in np_pred]
    th_pred = pad_list(th_inputs, 0.0).transpose(0, 1)
    if ctc_type == "gtnctc":
        # gtn implementation expects targets as list
        th_target = np_target
        # keep as B x T x H for gtn
        th_pred = th_pred.transpose(0, 1)
    else:
        th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = torch_ctcloss(th_pred, th_target, th_ilen, th_olen).numpy()

    numpy.testing.assert_allclose(th_loss, ch_loss, rtol=0.05)