def test_sortagrad_trainable_with_batch_bins(module):
    """Train and decode an E2E model on batches built with ``batch_bins``.

    ``module`` selects the backend: ``"pytorch"`` imports the PyTorch
    ``e2e_asr`` module, any other value imports the Chainer one.  Every
    batch is pushed through the model and back-propagated, then
    ``recognize`` is called once on random input.
    """
    args = make_arg(sortagrad=1)
    idim = 10
    odim = 5
    dummy_json = make_dummy_json(2, [3, 5], [3, 5], idim=idim, odim=odim)
    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batch_elems = 2000
    batchset = make_batchset(
        dummy_json,
        batch_bins=batch_elems,
        shortest_first=True,
    )
    for batch in batchset:
        # NOTE(review): ``n`` (summed input/output elements of the batch) is
        # accumulated but never asserted; only the last utterance's output
        # length is compared against ``batch_elems`` -- confirm intent.
        n = 0
        for uttid, info in batch:
            ilen = int(info["input"][0]["shape"][0])
            olen = int(info["output"][0]["shape"][0])
            n += ilen * idim + olen * odim
        assert olen < batch_elems

    model = m.E2E(idim, odim, args)
    for batch in batchset:
        loss = model(*convert_batch(batch, module, idim=idim, odim=odim))
        # chainer returns several values as a tuple; back-propagate the first
        trainable = loss[0] if isinstance(loss, tuple) else loss
        trainable.backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(10, idim)
            model.recognize(in_data, args, args.char_list)
def test_sortagrad(swap_io):
    """Check batch ordering produced with ``shortest_first=True``.

    The first sample of each batch must be at least as long as the first
    sample of the previous batch, and lengths inside a batch must be
    non-increasing.  With ``swap_io`` the ``"output"`` entry is inspected,
    otherwise the ``"input"`` entry.
    """
    dummy_json = make_dummy_json(128, [1, 700], [1, 700])
    if swap_io:
        key = "output"
        batchset = make_batchset(
            dummy_json,
            16,
            2**10,
            2**10,
            batch_sort_key="input",
            shortest_first=True,
            swap_io=True,
        )
    else:
        key = "input"
        batchset = make_batchset(dummy_json, 16, 2**10, 2**10, shortest_first=True)

    def _length(sample):
        # first shape entry of the first stream stored under ``key``
        return sample[1][key][0]["shape"][0]

    prev_start_ilen = _length(batchset[0][0])
    for batch in batchset:
        # across batches: short to long
        cur_start_ilen = _length(batch[0])
        assert cur_start_ilen >= prev_start_ilen

        # within a batch: long to short
        prev_ilen = cur_start_ilen
        for sample in batch:
            cur_ilen = _length(sample)
            assert cur_ilen <= prev_ilen
            prev_ilen = cur_ilen
        prev_start_ilen = cur_start_ilen
def test_sortagrad_trainable_with_batch_frames(module):
    """Train and decode on batches limited by input/output frame counts.

    Each batch's summed first-dimension input and output shapes must not
    exceed ``batch_frames_in`` / ``batch_frames_out``.  ``module`` selects
    the PyTorch (``"pytorch"``) or Chainer (anything else) ``e2e_asr``.
    """
    args = make_arg(sortagrad=1)
    idim = 20
    odim = 5
    dummy_json = make_dummy_json(4, [10, 20], [10, 20], idim=idim, odim=odim)
    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batch_frames_in = 50
    batch_frames_out = 50
    batchset = make_batchset(
        dummy_json,
        batch_frames_in=batch_frames_in,
        batch_frames_out=batch_frames_out,
        shortest_first=True,
    )
    for batch in batchset:
        total_in = sum(int(info['input'][0]['shape'][0]) for _, info in batch)
        total_out = sum(int(info['output'][0]['shape'][0]) for _, info in batch)
        assert total_in <= batch_frames_in
        assert total_out <= batch_frames_out

    model = m.E2E(idim, odim, args)
    for batch in batchset:
        loss = model(*convert_batch(batch, module, idim=idim, odim=odim))
        # chainer returns several values as a tuple; back-propagate the first
        trainable = loss[0] if isinstance(loss, tuple) else loss
        trainable.backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(100, idim)
            model.recognize(in_data, args, args.char_list)
def test_sortagrad(utils):
    """Check batch ordering of ``utils.make_batchset`` with ``shortest_first``.

    When ``'tts'`` occurs in ``str(utils)`` the batches are built with
    ``batch_sort_key="input"`` and the ``'output'`` entry is inspected;
    otherwise the ``'input'`` entry is inspected.  Batch start lengths must
    be non-decreasing and lengths inside a batch non-increasing.
    """
    dummy_json = make_dummy_json(128, [1, 700], [1, 700])
    is_tts = 'tts' in str(utils)
    if is_tts:
        key = 'output'
        batchset = utils.make_batchset(
            dummy_json, 16, 2**10, 2**10,
            batch_sort_key="input", shortest_first=True,
        )
    else:
        key = 'input'
        batchset = utils.make_batchset(
            dummy_json, 16, 2**10, 2**10, shortest_first=True,
        )

    def _length(sample):
        # first shape entry of the first stream stored under ``key``
        return sample[1][key][0]['shape'][0]

    prev_start_ilen = _length(batchset[0][0])
    for batch in batchset:
        # across batches: short to long
        cur_start_ilen = _length(batch[0])
        assert cur_start_ilen >= prev_start_ilen

        # within a batch: long to short
        prev_ilen = cur_start_ilen
        for sample in batch:
            cur_ilen = _length(sample)
            assert cur_ilen <= prev_ilen
            prev_ilen = cur_ilen
        prev_start_ilen = cur_start_ilen
def test_sortagrad_trainable_with_batch_frames(module, num_encs):
    """Multi-encoder variant: train/decode on frame-limited batches.

    Builds ``num_encs`` inputs per utterance, checks that each batch's
    summed frame counts (taken from the first input) stay within the
    limits, back-propagates every batch and calls ``recognize`` once.
    """
    args = make_arg(num_encs=num_encs, sortagrad=1)
    idim = 2
    odim = 2
    dummy_json = make_dummy_json(
        4, [2, 3], [2, 3], idim=idim, odim=odim, num_inputs=num_encs
    )
    import espnet.nets.pytorch_backend.e2e_asr_mulenc as m

    batch_frames_in = 50
    batch_frames_out = 50
    batchset = make_batchset(
        dummy_json,
        batch_frames_in=batch_frames_in,
        batch_frames_out=batch_frames_out,
        shortest_first=True,
    )
    for batch in batchset:
        # input length is based on the first input only
        total_in = sum(int(info["input"][0]["shape"][0]) for _, info in batch)
        total_out = sum(int(info["output"][0]["shape"][0]) for _, info in batch)
        assert total_in <= batch_frames_in
        assert total_out <= batch_frames_out

    model = m.E2E([idim] * num_encs, odim, args)
    for batch in batchset:
        model_inputs = convert_batch(
            batch, module, idim=idim, odim=odim, num_inputs=num_encs
        )
        loss = model(*model_inputs)
        loss.backward()  # trainable

    with torch.no_grad():
        in_data = [np.random.randn(100, 2) for _ in range(num_encs)]
        model.recognize(in_data, args, args.char_list)
def test_sortagrad_trainable_with_batch_bins(module):
    """Train and decode an E2E model on batches built with ``batch_bins``.

    ``module`` selects the PyTorch (``"pytorch"``) or Chainer (anything
    else) ``e2e_asr`` implementation.  The first element of each model
    output is back-propagated, then ``recognize`` runs on random input.
    """
    args = make_arg(sortagrad=1)
    idim = 20
    odim = 5
    dummy_json = make_dummy_json(8, [100, 200], [100, 200], idim=idim, odim=odim)
    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batch_elems = 20000
    batchset = make_batchset(
        dummy_json,
        batch_bins=batch_elems,
        shortest_first=True,
    )
    for batch in batchset:
        # NOTE(review): ``n`` (summed input/output elements of the batch) is
        # accumulated but never asserted; only the last utterance's output
        # length is compared against ``batch_elems`` -- confirm intent.
        n = 0
        for uttid, info in batch:
            ilen = int(info['input'][0]['shape'][0])
            olen = int(info['output'][0]['shape'][0])
            n += ilen * idim + olen * odim
        assert olen < batch_elems

    model = m.E2E(idim, odim, args)
    for batch in batchset:
        outputs = model(*convert_batch(batch, module, idim=idim, odim=odim))
        outputs[0].backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(100, idim)
            model.recognize(in_data, args, args.char_list)
def test_sortagrad_trainable_with_batch_bins(module, num_encs):
    """Multi-encoder variant: train/decode on ``batch_bins`` batches.

    Builds ``num_encs`` inputs per utterance, back-propagates the loss of
    every batch and calls ``recognize`` once on random inputs.
    """
    args = make_arg(num_encs=num_encs, sortagrad=1)
    idim = 20
    odim = 5
    dummy_json = make_dummy_json(
        4, [10, 20], [10, 20], idim=idim, odim=odim, num_inputs=num_encs
    )
    import espnet.nets.pytorch_backend.e2e_asr_mulenc as m

    batch_elems = 2000
    batchset = make_batchset(
        dummy_json,
        batch_bins=batch_elems,
        shortest_first=True,
    )
    for batch in batchset:
        # NOTE(review): ``n`` (summed input/output elements of the batch) is
        # accumulated but never asserted; only the last utterance's output
        # length is compared against ``batch_elems`` -- confirm intent.
        n = 0
        for uttid, info in batch:
            # input length is based on the first input only
            ilen = int(info['input'][0]['shape'][0])
            olen = int(info['output'][0]['shape'][0])
            n += ilen * idim + olen * odim
        assert olen < batch_elems

    model = m.E2E([idim] * num_encs, odim, args)
    for batch in batchset:
        model_inputs = convert_batch(
            batch, module, idim=idim, odim=odim, num_inputs=num_encs
        )
        loss = model(*model_inputs)
        loss.backward()  # trainable

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = [np.random.randn(100, 20) for _ in range(num_encs)]
            model.recognize(in_data, args, args.char_list)
def test_model_trainable_and_decodable(module, num_encs, model_dict):
    """Check that a multi-encoder E2E model trains, plots attention and decodes.

    Args:
        module: Dotted module name passed to ``importlib.import_module``;
            it must expose ``E2E``.
        num_encs: Number of encoders / input streams.
        model_dict: Extra keyword arguments forwarded to ``make_arg``.

    The temporary directory that receives the attention plots is removed
    when plotting finishes, whether or not plotting raised.
    """
    import shutil

    args = make_arg(num_encs=num_encs, **model_dict)
    batch = prepare_inputs("pytorch", num_encs)

    # test trainable
    m = importlib.import_module(module)
    model = m.E2E([40 for _ in range(num_encs)], 5, args)
    loss = model(*batch)
    loss.backward()  # trainable

    # test attention plot
    dummy_json = make_dummy_json(
        num_encs, [10, 20], [10, 20], idim=40, odim=5, num_inputs=num_encs
    )
    batchset = make_batchset(dummy_json, 2, 2 ** 10, 2 ** 10, shortest_first=True)
    att_ws = model.calculate_all_attentions(
        *convert_batch(batchset[0], "pytorch", idim=40, odim=5, num_inputs=num_encs)
    )
    from espnet.asr.asr_utils import PlotAttentionReport

    tmpdir = tempfile.mkdtemp()
    try:
        plot = PlotAttentionReport(
            model.calculate_all_attentions, batchset[0], tmpdir, None, None, None
        )
        for i in range(num_encs):
            # att-encoder
            att_w = plot.get_attention_weight(0, att_ws[i][0])
            plot._plot_and_save_attention(att_w, '{}/att{}.png'.format(tmpdir, i))
        # han: the entry after the per-encoder attentions
        att_w = plot.get_attention_weight(0, att_ws[num_encs][0])
        plot._plot_and_save_attention(
            att_w, '{}/han.png'.format(tmpdir), han_mode=True
        )
    finally:
        # mkdtemp() leaves deletion to the caller; a cleanup failure should
        # not change the outcome of the test itself.
        shutil.rmtree(tmpdir, ignore_errors=True)

    # test decodable
    with torch.no_grad(), chainer.no_backprop_mode():
        in_data = [np.random.randn(10, 40) for _ in range(num_encs)]
        model.recognize(in_data, args, args.char_list)  # decodable
        if "pytorch" in module:
            batch_in_data = [
                [np.random.randn(10, 40), np.random.randn(5, 40)]
                for _ in range(num_encs)
            ]
            model.recognize_batch(
                batch_in_data, args, args.char_list
            )  # batch decodable
def test_context_residual(module):
    """Train and decode a PyTorch E2E model built with ``context_residual``.

    Raises:
        NotImplementedError: If ``module`` is anything other than
            ``"pytorch"``.
    """
    args = make_arg(context_residual=True)
    dummy_json = make_dummy_json(8, [1, 100], [1, 100], idim=20, odim=5)
    if module != "pytorch":
        raise NotImplementedError
    import espnet.nets.pytorch_backend.e2e_asr as m

    batchset = make_batchset(dummy_json, 2, 2**10, 2**10, shortest_first=True)
    model = m.E2E(20, 5, args)
    for batch in batchset:
        outputs = model(*convert_batch(batch, module, idim=20, odim=5))
        outputs[0].backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(50, 20)
            model.recognize(in_data, args, args.char_list)
def test_sortagrad_trainable(module, num_encs):
    """Multi-encoder variant: train on sortagrad batches, then decode.

    Back-propagates every batch, checks that the batches together contain
    all 6 generated utterances, and calls ``recognize`` on random inputs.
    """
    args = make_arg(num_encs=num_encs, sortagrad=1)
    dummy_json = make_dummy_json(
        6, [10, 20], [10, 20], idim=20, odim=5, num_inputs=num_encs
    )
    import espnet.nets.pytorch_backend.e2e_asr_mulenc as m

    batchset = make_batchset(dummy_json, 2, 2 ** 10, 2 ** 10, shortest_first=True)
    model = m.E2E([20] * num_encs, 5, args)

    num_utts = 0
    for batch in batchset:
        num_utts += len(batch)
        model_inputs = convert_batch(
            batch, module, idim=20, odim=5, num_inputs=num_encs
        )
        loss = model(*model_inputs)
        loss.backward()  # trainable
    assert num_utts == 6

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = [np.random.randn(50, 20) for _ in range(num_encs)]
            model.recognize(in_data, args, args.char_list)
def test_sortagrad_trainable(module):
    """Train on sortagrad batches and decode with the selected backend.

    ``module`` selects the PyTorch (``"pytorch"``) or Chainer (anything
    else) ``e2e_asr`` implementation.  The first element of each model
    output is back-propagated, then ``recognize`` runs on random input.
    """
    args = make_arg(sortagrad=1)
    dummy_json = make_dummy_json(8, [1, 700], [1, 700], idim=20, odim=5)
    from espnet.asr.asr_utils import make_batchset

    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batchset = make_batchset(dummy_json, 2, 2**10, 2**10, shortest_first=True)
    model = m.E2E(20, 5, args)
    for batch in batchset:
        outputs = model(*convert_batch(batch, module, idim=20, odim=5))
        outputs[0].backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(100, 20)
            model.recognize(in_data, args, args.char_list)
def test_gradient_noise_injection(module, num_encs):
    """Compare gradients of models built with and without ``grad_noise``.

    Two multi-encoder models are created, one from args with
    ``grad_noise=True`` and one from default args.  For every batch both
    are back-propagated and the first entry of the gradient of parameter
    index 10 is asserted to differ between them.
    """
    args = make_arg(num_encs=num_encs, grad_noise=True)
    args_org = make_arg(num_encs=num_encs)
    dummy_json = make_dummy_json(
        num_encs, [10, 20], [10, 20], idim=20, odim=5, num_inputs=num_encs
    )
    import espnet.nets.pytorch_backend.e2e_asr_mulenc as m

    batchset = make_batchset(dummy_json, 2, 2 ** 10, 2 ** 10, shortest_first=True)
    model = m.E2E([20] * num_encs, 5, args)
    model_org = m.E2E([20] * num_encs, 5, args_org)
    for batch in batchset:
        noisy_inputs = convert_batch(
            batch, module, idim=20, odim=5, num_inputs=num_encs
        )
        loss = model(*noisy_inputs)
        plain_inputs = convert_batch(
            batch, module, idim=20, odim=5, num_inputs=num_encs
        )
        loss_org = model_org(*plain_inputs)

        loss.backward()
        grad = list(model.parameters())[10].grad
        loss_org.backward()
        grad_org = list(model_org.parameters())[10].grad
        assert grad[0] != grad_org[0]
def test_make_batchset(self):
    # For every task in espnet_utils.TASK_SET, build batches with several
    # length limits and check that each batch holds at least
    # ``min_batch_size`` utterances.
    dummy_json = make_dummy_json(128, [128, 512], [16, 128])
    for task in espnet_utils.TASK_SET:
        # check w/o adaptive batch size
        batchset = espnet_utils.make_batchset(
            task,
            data=dummy_json,
            batch_size=24,
            max_length_in=2**10,
            max_length_out=2**10,
            min_batch_size=1,
        )
        sizes = [len(batch) for batch in batchset]
        self.assertEqual(sum(size >= 1 for size in sizes), len(batchset))
        logging.info('batch: {}'.format(sizes))

        batchset = espnet_utils.make_batchset(
            task, dummy_json, 24, 2**10, 2**10, min_batch_size=10
        )
        sizes = [len(batch) for batch in batchset]
        self.assertEqual(sum(size >= 10 for size in sizes), len(batchset))
        logging.info('batch: {}'.format(sizes))

        # check w/ adaptive batch size
        batchset = espnet_utils.make_batchset(
            task, dummy_json, 24, 256, 64, min_batch_size=10
        )
        sizes = [len(batch) for batch in batchset]
        self.assertEqual(sum(size >= 10 for size in sizes), len(batchset))
        logging.info('batch: {}'.format(sizes))

        # NOTE(review): same arguments as the previous call -- confirm
        # whether a different configuration was intended here.
        batchset = espnet_utils.make_batchset(
            task, dummy_json, 24, 256, 64, min_batch_size=10
        )
        sizes = [len(batch) for batch in batchset]
        self.assertEqual(sum(size >= 10 for size in sizes), len(batchset))
def test_gradient_noise_injection(module):
    """Compare gradients of models built with and without ``grad_noise``.

    ``module`` selects the PyTorch (``"pytorch"``) or Chainer (anything
    else) ``e2e_asr`` implementation.  For every batch both models are
    back-propagated and the first entry of the gradient of parameter
    index 10 is asserted to differ between them.
    """
    args = make_arg(grad_noise=True)
    args_org = make_arg()
    dummy_json = make_dummy_json(2, [3, 4], [3, 4], idim=10, odim=5)
    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batchset = make_batchset(dummy_json, 2, 2**10, 2**10, shortest_first=True)
    model = m.E2E(10, 5, args)
    model_org = m.E2E(10, 5, args_org)
    for batch in batchset:
        noisy_inputs = convert_batch(batch, module, idim=10, odim=5)
        loss = model(*noisy_inputs)
        plain_inputs = convert_batch(batch, module, idim=10, odim=5)
        loss_org = model_org(*plain_inputs)

        loss.backward()
        grad = list(model.parameters())[10].grad
        loss_org.backward()
        grad_org = list(model_org.parameters())[10].grad
        assert grad[0] != grad_org[0]
def test_sortagrad_trainable(module):
    """Train on sortagrad batches and decode with the selected backend.

    ``module`` selects the PyTorch (``"pytorch"``) or Chainer (anything
    else) ``e2e_asr`` implementation.  Every batch is back-propagated and
    ``recognize`` is then called once on random input.
    """
    args = make_arg(sortagrad=1)
    dummy_json = make_dummy_json(4, [10, 20], [10, 20], idim=20, odim=5)
    if module != "pytorch":
        import espnet.nets.chainer_backend.e2e_asr as m
    else:
        import espnet.nets.pytorch_backend.e2e_asr as m

    batchset = make_batchset(dummy_json, 2, 2 ** 10, 2 ** 10, shortest_first=True)
    model = m.E2E(20, 5, args)
    for batch in batchset:
        loss = model(*convert_batch(batch, module, idim=20, odim=5))
        # chainer returns several values as a tuple; back-propagate the first
        trainable = loss[0] if isinstance(loss, tuple) else loss
        trainable.backward()

    with torch.no_grad():
        with chainer.no_backprop_mode():
            in_data = np.random.randn(50, 20)
            model.recognize(in_data, args, args.char_list)
def test_make_batchset(swap_io):
    """Check that ``make_batchset`` honours ``min_batch_size``.

    Four configurations are built with ``swap_io`` forwarded unchanged;
    in each, every batch must contain at least ``min_batch_size`` items.
    Batch sizes are printed for all but the last configuration.
    """
    dummy_json = make_dummy_json(128, [128, 512], [16, 128])

    # (max_length_in, max_length_out, min_batch_size)
    scenarios = (
        (2 ** 10, 2 ** 10, 1),   # w/o adaptive batch size
        (2 ** 10, 2 ** 10, 10),  # w/o adaptive batch size
        (256, 64, 10),           # w/ adaptive batch size
        (256, 64, 10),           # w/ adaptive batch size (repeated)
    )
    last_index = len(scenarios) - 1
    for index, (max_in, max_out, min_size) in enumerate(scenarios):
        batchset = make_batchset(
            dummy_json, 24, max_in, max_out,
            min_batch_size=min_size, swap_io=swap_io,
        )
        sizes = [len(batch) for batch in batchset]
        assert sum(size >= min_size for size in sizes) == len(batchset)
        if index != last_index:
            print(sizes)
def test_calculate_plot_attention_ctc(module, num_encs, model_dict):
    """Check attention and CTC plotting for a multi-encoder E2E model.

    Args:
        module: Dotted module name passed to ``importlib.import_module``;
            it must expose ``E2E``.
        num_encs: Number of encoders / input streams.
        model_dict: Extra keyword arguments forwarded to ``make_arg``.

    Each plotting stage writes into its own temporary directory, which is
    removed when the stage finishes, whether or not plotting raised.
    """
    import shutil

    args = make_arg(num_encs=num_encs, **model_dict)
    m = importlib.import_module(module)
    model = m.E2E([2 for _ in range(num_encs)], 2, args)

    # test attention plot
    dummy_json = make_dummy_json(
        num_encs, [2, 3], [2, 3], idim=2, odim=2, num_inputs=num_encs
    )
    batchset = make_batchset(dummy_json, 2, 2**10, 2**10, shortest_first=True)
    att_ws = model.calculate_all_attentions(
        *convert_batch(batchset[0], "pytorch", idim=2, odim=2, num_inputs=num_encs)
    )
    from espnet.asr.asr_utils import PlotAttentionReport

    tmpdir = tempfile.mkdtemp()
    try:
        plot = PlotAttentionReport(
            model.calculate_all_attentions, batchset[0], tmpdir, None, None, None
        )
        for i in range(num_encs):
            # att-encoder
            att_w = plot.trim_attention_weight("utt_%d" % 0, att_ws[i][0])
            plot._plot_and_save_attention(att_w, "{}/att{}.png".format(tmpdir, i))
        # han: the entry after the per-encoder attentions
        att_w = plot.trim_attention_weight("utt_%d" % 0, att_ws[num_encs][0])
        plot._plot_and_save_attention(
            att_w, "{}/han.png".format(tmpdir), han_mode=True
        )
    finally:
        # mkdtemp() leaves deletion to the caller; a cleanup failure should
        # not change the outcome of the test itself.
        shutil.rmtree(tmpdir, ignore_errors=True)

    # test CTC plot
    ctc_probs = model.calculate_all_ctc_probs(
        *convert_batch(batchset[0], "pytorch", idim=2, odim=2, num_inputs=num_encs)
    )
    from espnet.asr.asr_utils import PlotCTCReport

    tmpdir = tempfile.mkdtemp()
    try:
        plot = PlotCTCReport(
            model.calculate_all_ctc_probs, batchset[0], tmpdir, None, None, None
        )
        if args.mtlalpha > 0:
            for i in range(num_encs):
                # ctc-encoder
                plot._plot_and_save_ctc(
                    ctc_probs[i][0], "{}/ctc{}.png".format(tmpdir, i)
                )
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
def test_sortagrad(self):
    # For each task, build batches with ``shortest_first=True`` and verify
    # that batch start lengths are non-decreasing while lengths inside a
    # batch are non-increasing.
    dummy_json = make_dummy_json(128, [1, 700], [1, 700])
    for task in espnet_utils.TASK_SET:
        # NOTE(review): tasks other than 'tts'/'asr' leave ``batchset`` and
        # ``key`` unassigned (or carried over from the previous iteration)
        # -- confirm TASK_SET only contains these two.
        if task == 'tts' or task == 'asr':
            batchset = espnet_utils.make_batchset(
                task, dummy_json, 16, 2**10, 2**10,
                batch_sort_key='input', shortest_first=True,
            )
            key = 'output' if task == 'tts' else 'input'

        prev_start_ilen = batchset[0][0][1][key][0]['shape'][0]
        for batch in batchset:
            # short to long
            cur_start_ilen = batch[0][1][key][0]['shape'][0]
            self.assertGreaterEqual(cur_start_ilen, prev_start_ilen)

            # long to short in minibatch
            prev_ilen = cur_start_ilen
            for sample in batch:
                cur_ilen = sample[1][key][0]['shape'][0]
                self.assertLessEqual(cur_ilen, prev_ilen)
                prev_ilen = cur_ilen
            prev_start_ilen = cur_start_ilen