def test_from_base_matrix(self):
    num_rows = 5
    num_cols = 6
    m = kaldi.FloatMatrix(row=num_rows, col=num_cols)
    mg = kaldi.GeneralMatrix(m)

    mi = kaldi.FloatMatrix()
    mg.GetMatrix(mi)
    self.assertEqual(mi.NumRows(), num_rows)
    self.assertEqual(mi.NumCols(), num_cols)
    for r in range(num_rows):
        for c in range(num_cols):
            self.assertEqual(mi[r, c], 0)

def test_matrix_reader_writer(self):
    kp_matrix = kaldi.FloatMatrix(2, 3)
    wspecifier = 'ark,t:test.ark'
    rspecifier = 'ark:test.ark'

    matrix_writer = kaldi.MatrixWriter(wspecifier)
    kp_matrix[0, 0] = 10
    matrix_writer.Write('id_1', kp_matrix)
    matrix_writer.Close()

    matrix_reader = kaldi.SequentialMatrixReader(rspecifier)
    key = matrix_reader.Key()
    self.assertEqual(key, 'id_1')

    value = matrix_reader.Value()
    gold = np.array([[10, 0, 0], [0, 0, 0]])
    np.testing.assert_array_equal(value.numpy(), gold)
    matrix_reader.Close()

    # test with context manager
    kp_matrix[0, 0] = 20
    with kaldi.MatrixWriter(wspecifier) as writer:
        writer.Write('id_2', kp_matrix)

    with kaldi.SequentialMatrixReader(rspecifier) as reader:
        key = reader.Key()
        self.assertEqual(key, 'id_2')
        value = reader.Value()
        gold = np.array([[20, 0, 0], [0, 0, 0]])
        np.testing.assert_array_equal(value.numpy(), gold)

    os.remove('test.ark')

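# Note on the specifiers used above: the ',t' modifier in the wspecifier
# 'ark,t:test.ark' tells the writer to emit a text-form archive; the
# rspecifier needs no such modifier because Kaldi auto-detects binary
# versus text mode when reading.
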
def test_mfcc(self):
    mfcc = feat.Mfcc(feat.MfccOptions())
    reader = SequentialWaveReader('ark:wav.ark')
    # the gold features were extracted with featbin/compute-mfcc-feats
    gold_reader = SequentialMatrixReader('ark:feat.ark')
    for key, value in reader:
        print('Validate utterance: {}'.format(key))
        self.assertEqual(value.SampFreq(), 16000)

        wave_data = value.Data()
        nd = wave_data.numpy()
        nsamp = wave_data.NumCols()
        self.assertAlmostEqual(nsamp,
                               value.Duration() * value.SampFreq(),
                               places=1)

        waveform = kaldi.FloatSubVector(nd.reshape(nsamp))
        features = kaldi.FloatMatrix(1, 1)
        mfcc.ComputeFeatures(waveform, value.SampFreq(), 1.0, features)

        self.assertEqual(key, gold_reader.Key())
        gold_feat = gold_reader.Value().numpy()
        np.testing.assert_almost_equal(features.numpy(),
                                       gold_feat,
                                       decimal=3)
        gold_reader.Next()

def test_from_float_matrix(self):
    num_rows = 2
    num_cols = 3
    m = kaldi.FloatMatrix(num_rows, num_cols)
    cm = kaldi.CompressedMatrix(m)
    self.assertEqual(cm.NumRows(), num_rows)
    self.assertEqual(cm.NumCols(), num_cols)

def test_float_matrix(self):
    # test FloatMatrix
    kp_matrix = kaldi.FloatMatrix(4, 5)
    kp_matrix[2, 3] = 2.0
    gold = np.array([
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 2, 0],
        [0, 0, 0, 0, 0],
    ])
    np.testing.assert_array_equal(kp_matrix.numpy(), gold)

def test_matrix_random_access_reader(self):
    kp_matrix = kaldi.FloatMatrix(2, 3)
    wspecifier = 'ark,t:test.ark'
    rspecifier = 'ark:test.ark'

    matrix_writer = kaldi.MatrixWriter(wspecifier)
    matrix_writer.Write('id_1', kp_matrix)
    matrix_writer.Close()

    reader = kaldi.RandomAccessMatrixReader(rspecifier)
    gold = np.array([[0, 0, 0], [0, 0, 0]])
    self.assertTrue('id_1' in reader)
    np.testing.assert_array_equal(reader['id_1'].numpy(), gold)
    self.assertFalse('id_2' in reader)
    reader.Close()
    os.remove('test.ark')

def test_cu_matrix_to_pytorch_cpu_tensor(self):
    if kaldi.CudaCompiled():
        print('This test is for constructing a CPU tensor from a CuMatrix')
        print('Kaldi is compiled with GPU, skip it')
        return

    num_rows = 1
    num_cols = 2
    cpu_m = kaldi.FloatMatrix(row=num_rows, col=num_cols)
    cpu_m[0, 0] = 1
    cpu_m[0, 1] = 2

    m = kaldi.FloatCuMatrix(cpu_m)
    self.assertEqual(m[0, 0], 1)
    self.assertEqual(m[0, 1], 2)

    m_reference_count = sys.getrefcount(m)

    # memory is shared between `m` and `tensor`
    tensor = from_dlpack(m.to_dlpack())
    self.assertEqual(m_reference_count + 1, sys.getrefcount(m))

    self.assertFalse(tensor.is_cuda)
    self.assertEqual(tensor[0, 0], 1)
    self.assertEqual(tensor[0, 1], 2)

    tensor[0, 0] = 6  # also changes `m`
    tensor[0, 1] = 8
    self.assertEqual(m[0, 0], 6)
    self.assertEqual(m[0, 1], 8)

    m.Add(2)  # also changes `tensor`
    self.assertEqual(tensor[0, 0], 8)
    self.assertEqual(tensor[0, 1], 10)

    del tensor
    gc.collect()
    self.assertEqual(m_reference_count, sys.getrefcount(m))

    self.assertEqual(m[0, 0], 8)  # `m` is still alive
    self.assertEqual(m[0, 1], 10)

def test_matrix_to_pytorch_cpu_tensor(self):
    num_rows = 1
    num_cols = 2
    m = kaldi.FloatMatrix(row=num_rows, col=num_cols)
    m[0, 0] = 10
    m[0, 1] = 20

    m_reference_count = sys.getrefcount(m)

    # memory is shared between `tensor` and `m`
    tensor = from_dlpack(m.to_dlpack())
    self.assertEqual(m_reference_count + 1, sys.getrefcount(m))

    self.assertFalse(tensor.is_cuda)
    self.assertEqual(tensor.ndim, 2)
    self.assertEqual(tensor[0, 0], 10)
    self.assertEqual(tensor[0, 1], 20)

    m[0, 0] = 100
    self.assertEqual(tensor[0, 0], 100)

    tensor[0, 0] = 1000
    self.assertEqual(m[0, 0], 1000)

    del tensor
    gc.collect()
    self.assertEqual(m_reference_count, sys.getrefcount(m))

    # one more time
    self.assertEqual(m[0, 0], 1000)  # m is still alive
    self.assertEqual(m[0, 1], 20)

    tensor = from_dlpack(m.to_dlpack())
    self.assertEqual(m_reference_count + 1, sys.getrefcount(m))
    self.assertFalse(tensor.is_cuda)
    tensor[0, 0] = 8
    self.assertEqual(m[0, 0], 8)
    del tensor
    self.assertEqual(m_reference_count, sys.getrefcount(m))

def test_matrix_reader_iterator(self):
    kp_matrix = kaldi.FloatMatrix(2, 3)
    wspecifier = 'ark,t:test.ark'
    rspecifier = 'ark:test.ark'

    matrix_writer = kaldi.MatrixWriter(wspecifier)
    matrix_writer.Write('id_1', kp_matrix)
    matrix_writer.Close()

    gold_key_list = ['id_1']
    gold_value_list = [np.array([[0, 0, 0], [0, 0, 0]])]

    reader = kaldi.SequentialMatrixReader(rspecifier)
    for (key, value), gold_key, gold_value in zip(reader, gold_key_list,
                                                  gold_value_list):
        self.assertEqual(key, gold_key)
        np.testing.assert_array_equal(value.numpy(), gold_value)
    reader.Close()
    os.remove('test.ark')

def test_to_numpy(self):
    # first, build a kaldi matrix
    num_rows = 6
    num_cols = 8
    m = kaldi.FloatMatrix(row=num_rows, col=num_cols)
    for r in range(num_rows):
        for c in range(num_cols):
            self.assertEqual(m[r, c], 0)

    m_reference_count = sys.getrefcount(m)

    # now to numpy; memory is shared
    d = m.numpy()
    self.assertEqual(m_reference_count + 1, sys.getrefcount(m))

    d += 10
    for r in range(num_rows):
        for c in range(num_cols):
            self.assertEqual(d[r, c], m[r, c])

    del d
    self.assertEqual(m_reference_count, sys.getrefcount(m))

def test_pytorch_and_kaldi_gpu_tensor_zero_copy(self):
    # (fangjun): we put all tests in this function to avoid
    # invoking SelectGpuDevice() twice
    if not torch.cuda.is_available():
        print('No GPU detected! Skip it')
        return

    if not kaldi.CudaCompiled():
        print('Kaldi is not compiled with CUDA! Skip it')
        return

    device_id = 0

    # Kaldi and PyTorch will use the same GPU
    kaldi.SelectGpuDevice(device_id=device_id)

    device = torch.device('cuda', device_id)

    tensor = torch.arange(3).float()
    tensor = tensor.to(device)

    # make sure the tensor from PyTorch is indeed on GPU
    self.assertTrue(tensor.is_cuda)

    # GPU data is shared between kaldi::CuSubVector and the PyTorch
    # GPU tensor; no data is copied
    v = kaldi.CuSubVectorFromDLPack(to_dlpack(tensor))
    self.assertIsInstance(v, kaldi.FloatCuSubVector)

    v.Add(value=10)
    self.assertEqual(tensor[0], 10)
    self.assertEqual(tensor[1], 11)
    self.assertEqual(tensor[2], 12)

    v.Scale(value=6)
    self.assertEqual(tensor[0], 60)
    self.assertEqual(tensor[1], 66)
    self.assertEqual(tensor[2], 72)

    v.SetZero()
    self.assertEqual(tensor[0], 0)
    self.assertEqual(tensor[1], 0)
    self.assertEqual(tensor[2], 0)

    # now for CuSubMatrix
    tensor = torch.arange(3).reshape(1, 3).float()
    tensor = tensor.to(device)

    # make sure the tensor from PyTorch is indeed on GPU
    self.assertTrue(tensor.is_cuda)

    m = kaldi.CuSubMatrixFromDLPack(to_dlpack(tensor))
    m.ApplyExp()
    self.assertAlmostEqual(tensor[0, 0], math.exp(0), places=7)
    self.assertAlmostEqual(tensor[0, 1], math.exp(1), places=7)
    self.assertAlmostEqual(tensor[0, 2], math.exp(2), places=7)

    m.SetZero()
    self.assertEqual(tensor[0, 0], 0)
    self.assertEqual(tensor[0, 1], 0)
    self.assertEqual(tensor[0, 2], 0)

    # now from Kaldi to PyTorch
    dim = 2
    cpu_v = kaldi.FloatVector(size=dim)
    cpu_v[0] = 10
    cpu_v[1] = 20

    gpu_v = kaldi.FloatCuVector(cpu_v)
    self.assertEqual(gpu_v[0], 10)
    self.assertEqual(gpu_v[1], 20)

    gpu_v_reference_count = sys.getrefcount(gpu_v)

    # memory is shared between `gpu_v` and `tensor`
    tensor = from_dlpack(gpu_v.to_dlpack())

    # `gpu_v.to_dlpack()` increases the reference count of `gpu_v`
    self.assertEqual(gpu_v_reference_count + 1, sys.getrefcount(gpu_v))

    self.assertTrue(tensor.is_cuda)
    self.assertEqual(tensor.device.index, device_id)

    self.assertEqual(tensor[0], 10)
    self.assertEqual(tensor[1], 20)

    tensor[0] = 1  # also changes `gpu_v`
    tensor[1] = 2
    self.assertEqual(gpu_v[0], 1)
    self.assertEqual(gpu_v[1], 2)

    gpu_v.Add(10)  # also changes `tensor`
    self.assertEqual(tensor[0], 11)
    self.assertEqual(tensor[1], 12)

    del tensor
    gc.collect()
    # now the reference count for gpu_v is decreased by one
    self.assertEqual(gpu_v_reference_count, sys.getrefcount(gpu_v))

    self.assertEqual(gpu_v[0], 11)  # gpu_v is still alive
    self.assertEqual(gpu_v[1], 12)

    # now for CuMatrix
    num_rows = 1
    num_cols = 2
    cpu_m = kaldi.FloatMatrix(row=num_rows, col=num_cols)
    cpu_m[0, 0] = 1
    cpu_m[0, 1] = 2

    gpu_m = kaldi.FloatCuMatrix(cpu_m)
    self.assertEqual(gpu_m[0, 0], 1)
    self.assertEqual(gpu_m[0, 1], 2)

    gpu_m_reference_count = sys.getrefcount(gpu_m)

    # memory is shared between `gpu_m` and `tensor`
    tensor = from_dlpack(gpu_m.to_dlpack())
    self.assertEqual(gpu_m_reference_count + 1, sys.getrefcount(gpu_m))

    self.assertTrue(tensor.is_cuda)
    self.assertEqual(tensor.device.index, device_id)

    self.assertEqual(tensor[0, 0], 1)
    self.assertEqual(tensor[0, 1], 2)

    tensor[0, 0] = 6  # also changes `gpu_m`
    tensor[0, 1] = 8
    self.assertEqual(gpu_m[0, 0], 6)
    self.assertEqual(gpu_m[0, 1], 8)

    gpu_m.Add(2)  # also changes `tensor`
    self.assertEqual(tensor[0, 0], 8)
    self.assertEqual(tensor[0, 1], 10)

    del tensor
    gc.collect()
    self.assertEqual(gpu_m_reference_count, sys.getrefcount(gpu_m))

    self.assertEqual(gpu_m[0, 0], 8)  # `gpu_m` is still alive
    self.assertEqual(gpu_m[0, 1], 10)

    # now for CuVector from_dlpack
    tensor = torch.tensor([1, 2]).float()
    tensor = tensor.to(device)

    # memory is shared between `tensor` and `v`
    v = kaldi.DLPackFloatCuSubVector.from_dlpack(to_dlpack(tensor))
    self.assertEqual(v[0], 1)

    v.Add(1)  # also changes `tensor`
    self.assertEqual(tensor[0], 2)
    self.assertEqual(tensor[1], 3)

    del v
    del tensor

    # now for CuMatrix from_dlpack
    tensor = torch.tensor([1, 2]).reshape(1, 2).float()
    tensor = tensor.to(device)

    # memory is shared between `tensor` and `m`
    m = kaldi.DLPackFloatCuSubMatrix.from_dlpack(to_dlpack(tensor))
    self.assertEqual(m[0, 0], 1)

    m.Add(100)  # also changes `tensor`
    self.assertEqual(tensor[0, 0], 101)

    del m
    del tensor
    gc.collect()

    # now test the issue: https://github.com/pytorch/pytorch/issues/9261
    # it will not consume all GPU memory
    for i in range(100):
        b = torch.randn(1024 * 1024 * 1024 // 4, 1, device=device)  # 1G
        a = kaldi.CuSubMatrixFromDLPack(to_dlpack(b))
        gc.collect()
    torch.cuda.empty_cache()

    for i in range(100 * 4):
        b = kaldi.FloatCuMatrix(1024 * 1024, 64)  # 256 MB
        a = from_dlpack(b.to_dlpack())
        gc.collect()

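# A minimal sketch (not part of the original suite) of the zero-copy
# round trip exercised above, using only APIs that already appear in
# these tests. It is kept as a comment rather than a separate test
# because SelectGpuDevice() must not be invoked twice:
#
#   gpu_m = kaldi.FloatCuMatrix(cpu_m)
#   tensor = from_dlpack(gpu_m.to_dlpack())                 # Kaldi -> PyTorch
#   sub_m = kaldi.CuSubMatrixFromDLPack(to_dlpack(tensor))  # PyTorch -> Kaldi
#   sub_m.Add(1.0)  # no copies: the change is visible through
#                   # `tensor` and `gpu_m` alike
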
def __call__(self, batch):
    '''batch is a list of [key, rxfilename] returned from
    `__getitem__()` of `NnetChainExampleDataset`.

    Since we have combined the egs offline, the batch size is
    usually one.
    '''
    key_list = []

    # contains a list of 3-D arrays of shape [batch_size, seq_len, feat_dim]
    feature_list = []

    supervision_list = []

    for b in batch:
        key, rxfilename = b
        key_list.append(key)
        eg = read_nnet_chain_example(rxfilename)

        assert len(eg.outputs) == 1
        assert eg.outputs[0].name == 'output'

        supervision = eg.outputs[0].supervision
        supervision_list.append(supervision)

        batch_size = supervision.num_sequences
        frames_per_sequence = (supervision.frames_per_sequence *
                               self.frame_subsampling_factor +
                               self.egs_left_context +
                               self.egs_right_context)

        # TODO(fangjun): support ivector
        assert len(eg.inputs) == 1
        assert eg.inputs[0].name == 'input'

        _feats = kaldi.FloatMatrix()
        eg.inputs[0].features.GetMatrix(_feats)
        feats = _feats.numpy()
        assert feats.shape[0] == batch_size * frames_per_sequence

        feat_list = []
        for i in range(batch_size):
            start_index = i * frames_per_sequence
            if self.frame_subsampling_factor == 3:
                shift = np.random.choice([-1, 0, 1], 1)[0]
                start_index += shift

            end_index = start_index + frames_per_sequence
            start_index += 1  # remove the leftmost frame added for frame shift
            end_index -= 1  # remove the rightmost frame added for frame shift

            feat = feats[start_index:end_index, :]
            feat = splice_feats(feat)
            feat_list.append(feat)

        batched_feat = np.stack(feat_list, axis=0)
        assert batched_feat.shape[0] == batch_size

        # -4 = -2 - 2
        # the first -2 is from the extra left/right context frames
        # removed above; the second -2 is from splicing the lda feats
        assert batched_feat.shape[1] == frames_per_sequence - 4
        assert batched_feat.shape[2] == feats.shape[-1] * 3

        torch_feat = torch.from_numpy(batched_feat).float()
        feature_list.append(torch_feat)

    return key_list, feature_list, supervision_list

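# A hedged usage sketch (the class names below are assumptions based on
# the docstring, not definitions from this file): a collate callable like
# the one above is meant to be plugged into a PyTorch DataLoader, e.g.
#
#   dataset = NnetChainExampleDataset(egs_scp)
#   loader = torch.utils.data.DataLoader(dataset,
#                                        batch_size=1,
#                                        collate_fn=collate_fn)
#   for key_list, feature_list, supervision_list in loader:
#       ...  # feed feature_list[i] to the network, compute the
#            # chain loss with supervision_list[i]
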
def test_duration(self):
    waveform = kaldi.FloatMatrix(1, 16000)
    wave_data = kaldi.feat.WaveData(samp_freq=16000, data=waveform)
    self.assertEqual(1, wave_data.Duration())
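
# A hedged extra check (not in the original suite), assuming
# Duration() == num_samples / samp_freq as the test above suggests:
# 8000 samples at 16 kHz should last half a second.
def test_duration_half_second(self):
    waveform = kaldi.FloatMatrix(1, 8000)
    wave_data = kaldi.feat.WaveData(samp_freq=16000, data=waveform)
    self.assertEqual(0.5, wave_data.Duration())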