def test_write_compress_feat():
    """Check compressed writes against a reference compressed by Kaldi.

    The data read from ``feat_scp_b`` is written once per entry of
    ``compression_methods`` with ``compress=True`` and then read back.
    An element fails when the tolerance (error of the Kaldi reference plus
    0.1% of the original magnitude) is smaller than both the distance
    between reference and read-back value and the distance between original
    and read-back value.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    key1, data1 = reader.read(0)

    for idx, method in enumerate(compression_methods):
        # write compressed
        print('')
        writer = DWF.create(
            feat_both_co[idx], compress=True, compression_method=method)
        writer.write(key1, data1)
        writer.close()

        # read compressed by kaldi copy-feats
        reader = SDRF.create(feat_scp_c[idx], path_prefix=input_prefix)
        key1c, data1c = reader.read(0)

        # read compressed
        reader = SDRF.create(feat_scp_co[idx])
        key2, data2 = reader.read(0)

        for orig, ref, ours in zip(data1, data1c, data2):
            tol = np.abs(orig - ref) + np.abs(orig) * 0.001
            err_ref_ours = np.abs(ref - ours)
            err_orig_ours = np.abs(orig - ours)
            bad = np.logical_and(tol < err_ref_ours, tol < err_orig_ours)

            # dump the offending values to help debugging a failure
            for a, b, c in zip(orig[bad], ref[bad], ours[bad]):
                print(a, b, c, a - b, b - c, a - c)

            assert not np.any(bad), 'Write compression %s failed' % method
def test_with_write_feat():
    """Write features through the writer used as a context manager.

    Items are read one at a time from ``feat_scp_b``, written inside a
    ``with`` block, then read back one at a time from ``feat_scp_bo`` and
    compared pairwise by key and by value.

    NOTE(review): the visible source defines ``test_with_write_feat`` twice;
    the later definition replaces the earlier one at import time, so only one
    of them can be collected. Confirm whether one should be renamed.
    """
    def _read_sequentially(reader):
        # Pull single items until the reader reports end of file.
        keys, data = [], []
        while not reader.eof():
            key_i, data_i = reader.read(1)
            keys.append(key_i[0])
            data.append(data_i[0])
        return keys, data

    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    key1, data1 = _read_sequentially(reader)

    # binary with
    with DWF.create(feat_both_bo) as writer:
        writer.write(key1, data1)

    reader = SDRF.create(feat_scp_bo)
    key2, data2 = _read_sequentially(reader)

    for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
        assert k1 == k2
        assert_allclose(d1, d2)
def test_write_squeeze_vec():
    """Write vectors passed as one stacked array instead of a list.

    The items read from ``vec_scp_b`` are stacked along a new leading axis
    and handed to the writer as a single array; the items read back from
    ``vec_scp_bo`` must match the original, unstacked items.
    """
    def _read_sequentially(reader):
        # Pull single items until the reader reports end of file.
        keys, data = [], []
        while not reader.eof():
            key_i, data_i = reader.read(1)
            keys.append(key_i[0])
            data.append(data_i[0])
        return keys, data

    reader = SDRF.create(vec_scp_b, path_prefix=input_prefix)
    key1, data1 = _read_sequentially(reader)

    # give every item a leading axis of length 1 and join along it
    stacked = np.concatenate(
        tuple(np.expand_dims(item, axis=0) for item in data1), axis=0)

    # binary
    writer = DWF.create(vec_both_bo)
    writer.write(key1, stacked)
    writer.close()

    reader = SDRF.create(vec_scp_bo)
    key2, data2 = _read_sequentially(reader)

    for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
        assert k1 == k2
        assert_allclose(d1, d2)
def test_write_read_seq_scp_compress_feat():
    """Check compressed writes of ``feat_scp_ho`` data against a Kaldi reference.

    The data read from ``feat_scp_ho`` is written once per entry of
    ``compression_methods`` with ``compress=True`` and then read back from
    ``feat_scp_hco``. An element fails when the tolerance (error of the Kaldi
    reference plus 0.1% of the original magnitude) is smaller than both the
    distance between reference and read-back value and the distance between
    original and read-back value.
    """
    reader = SDRF.create(feat_scp_ho)
    key1, data1 = reader.read(0)

    for idx, method in enumerate(compression_methods):
        # write compressed
        print('')
        writer = DWF.create(
            feat_both_hco[idx], compress=True, compression_method=method)
        writer.write(key1, data1)
        writer.close()

        # read kaldi compressed
        reader = SDRF.create(feat_scp_c[idx], path_prefix=input_prefix)
        key1c, data1c = reader.read(0)

        # read compressed
        reader = SDRF.create(feat_scp_hco[idx])
        key2, data2 = reader.read(0)

        for orig, ref, ours in zip(data1, data1c, data2):
            tol = np.abs(orig - ref) + np.abs(orig) * 0.001
            err_ref_ours = np.abs(ref - ours)
            err_orig_ours = np.abs(orig - ours)
            bad = np.logical_and(tol < err_ref_ours, tol < err_orig_ours)

            # dump the offending values to help debugging a failure
            for a, b, c in zip(orig[bad], ref[bad], ours[bad]):
                print(a, b, c, a - b, b - c, a - c)

            assert not np.any(bad), 'Write compression %s failed' % method
def test_write_read_seq_file_feat():
    """Write two feature sets and read each one back item by item.

    For each of the two inputs in ``feat_ark_b`` the data is written with
    the ``feat_both_ho`` specifier of the same index, then ``feat_h5_ho`` of
    that index is read sequentially. Every original key must be found in the
    read-back keys with matching data. Afterwards the two scp files listed in
    ``feat_scp_ho1`` are concatenated into ``feat_scp_ho2``.
    """
    num_parts = 2

    # ``range`` rather than the Python-2-only ``xrange`` so the test also
    # runs on Python 3 without a compatibility shim.
    for part in range(num_parts):
        reader = SDRF.create(feat_ark_b[part], path_prefix=input_prefix)
        key1, data1 = reader.read(0)

        # write
        writer = DWF.create(feat_both_ho[part])
        writer.write(key1, data1)
        writer.close()

        reader = SDRF.create(feat_h5_ho[part])
        key2 = []
        data2 = []
        while not reader.eof():
            key_i, data_i = reader.read(1)
            key2.append(key_i[0])
            data2.append(data_i[0])

        # items are matched by key rather than by position
        found, loc = ismember(key1, key2)
        assert np.all(found)
        for i, (k1, d1) in enumerate(zip(key1, data1)):
            assert k1 == key2[loc[i]]
            assert_allclose(d1, data2[loc[i]])

    # concatenate the per-part scp files into a single scp file
    with open(feat_scp_ho2, 'w') as merged:
        for part in range(num_parts):
            with open(feat_scp_ho1[part], 'r') as part_scp:
                merged.writelines(part_scp)
def test_write_flush_feat():
    """Write features with the ``feat_both_bfo`` and ``feat_both_tfo`` specifiers.

    Items are read one at a time from ``feat_scp_b`` and written twice, once
    per specifier (presumably the flushing binary and text variants —
    confirm against their definitions). After each write ``feat_scp_bo`` is
    read back and compared by key and by value; the second comparison uses
    ``rtol=1e-4``.
    """
    def _read_sequentially(reader):
        # Pull single items until the reader reports end of file.
        keys, data = [], []
        while not reader.eof():
            key_i, data_i = reader.read(1)
            keys.append(key_i[0])
            data.append(data_i[0])
        return keys, data

    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    key1, data1 = _read_sequentially(reader)

    # binary
    writer = DWF.create(feat_both_bfo)
    writer.write(key1, data1)
    writer.close()

    reader = SDRF.create(feat_scp_bo)
    key2, data2 = _read_sequentially(reader)
    for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
        assert k1 == k2
        assert_allclose(d1, d2)

    # text
    writer = DWF.create(feat_both_tfo)
    writer.write(key1, data1)
    writer.close()

    # NOTE(review): this reads feat_scp_bo again, the same scp as in the
    # binary section above; confirm that feat_both_tfo really writes to it,
    # otherwise the text output is never checked.
    reader = SDRF.create(feat_scp_bo)
    key2, data2 = _read_sequentially(reader)
    for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
        assert k1 == k2
        assert_allclose(d1, d2, rtol=1e-4)
def test_with_write_feat():
    """Write features through the writer used as a context manager.

    Everything in ``feat_ark_b[0]`` is read at once, written to
    ``feat_h5_ho[0]`` inside a ``with`` block and read back at once. Every
    original key must be found in the read-back keys with matching data.

    NOTE(review): the visible source defines ``test_with_write_feat`` twice;
    the later definition replaces the earlier one at import time, so only one
    of them can be collected. Confirm whether one should be renamed.
    """
    reader = SDRF.create(feat_ark_b[0], path_prefix=input_prefix)
    key1, data1 = reader.read(0)

    # write
    with DWF.create(feat_h5_ho[0]) as writer:
        writer.write(key1, data1)

    reader = SDRF.create(feat_h5_ho[0])
    key2, data2 = reader.read(0)

    # items are matched by key rather than by position
    found, loc = ismember(key1, key2)
    assert np.all(found)
    for pos, (k1, d1) in enumerate(zip(key1, data1)):
        match = loc[pos]
        assert k1 == key2[match]
        assert_allclose(d1, data2[match])
def test_write_squeeze_feat():
    """Write features passed as one stacked array instead of a list.

    The first 10 entries along the leading axis of every item read from
    ``feat_scp_ho`` are kept so that all items can be stacked along a new
    leading axis. The stacked array is written with ``feat_both_hso`` and the
    items read back from ``feat_scp_hso`` must match the truncated originals.
    """
    reader = SDRF.create(feat_scp_ho)
    key1, data1 = [], []
    while not reader.eof():
        key_i, data_i = reader.read(1)
        key1.append(key_i[0])
        # truncate so every item has the same leading length
        data1.append(data_i[0][:10])

    # give every item a leading axis of length 1 and join along it
    stacked = np.concatenate(
        tuple(np.expand_dims(item, axis=0) for item in data1), axis=0)

    writer = DWF.create(feat_both_hso)
    writer.write(key1, stacked)
    writer.close()

    reader = SDRF.create(feat_scp_hso)
    key2, data2 = reader.read(0)

    for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
        assert k1 == k2
        assert_allclose(d1, d2)
def test_write_read_seq_file_vec():
    """Write vectors and read them back item by item.

    Everything in ``vec_scp_b`` is read at once and written with
    ``vec_both_ho``; ``vec_h5_ho`` is then read sequentially. Every original
    key must be found in the read-back keys with matching data.
    """
    reader = SDRF.create(vec_scp_b, path_prefix=input_prefix)
    key1, data1 = reader.read(0)

    # write
    writer = DWF.create(vec_both_ho)
    writer.write(key1, data1)
    writer.close()

    reader = SDRF.create(vec_h5_ho)
    key2, data2 = [], []
    while not reader.eof():
        key_i, data_i = reader.read(1)
        key2.append(key_i[0])
        data2.append(data_i[0])

    # items are matched by key rather than by position
    found, loc = ismember(key1, key2)
    assert np.all(found)
    for pos, (k1, d1) in enumerate(zip(key1, data1)):
        match = loc[pos]
        assert k1 == key2[match]
        assert_allclose(d1, data2[match])