Ejemplo n.º 1
0
def test_read_range2_compress_seq_file_feat():
    """Compare reader-side row ranges with manual slicing, per compression method.

    For each entry of ``feat_h5_hco`` the i-th matrix is read in full and
    sliced to rows ``[i, i + 10)``; the same file is then re-read with
    ``row_offset=i, num_rows=10`` and both results must agree.
    """
    for idx, method in enumerate(compression_methods):
        # Reference: full read followed by a manual row slice.
        reader = SDRF.create(feat_h5_hco[idx])
        ref_keys, ref_rows = [], []
        offset = 0
        while not reader.eof():
            keys, mats = reader.read(1)
            if len(keys) == 0:
                break
            ref_keys.append(keys[0])
            ref_rows.append(mats[0][offset:offset + 10])
            offset += 1

        # Under test: the reader selects the rows itself.
        reader = SDRF.create(feat_h5_hco[idx])
        got_keys, got_rows = [], []
        offset = 0
        while not reader.eof():
            keys, mats = reader.read(1, row_offset=offset, num_rows=10)
            if len(keys) == 0:
                break
            got_keys.append(keys[0])
            got_rows.append(mats[0])
            offset += 1

        for ref_key, got_key, ref_mat, got_mat in zip(
                ref_keys, got_keys, ref_rows, got_rows):
            assert ref_key == got_key
            assert_allclose(ref_mat,
                            got_mat,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % method))
Ejemplo n.º 2
0
def test_write_compress_feat():
    """Write features with every compression method and bound the error.

    The data from ``feat_scp_b`` are written compressed, read back, and
    compared element-wise against both the original values and the
    reference compressed files listed in ``feat_scp_c``.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_data = reader.read(0)

    for idx, method in enumerate(compression_methods):
        print('')
        # Write with our own compressor.
        writer = DWF.create(feat_both_co[idx],
                            compress=True,
                            compression_method=method)
        writer.write(ref_keys, ref_data)
        writer.close()

        # Reference compressed data (produced by kaldi copy-feats).
        reader = SDRF.create(feat_scp_c[idx], path_prefix=input_prefix)
        _, kaldi_data = reader.read(0)

        # The data we have just written.
        reader = SDRF.create(feat_scp_co[idx])
        _, own_data = reader.read(0)

        for orig, kaldi, own in zip(ref_data, kaldi_data, own_data):
            # Tolerance: error of the reference compression plus 0.1% of
            # the original magnitude.
            tolerance = np.abs(orig - kaldi) + np.abs(orig) * 0.001
            own_vs_kaldi = np.abs(kaldi - own)
            own_vs_orig = np.abs(orig - own)

            # An element is bad when our output is further than the
            # tolerance from both the reference and the original.
            bad = np.logical_and(tolerance < own_vs_kaldi,
                                 tolerance < own_vs_orig)
            for a, b, c in zip(orig[bad], kaldi[bad], own[bad]):
                print(a, b, c, a - b, b - c, a - c)

            assert not np.any(bad), 'Write compression %s failed' % method
Ejemplo n.º 3
0
def test_read_rangex2_seq_scp_feat():
    """Check a reader row range applied on top of the ``feat_range_b`` script.

    The reference is rows ``[2*i, 2*i + 10)`` of the i-th matrix in
    ``feat_scp_b``; it is compared with the i-th entry of ``feat_range_b``
    read with ``row_offset=i, num_rows=10``.
    NOTE(review): this presumably relies on ``feat_range_b`` entries already
    carrying a range that starts at row i -- confirm against the fixture.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_rows = [], []
    idx = 0
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        start = 2 * idx
        ref_rows.append(mats[0][start:start + 10])
        idx += 1

    reader = SDRF.create(feat_range_b, path_prefix=input_prefix)
    got_keys, got_rows = [], []
    idx = 0
    while not reader.eof():
        keys, mats = reader.read(1, row_offset=idx, num_rows=10)
        got_keys.append(keys[0])
        got_rows.append(mats[0])
        idx += 1

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_rows, got_rows):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-4)
Ejemplo n.º 4
0
def test_read_rangex2_compress_seq_scp_feat():
    """Per compression method, check a reader row range on a ranged script.

    Rows ``[2*i, 2*i + 10)`` of the i-th matrix in ``feat_scp_uc[k]`` are
    compared with the i-th entry of ``feat_range_c[k]`` read with
    ``row_offset=i, num_rows=10``.
    """
    for method_idx, method in enumerate(compression_methods):
        # Reference taken from the uncompressed script.
        reader = SDRF.create(feat_scp_uc[method_idx],
                             path_prefix=input_prefix)
        ref_keys, ref_rows = [], []
        idx = 0
        while not reader.eof():
            keys, mats = reader.read(1)
            ref_keys.append(keys[0])
            start = 2 * idx
            ref_rows.append(mats[0][start:start + 10])
            idx += 1

        # Ranged script, with an extra row range requested at read time.
        reader = SDRF.create(feat_range_c[method_idx],
                             path_prefix=input_prefix)
        got_keys, got_rows = [], []
        idx = 0
        while not reader.eof():
            keys, mats = reader.read(1, row_offset=idx, num_rows=10)
            got_keys.append(keys[0])
            got_rows.append(mats[0])
            idx += 1

        for ref_key, got_key, ref_mat, got_mat in zip(
                ref_keys, got_keys, ref_rows, got_rows):
            assert ref_key == got_key
            assert_allclose(ref_mat,
                            got_mat,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % method))
Ejemplo n.º 5
0
def test_read_seq_scp_split_feat():
    """Reading ``feat_scp_b`` in four parts must reproduce the sequential read."""
    # Reference: read the binary scp one entry at a time.
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_data.append(mats[0])

    # Read parts 1..4 (part_idx is passed 1-based) and concatenate them.
    merged_keys, merged_data = [], []
    for part in xrange(4):
        reader = SDRF.create(feat_scp_b,
                             path_prefix=input_prefix,
                             part_idx=part + 1,
                             num_parts=4)
        part_keys, part_data = reader.read(0)
        merged_keys = merged_keys + part_keys
        merged_data = merged_data + part_data

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, merged_keys,
                                                  ref_data, merged_data):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-4)
Ejemplo n.º 6
0
def test_read_range_compress_seq_scp_feat():
    """Compare ranged reads of compressed scripts with manual slicing.

    For each compression method ``k``, rows ``[i, i + 50)`` of the i-th
    matrix in ``feat_scp_uc[k]`` are compared with the i-th entry of
    ``feat_range_c[k]`` read in one go with ``read(0)``.
    """
    for k, cm in enumerate(compression_methods):
        # scp uncompressed binary
        r = SDRF.create(feat_scp_uc[k], path_prefix=input_prefix)
        key1 = []
        data1 = []
        i = 0
        while not r.eof():
            key_i, data_i = r.read(1)
            key1.append(key_i[0])
            data1.append(data_i[0][i:i + 50])
            i += 1

        # scp compressed
        # Index by the method counter k: the row counter i equals the number
        # of entries read above and is not a valid index into feat_range_c.
        print(feat_range_c[k])
        r = SDRF.create(feat_range_c[k], path_prefix=input_prefix)
        key2, data2 = r.read(0)

        for k1, k2, d1, d2 in zip(key1, key2, data1, data2):
            assert k1 == k2
            assert_allclose(d1,
                            d2,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % cm))
Ejemplo n.º 7
0
def test_read_compress_seq_file_feat():
    """Compressed ark files must decode close to their uncompressed versions.

    For each compression method the whole uncompressed file
    (``feat_ark_uc``) and the whole compressed file (``feat_ark_c``) are
    read with ``read(0)`` and compared pairwise.
    """
    for idx, method in enumerate(compression_methods):
        # ark uncompressed binary
        reader = SDRF.create(feat_ark_uc[idx], path_prefix=input_prefix)
        ref_keys, ref_data = reader.read(0)

        # ark compressed
        reader = SDRF.create(feat_ark_c[idx], path_prefix=input_prefix)
        got_keys, got_data = reader.read(0)

        for ref_key, got_key, ref_mat, got_mat in zip(
                ref_keys, got_keys, ref_data, got_data):
            assert ref_key == got_key
            assert_allclose(ref_mat,
                            got_mat,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % method))
Ejemplo n.º 8
0
def test_read_range_compress_seq_scp_feat():
    """Generate ranged scripts and check them against manual slicing.

    For each compression method a ranged script is written to
    ``feat_range_hco1[k]`` by appending ``[i:i+50]`` to the i-th line of
    ``feat_scp_hco1[k]``. Rows ``[i, i + 50)`` of the i-th matrix in
    ``feat_scp_hco[k]`` are then compared with ``feat_range_hco[k]``.
    """
    for idx, method in enumerate(compression_methods):
        # Build the ranged script line by line.
        with open(feat_range_hco1[idx], 'w') as dst, \
                open(feat_scp_hco1[idx], 'r') as src:
            for row, line in enumerate(src):
                dst.write('%s[%d:%d]\n' % (line.strip(), row, row + 50))

        # Reference: full matrices sliced by hand.
        reader = SDRF.create(feat_scp_hco[idx])
        ref_keys, ref_rows = [], []
        row = 0
        while not reader.eof():
            keys, mats = reader.read(1)
            ref_keys.append(keys[0])
            ref_rows.append(mats[0][row:row + 50])
            row += 1

        # Under test: the ranged script read in one go.
        reader = SDRF.create(feat_range_hco[idx])
        got_keys, got_rows = reader.read(0)

        for ref_key, got_key, ref_mat, got_mat in zip(
                ref_keys, got_keys, ref_rows, got_rows):
            assert ref_key == got_key
            assert_allclose(ref_mat,
                            got_mat,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % method))
Ejemplo n.º 9
0
def test_write_read_seq_scp_compress_feat():
    """Write ``feat_scp_ho`` data compressed and bound the round-trip error.

    The written data are read back and compared element-wise against both
    the original values and the reference compressed files in
    ``feat_scp_c``.
    """
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_data = reader.read(0)

    for idx, method in enumerate(compression_methods):
        print('')
        # Write with our own compressor.
        writer = DWF.create(feat_both_hco[idx],
                            compress=True,
                            compression_method=method)
        writer.write(ref_keys, ref_data)
        writer.close()

        # Reference data compressed by kaldi.
        reader = SDRF.create(feat_scp_c[idx], path_prefix=input_prefix)
        _, kaldi_data = reader.read(0)

        # The data we have just written.
        reader = SDRF.create(feat_scp_hco[idx])
        _, own_data = reader.read(0)

        for orig, kaldi, own in zip(ref_data, kaldi_data, own_data):
            # Tolerance: reference compression error plus 0.1% of |orig|.
            tolerance = np.abs(orig - kaldi) + np.abs(orig) * 0.001
            own_vs_kaldi = np.abs(kaldi - own)
            own_vs_orig = np.abs(orig - own)

            bad = np.logical_and(tolerance < own_vs_kaldi,
                                 tolerance < own_vs_orig)
            for a, b, c in zip(orig[bad], kaldi[bad], own[bad]):
                print(a, b, c, a - b, b - c, a - c)

            assert not np.any(bad), 'Write compression %s failed' % method
Ejemplo n.º 10
0
def test_with_write_feat():
    """A writer used as a context manager must produce readable output.

    Data from ``feat_scp_b`` are written through ``feat_both_bo`` inside a
    ``with`` block, then read back from ``feat_scp_bo`` and compared.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_data.append(mats[0])

    # binary, written through the context-manager interface
    with DWF.create(feat_both_bo) as writer:
        writer.write(ref_keys, ref_data)

    reader = SDRF.create(feat_scp_bo)
    got_keys, got_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        got_keys.append(keys[0])
        got_data.append(mats[0])

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_data, got_data):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat)
Ejemplo n.º 11
0
def test_read_range_seq_scp_feat():
    """Generate a ranged script and compare it with manual row slicing.

    The ranged script ``feat_range_ho1`` is written by appending
    ``[i:i+50]`` to the i-th line of ``feat_scp_ho2``. Rows ``[i, i + 50)``
    of the i-th matrix in ``feat_scp_ho`` are then compared with the
    entries of ``feat_range_ho``.
    """
    with open(feat_range_ho1, 'w') as dst, open(feat_scp_ho2, 'r') as src:
        for row, line in enumerate(src):
            dst.write('%s[%d:%d]\n' % (line.strip(), row, row + 50))

    # Reference: full matrices sliced by hand.
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_rows = [], []
    row = 0
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_rows.append(mats[0][row:row + 50])
        row += 1

    # Under test: entries of the ranged script.
    reader = SDRF.create(feat_range_ho)
    got_keys, got_rows = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        got_keys.append(keys[0])
        got_rows.append(mats[0])

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_rows, got_rows):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-4)
Ejemplo n.º 12
0
def test_write_read_seq_file_feat():
    """Round-trip two ark files through the writer and merge their scripts.

    Each ``feat_ark_b[k]`` is written through ``feat_both_ho[k]`` and read
    back from ``feat_h5_ho[k]``; every original key must be found with
    matching data. Finally the two scripts in ``feat_scp_ho1`` are
    concatenated into ``feat_scp_ho2``.
    """
    for part in xrange(2):
        reader = SDRF.create(feat_ark_b[part], path_prefix=input_prefix)
        ref_keys, ref_data = reader.read(0)

        # write
        writer = DWF.create(feat_both_ho[part])
        writer.write(ref_keys, ref_data)
        writer.close()

        reader = SDRF.create(feat_h5_ho[part])
        got_keys, got_data = [], []
        while not reader.eof():
            keys, mats = reader.read(1)
            got_keys.append(keys[0])
            got_data.append(mats[0])

        # Match by key rather than by position.
        found, loc = ismember(ref_keys, got_keys)
        assert np.all(found)
        for pos, (ref_key, ref_mat) in enumerate(zip(ref_keys, ref_data)):
            match = loc[pos]
            assert ref_key == got_keys[match]
            assert_allclose(ref_mat, got_data[match])

    # Concatenate the per-part scripts into a single script file.
    with open(feat_scp_ho2, 'w') as merged:
        for part in xrange(2):
            with open(feat_scp_ho1[part], 'r') as part_scp:
                for line in part_scp:
                    merged.write(line)
Ejemplo n.º 13
0
def test_read_shapes_seq_file_feat():
    """``read_shapes`` must report the shape of each matrix ``read`` returns."""
    # Reference: shapes taken from the matrices themselves.
    reader = SDRF.create(feat_h5_ho[0])
    ref_keys, ref_shapes = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        ref_keys.append(keys[0])
        ref_shapes.append(mats[0].shape)

    # Under test: shapes reported by the reader.
    reader = SDRF.create(feat_h5_ho[0])
    got_keys, got_shapes = [], []
    while not reader.eof():
        keys, shapes = reader.read_shapes(1)
        if len(keys) == 0:
            break
        got_keys.append(keys[0])
        got_shapes.append(shapes[0])

    for ref_key, got_key, ref_shape, got_shape in zip(
            ref_keys, got_keys, ref_shapes, got_shapes):
        assert ref_key == got_key
        assert ref_shape == got_shape
Ejemplo n.º 14
0
def test_read_iterator_seq_scp_feat():
    """Iterating over a reader must yield the same items as ``read(1)`` calls.

    The reference comes from the binary script ``feat_scp_b``; it is
    compared with iteration over the binary script and then over the text
    script ``feat_scp_t``.
    """
    # scp binary, read entry by entry
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_data.append(mats[0])

    # scp binary, via the iterator protocol
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    it_keys, it_data = [], []
    for key, mat in reader:
        it_keys.append(key)
        it_data.append(mat)

    for ref_key, it_key, ref_mat, it_mat in zip(ref_keys, it_keys,
                                                ref_data, it_data):
        assert ref_key == it_key
        assert_allclose(ref_mat, it_mat, rtol=1e-5)

    # scp text, via the iterator protocol
    reader = SDRF.create(feat_scp_t, path_prefix=input_prefix)
    it_keys, it_data = [], []
    for key, mat in reader:
        it_keys.append(key)
        it_data.append(mat)

    for ref_key, it_key, ref_mat, it_mat in zip(ref_keys, it_keys,
                                                ref_data, it_data):
        assert ref_key == it_key
        assert_allclose(ref_mat, it_mat, rtol=1e-4)
Ejemplo n.º 15
0
def test_read_dims_seq_file_feat():
    """``read_dims`` must equal ``shape[1]`` of each matrix ``read`` returns."""
    # ark binary: take the dimension from the matrices themselves.
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    ref_keys, ref_dims = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        ref_keys.append(keys[0])
        ref_dims.append(mats[0].shape[1])

    # Same file, dimensions reported by the reader.
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    got_keys, got_dims = [], []
    while not reader.eof():
        keys, dims = reader.read_dims(1)
        if len(keys) == 0:
            break
        got_keys.append(keys[0])
        got_dims.append(dims[0])

    for ref_key, got_key, ref_dim, got_dim in zip(ref_keys, got_keys,
                                                  ref_dims, got_dims):
        assert ref_key == got_key
        assert ref_dim == got_dim
Ejemplo n.º 16
0
def test_read_seq_file_feat():
    """The binary and text ark files must hold the same keys and data."""
    # ark binary
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    bin_keys, bin_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        bin_keys.append(keys[0])
        bin_data.append(mats[0])

    # ark text
    reader = SDRF.create(feat_ark_t, path_prefix=input_prefix)
    txt_keys, txt_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        txt_keys.append(keys[0])
        txt_data.append(mats[0])

    for bin_key, txt_key, bin_mat, txt_mat in zip(bin_keys, txt_keys,
                                                  bin_data, txt_data):
        assert bin_key == txt_key
        assert_allclose(bin_mat, txt_mat, rtol=1e-5)
Ejemplo n.º 17
0
def test_read_range2_seq_file_feat():
    """Reader-side row ranges on the binary ark must match manual slicing.

    The i-th matrix is sliced to rows ``[i, i + 10)`` and compared with the
    same entry read with ``row_offset=i, num_rows=10``.
    """
    # ark binary: full read, manual slice
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    ref_keys, ref_rows = [], []
    offset = 0
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        ref_keys.append(keys[0])
        ref_rows.append(mats[0][offset:offset + 10])
        offset += 1

    # Same file, rows selected by the reader.
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    got_keys, got_rows = [], []
    offset = 0
    while not reader.eof():
        keys, mats = reader.read(1, row_offset=offset, num_rows=10)
        if len(keys) == 0:
            break
        print(keys[0])
        got_keys.append(keys[0])
        got_rows.append(mats[0])
        offset += 1

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_rows, got_rows):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-5)
Ejemplo n.º 18
0
def test_write_squeeze_vec():
    """Write vectors passed as one stacked array and read them back.

    The entries of ``vec_scp_b`` are stacked along a new leading axis,
    written through ``vec_both_bo`` in a single call, then re-read from
    ``vec_scp_bo`` and compared with the original entries.
    """
    reader = SDRF.create(vec_scp_b, path_prefix=input_prefix)
    ref_keys, ref_vecs = [], []
    while not reader.eof():
        keys, vecs = reader.read(1)
        ref_keys.append(keys[0])
        ref_vecs.append(vecs[0])

    # Give each entry a leading axis of length 1 and join along that axis.
    stacked = np.concatenate(
        tuple(np.expand_dims(vec, axis=0) for vec in ref_vecs), axis=0)

    # binary
    writer = DWF.create(vec_both_bo)
    writer.write(ref_keys, stacked)
    writer.close()

    reader = SDRF.create(vec_scp_bo)
    got_keys, got_vecs = [], []
    while not reader.eof():
        keys, vecs = reader.read(1)
        got_keys.append(keys[0])
        got_vecs.append(vecs[0])

    for ref_key, got_key, ref_vec, got_vec in zip(ref_keys, got_keys,
                                                  ref_vecs, got_vecs):
        assert ref_key == got_key
        assert_allclose(ref_vec, got_vec)
Ejemplo n.º 19
0
def test_read_shapes_compress_seq_file_feat():
    """``read_shapes`` on each compressed ark must match the uncompressed shapes."""
    # ark binary: reference shapes from the actual matrices
    reader = SDRF.create(feat_ark_b, path_prefix=input_prefix)
    ref_keys, ref_shapes = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        if len(keys) == 0:
            break
        ref_keys.append(keys[0])
        ref_shapes.append(mats[0].shape)

    for idx, method in enumerate(compression_methods):
        reader = SDRF.create(feat_ark_c[idx], path_prefix=input_prefix)
        got_keys, got_shapes = [], []
        while not reader.eof():
            keys, shapes = reader.read_shapes(1)
            if len(keys) == 0:
                break
            got_keys.append(keys[0])
            got_shapes.append(shapes[0])

        for ref_key, got_key, ref_shape, got_shape in zip(
                ref_keys, got_keys, ref_shapes, got_shapes):
            assert ref_key == got_key, 'Wrong key for method %s' % method
            assert ref_shape == got_shape, (
                'Wrong shape for method %s' % method)
Ejemplo n.º 20
0
def test_read_shapes_compress_seq_scp_feat():
    """Shapes from each compressed script must match ``feat_scp_ho`` shapes."""
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_shapes = reader.read_shapes(0)

    for idx, method in enumerate(compression_methods):
        reader = SDRF.create(feat_scp_hco[idx])
        got_keys, got_shapes = reader.read_shapes(0)

        for ref_key, got_key, ref_shape, got_shape in zip(
                ref_keys, got_keys, ref_shapes, got_shapes):
            assert ref_key == got_key, 'Wrong key for method %s' % method
            assert ref_shape == got_shape, (
                'Wrong shape for method %s' % method)
Ejemplo n.º 21
0
def test_with_read_seq_scp_feat():
    """A reader used in a ``with`` block must return the same data as a plain one."""
    # Plain reader, no context manager.
    plain_reader = SDRF.create(feat_scp_ho)
    plain_keys, plain_data = plain_reader.read(0)

    # Reader used as a context manager.
    with SDRF.create(feat_scp_ho) as ctx_reader:
        ctx_keys, ctx_data = ctx_reader.read(0)

    for plain_key, ctx_key, plain_mat, ctx_mat in zip(
            plain_keys, ctx_keys, plain_data, ctx_data):
        assert plain_key == ctx_key
        assert_allclose(plain_mat, ctx_mat, rtol=1e-5)
Ejemplo n.º 22
0
def test_read_iterator_seq_scp_feat():
    """Iterating over a ``feat_scp_ho`` reader must match a full ``read(0)``."""
    # Reference: everything in one call.
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_data = reader.read(0)

    # Under test: the iterator protocol, one (key, data) pair at a time.
    reader = SDRF.create(feat_scp_ho)
    it_keys, it_data = [], []
    for key, mat in reader:
        it_keys.append(key)
        it_data.append(mat)

    for ref_key, it_key, ref_mat, it_mat in zip(ref_keys, it_keys,
                                                ref_data, it_data):
        assert ref_key == it_key
        assert_allclose(ref_mat, it_mat, rtol=1e-5)
Ejemplo n.º 23
0
def test_read_iterator_seq_file_feat():
    """Iterating over a ``feat_h5_ho[0]`` reader must match a full ``read(0)``."""
    # Reference: everything in one call.
    reader = SDRF.create(feat_h5_ho[0])
    ref_keys, ref_data = reader.read(0)

    # Under test: the iterator protocol.
    reader = SDRF.create(feat_h5_ho[0])
    it_keys, it_data = [], []
    for key, mat in reader:
        it_keys.append(key)
        it_data.append(mat)

    # Print both key lists so an ordering mismatch shows in the test output.
    print(ref_keys)
    print(it_keys)
    for ref_key, it_key, ref_mat, it_mat in zip(ref_keys, it_keys,
                                                ref_data, it_data):
        assert ref_key == it_key
        assert_allclose(ref_mat, it_mat, rtol=1e-5)
Ejemplo n.º 24
0
def test_read_dims_seq_scp_feat():
    """``read_dims(0)`` must equal ``shape[1]`` of every matrix in the script."""
    # Reference: dimension taken from each matrix.
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_dims = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_dims.append(mats[0].shape[1])

    # Under test: dimensions reported by the reader in one call.
    reader = SDRF.create(feat_scp_ho)
    got_keys, got_dims = reader.read_dims(0)

    for ref_key, got_key, ref_dim, got_dim in zip(ref_keys, got_keys,
                                                  ref_dims, got_dims):
        assert ref_key == got_key
        assert ref_dim == got_dim
Ejemplo n.º 25
0
def test_read_seq_scp_split_feat():
    """Reading ``feat_scp_ho`` in four parts must reproduce the full read."""
    reader = SDRF.create(feat_scp_ho)
    ref_keys, ref_data = reader.read(0)

    # Read parts 1..4 (part_idx is passed 1-based) and accumulate them.
    merged_keys, merged_data = [], []
    for part in xrange(4):
        reader = SDRF.create(feat_scp_ho, part_idx=part + 1, num_parts=4)
        part_keys, part_data = reader.read(0)
        merged_keys += part_keys
        merged_data += part_data

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, merged_keys,
                                                  ref_data, merged_data):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-4)
Ejemplo n.º 26
0
def test_read_random_feat_permissive():
    """A random-access reader opened with the ``p,`` prefix tolerates unknown keys.

    The key ``'unk'`` is appended to the keys of ``feat_scp_b``; the entry
    returned for it must have shape ``(0,)`` while all other entries match
    the sequentially read data.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    query_keys, ref_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        query_keys.append(keys[0])
        ref_data.append(mats[0])

    # One extra key with no matching reference data.
    query_keys.append('unk')

    # binary
    reader = RDRF.create('p,' + feat_scp_b, path_prefix=input_prefix)
    got_data = reader.read(query_keys)

    for ref_mat, got_mat in zip(ref_data, got_data[:-1]):
        assert_allclose(ref_mat, got_mat)
    assert got_data[-1].shape == (0, )

    # text
    reader = RDRF.create('p,' + feat_scp_t, path_prefix=input_prefix)
    got_data = reader.read(query_keys)

    for ref_mat, got_mat in zip(ref_data, got_data[:-1]):
        assert_allclose(ref_mat, got_mat, rtol=1e-5)
    assert got_data[-1].shape == (0, )
Ejemplo n.º 27
0
def test_write_flush_feat():
    """Write through ``feat_both_bfo`` and ``feat_both_tfo`` and read back.

    After each write, ``feat_scp_bo`` is read and compared with the data
    from ``feat_scp_b``.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_data.append(mats[0])

    # binary
    writer = DWF.create(feat_both_bfo)
    writer.write(ref_keys, ref_data)
    writer.close()

    reader = SDRF.create(feat_scp_bo)
    got_keys, got_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        got_keys.append(keys[0])
        got_data.append(mats[0])

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_data, got_data):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat)

    # text
    writer = DWF.create(feat_both_tfo)
    writer.write(ref_keys, ref_data)
    writer.close()

    # NOTE(review): this re-reads feat_scp_bo, the same script checked in
    # the binary section above. If feat_both_tfo writes to a different
    # script, the text output is never verified here -- confirm which
    # fixture was intended.
    reader = SDRF.create(feat_scp_bo)
    got_keys, got_data = [], []
    while not reader.eof():
        keys, mats = reader.read(1)
        got_keys.append(keys[0])
        got_data.append(mats[0])

    for ref_key, got_key, ref_mat, got_mat in zip(ref_keys, got_keys,
                                                  ref_data, got_data):
        assert ref_key == got_key
        assert_allclose(ref_mat, got_mat, rtol=1e-4)
Ejemplo n.º 28
0
def test_with_write_feat():
    """A writer used as a context manager must round-trip ``feat_ark_b[0]``.

    The data are written to ``feat_h5_ho[0]`` inside a ``with`` block, read
    back, and matched to the originals by key.
    """
    reader = SDRF.create(feat_ark_b[0], path_prefix=input_prefix)
    ref_keys, ref_data = reader.read(0)

    # write through the context-manager interface
    with DWF.create(feat_h5_ho[0]) as writer:
        writer.write(ref_keys, ref_data)

    reader = SDRF.create(feat_h5_ho[0])
    got_keys, got_data = reader.read(0)

    # Match by key rather than by position.
    found, loc = ismember(ref_keys, got_keys)
    assert np.all(found)
    for pos, (ref_key, ref_mat) in enumerate(zip(ref_keys, ref_data)):
        match = loc[pos]
        assert ref_key == got_keys[match]
        assert_allclose(ref_mat, got_data[match])
Ejemplo n.º 29
0
def test_read_compress_seq_file_feat():
    """Reading a compressed file directly or through its script must agree.

    For each compression method, ``feat_h5_hco[i]`` and ``feat_scp_hco[i]``
    are read in full; every key from the script must be present in the
    file read, with matching data.
    """
    for i, cm in enumerate(compression_methods):
        r = SDRF.create(feat_h5_hco[i])
        key1, data1 = r.read(0)

        r = SDRF.create(feat_scp_hco[i])
        key2, data2 = r.read(0)

        # Match by key rather than by position, and fail early with a
        # clear message if a script key is missing from the file read.
        f, loc = ismember(key2, key1)
        assert np.all(f), 'Missing keys for compression %s' % cm
        # Use a separate index name so the method index i is not shadowed.
        for j, (k2, d2) in enumerate(zip(key2, data2)):
            assert key1[loc[j]] == k2
            assert_allclose(data1[loc[j]],
                            d2,
                            rtol=1e-5,
                            atol=1e-4,
                            err_msg=('Read compression %s failed' % cm))
Ejemplo n.º 30
0
def test_read_range_shapes_seq_scp_feat():
    """``read_shapes`` on the ranged script must match manually sliced shapes.

    The reference is the shape of rows ``[i, i + 50)`` of the i-th matrix
    in ``feat_scp_b``; it is compared with the shapes reported for
    ``feat_range_b``.
    """
    reader = SDRF.create(feat_scp_b, path_prefix=input_prefix)
    ref_keys, ref_shapes = [], []
    row = 0
    while not reader.eof():
        keys, mats = reader.read(1)
        ref_keys.append(keys[0])
        ref_shapes.append(mats[0][row:row + 50].shape)
        row += 1

    reader = SDRF.create(feat_range_b, path_prefix=input_prefix)
    got_keys, got_shapes = reader.read_shapes(0)

    for ref_key, got_key, ref_shape, got_shape in zip(
            ref_keys, got_keys, ref_shapes, got_shapes):
        assert ref_key == got_key
        assert ref_shape == got_shape