コード例 #1
0
 def testInt32VectorReadWrite(self):
     """
     Test read/write for int32 vectors.

     Reads the binary and the ASCII int32-vector archives, re-saves the
     binary one, and checks that every vector read back equals the one
     that was written and that no entry is missing after the round trip.
     """
     # read,
     i32_vec = dict(kaldi_io.read_vec_int_ark('tests/data/ali.ark')) # binary,
     # NOTE: the ASCII archive is only parsed here; its content is never
     # compared with the binary one, so this acts as a reader smoke test.
     i32_vec2 = dict(kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark')) # ascii,
     # re-save the data,
     with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark','wb') as f:
         for k,v in i32_vec.items():
             kaldi_io.write_vec_int(f, v, k)
     # read and make sure it is the same,
     n_reread = 0
     for k,v in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
         self.assertTrue(np.array_equal(v,i32_vec[k]), msg="int32 vector same after re-saving")
         n_reread += 1
     # Without this check the loop above passes vacuously when the re-saved
     # archive yields nothing, or fewer entries than were written.
     self.assertEqual(n_reread, len(i32_vec), msg="all int32 vectors present after re-saving")
コード例 #2
0
 def testInt32VectorReadWrite(self):
     """
     Test read/write for int32 vectors.

     Loads the binary and the ASCII int32-vector archives, writes the
     binary content to a new archive, reads it back and verifies both the
     vector values and that the set of keys survived the round trip.
     """
     # read,
     i32_vec = dict(
         kaldi_io.read_vec_int_ark('tests/data/ali.ark'))  # binary,
     # NOTE: only parsed, never compared with the binary archive; kept so
     # the ASCII reading path is still exercised.
     i32_vec2 = dict(
         kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark'))  # ascii,
     # re-save the data,
     with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
         for k, v in i32_vec.items():
             kaldi_io.write_vec_int(f, v, k)
     # read and make sure it is the same,
     seen_keys = set()
     for k, v in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
         self.assertTrue(np.array_equal(v, i32_vec[k]),
                         msg="int32 vector same after re-saving")
         seen_keys.add(k)
     # Guard against a vacuous pass: an empty or truncated re-saved archive
     # would otherwise never fail the per-vector comparison above.
     self.assertEqual(seen_keys, set(i32_vec),
                      msg="all int32 vector keys present after re-saving")
コード例 #3
0
#!/usr/bin/env python

import numpy as np
import kaldi_io

print('testing int32-vector i/o')
# Collect every (key, vector) pair yielded by the readers into a dict.
i32_vec = dict(kaldi_io.read_vec_int_ark('data/ali.ark'))  # binary,
i32_vec2 = dict(kaldi_io.read_vec_int_ark('data/ali_ascii.ark'))  # ascii,
# - store,
with kaldi_io.open_or_fd('data_re-saved/ali.ark', 'wb') as f:
    for k, v in i32_vec.items():
        kaldi_io.write_vec_int(f, v, k)
# - read and compare,
for k, v in kaldi_io.read_vec_int_ark('data_re-saved/ali.ark'):
    assert np.array_equal(v, i32_vec[k])

print('testing float-vector i/o')
# Same data is loaded through three routes: scp, binary ark and ascii ark.
flt_vec = dict(kaldi_io.read_vec_flt_scp('data/conf.scp'))  # scp,
flt_vec2 = dict(kaldi_io.read_vec_flt_ark('data/conf.ark'))  # binary-ark,
flt_vec3 = dict(
    kaldi_io.read_vec_flt_ark('data/conf_ascii.ark'))  # ascii-ark,
# - store,
with kaldi_io.open_or_fd('data_re-saved/conf.ark', 'wb') as f:
    for k, v in flt_vec.items():
        kaldi_io.write_vec_flt(f, v, k)
コード例 #4
0
ファイル: fsts2align.py プロジェクト: underdogliu/kaldi
    tid_path = np.array([arc[2] for arc in path], dtype=np.int32)
    return tid_path


if __name__ == "__main__":
    # Command line: optional seed, an optional graphs archive (falls back
    # to stdin) and the wspecifier the alignments are written to.
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument(
        "train_graphs_ark",
        nargs='?',
        type=argparse.FileType(mode="r"),
        default=sys.stdin,
    )
    parser.add_argument('ali_wspecifier', help='WSpecifier for avg logits')
    args = parser.parse_args()
    logger.info(vars(args))

    # 'ark:-' selects the binary stdout stream instead of a named file.
    if args.ali_wspecifier == 'ark:-':
        args.ali_wspecifier = sys.stdout.buffer
    np.random.seed(args.seed)

    fst_reader = FstParser()
    with kaldi_io.open_or_fd(args.ali_wspecifier, mode='wb') as f_out:
        # Stays 0 when the input yields no utterances at all.
        n_utts = 0
        graphs = tqdm.tqdm(fst_reader.iterate_file(args.train_graphs_ark))
        for n_utts, (utt, fst) in enumerate(graphs, start=1):
            logger.debug(f'Process {utt}')
            random_ali = get_random_ali(fst)
            logger.debug(f'Generated {random_ali.shape} utt ali')
            kaldi_io.write_vec_int(f_out, random_ali, key=utt)
        logger.info(f"Done. Processed {n_utts} utterance")
コード例 #5
0


if __name__ == "__main__":
    # Command line: the model's own options, a seed, and the alignment
    # rspecifier / wspecifier pair.
    parser = argparse.ArgumentParser()
    AliStretchModel.add_args(parser)
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument("ali_rspecifier", help="align RSpecifier")
    parser.add_argument('ali_wspecifier', help='WSpecifier for straighted ali')
    args = parser.parse_args()
    logger.info(vars(args))

    # 'ark:-' selects the binary standard streams instead of named files.
    if args.ali_wspecifier == 'ark:-':
        args.ali_wspecifier = sys.stdout.buffer
    if args.ali_rspecifier == 'ark:-':
        args.ali_rspecifier = sys.stdin.buffer
    np.random.seed(args.seed)

    model = AliStretchModel.build_from_disk(args)

    with kaldi_io.open_or_fd(args.ali_wspecifier, mode='wb') as f_out:
        # Stays 0 when the input archive contains no alignments.
        n_utts = 0
        alignments = kaldi_io.read_vec_int_ark(args.ali_rspecifier)
        for n_utts, (utt, ali) in enumerate(alignments, start=1):
            logger.debug(f'Process {utt}')
            s_ali = model(ali)
            logger.debug(f'Generated {s_ali.shape} ali from {ali.shape}')
            kaldi_io.write_vec_int(f_out, s_ali, key=utt)
        logger.info(f"Done. Processed {n_utts} utterance")