def testInt32VectorReadWrite(self):
    """
    Test read/write for int32 vectors.

    Reads the reference archive, writes every vector into a new archive with
    ``kaldi_io.write_vec_int`` and reads that archive back. Each record read
    back must equal the vector it was written from, and the number of records
    read back must match the number written.
    """
    # read,
    i32_vec = {k: v for k, v in kaldi_io.read_vec_int_ark('tests/data/ali.ark')}  # binary,
    # The ascii archive is only parsed: its contents are not compared with
    # anything, the read just has to finish without raising.
    for _ in kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark'):  # ascii,
        pass
    # re-save the data,
    with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
        for k, v in i32_vec.items():
            kaldi_io.write_vec_int(f, v, k)
    # read and make sure it is the same,
    n_read = 0
    for k, v in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
        # Report an unexpected key as a test failure instead of a bare KeyError.
        self.assertIn(k, i32_vec, msg="int32 vector key known after re-saving")
        self.assertTrue(np.array_equal(v, i32_vec[k]), msg="int32 vector same after re-saving")
        n_read += 1
    # Without this count an empty or truncated re-saved archive would pass,
    # because the loop above would simply have nothing (or too little) to check.
    self.assertEqual(n_read, len(i32_vec), msg="all int32 vectors present after re-saving")
def testInt32VectorReadWrite(self):
    """
    Test read/write for int32 vectors.

    Reads the reference archive, writes every vector into a new archive with
    ``kaldi_io.write_vec_int`` and reads that archive back. Each record read
    back must equal the vector it was written from, and the number of records
    read back must match the number written.
    """
    # read,
    i32_vec = {k: v for k, v in kaldi_io.read_vec_int_ark('tests/data/ali.ark')}  # binary,
    # The ascii archive is only parsed: its contents are not compared with
    # anything, the read just has to finish without raising.
    for _ in kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark'):  # ascii,
        pass
    # re-save the data,
    with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
        for k, v in i32_vec.items():
            kaldi_io.write_vec_int(f, v, k)
    # read and make sure it is the same,
    n_read = 0
    for k, v in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
        # Report an unexpected key as a test failure instead of a bare KeyError.
        self.assertIn(k, i32_vec, msg="int32 vector key known after re-saving")
        self.assertTrue(np.array_equal(v, i32_vec[k]), msg="int32 vector same after re-saving")
        n_read += 1
    # Without this count an empty or truncated re-saved archive would pass,
    # because the loop above would simply have nothing (or too little) to check.
    self.assertEqual(n_read, len(i32_vec), msg="all int32 vectors present after re-saving")
#!/usr/bin/env python
# Exercises kaldi_io vector I/O: int32 vectors are read, re-saved and compared
# with the originals; float vectors are read and re-saved.

import numpy as np
import kaldi_io

print('testing int32-vector i/o')
i32_vec = dict(kaldi_io.read_vec_int_ark('data/ali.ark'))  # binary,
i32_vec2 = dict(kaldi_io.read_vec_int_ark('data/ali_ascii.ark'))  # ascii,
# - store,
with kaldi_io.open_or_fd('data_re-saved/ali.ark', 'wb') as f:
    for utt_id, vec in i32_vec.items():
        kaldi_io.write_vec_int(f, vec, utt_id)
# - read and compare,
for utt_id, vec in kaldi_io.read_vec_int_ark('data_re-saved/ali.ark'):
    expected = i32_vec[utt_id]
    assert np.array_equal(vec, expected)

print('testing float-vector i/o')
flt_vec = dict(kaldi_io.read_vec_flt_scp('data/conf.scp'))  # scp,
flt_vec2 = dict(kaldi_io.read_vec_flt_ark('data/conf.ark'))  # binary-ark,
flt_vec3 = dict(kaldi_io.read_vec_flt_ark('data/conf_ascii.ark'))  # ascii-ark,
# - store,
with kaldi_io.open_or_fd('data_re-saved/conf.ark', 'wb') as f:
    for utt_id, vec in flt_vec.items():
        kaldi_io.write_vec_flt(f, vec, utt_id)
tid_path = np.array([arc[2] for arc in path], dtype=np.int32) return tid_path if __name__ == "__main__": parser = argparse.ArgumentParser() #TidAMModel.add_args(parser) parser.add_argument('--seed', type=int, default=0, help='Random seed') parser.add_argument("train_graphs_ark", nargs='?', type=argparse.FileType(mode="r"), default=sys.stdin) parser.add_argument('ali_wspecifier', help='WSpecifier for avg logits') args = parser.parse_args() logger.info(vars(args)) if args.ali_wspecifier == 'ark:-': args.ali_wspecifier = sys.stdout.buffer #logger.info(f"Random seed is {args.seed}") np.random.seed(args.seed) fst_reader = FstParser() with kaldi_io.open_or_fd(args.ali_wspecifier, mode='wb') as f_out: i=0 for utt, fst in tqdm.tqdm(fst_reader.iterate_file(args.train_graphs_ark)): i+=1 logger.debug(f'Process {utt}') random_ali = get_random_ali(fst) logger.debug(f'Generated {random_ali.shape} utt ali') kaldi_io.write_vec_int(f_out, random_ali, key=utt) logger.info(f"Done. Processed {i} utterance")
if __name__ == "__main__":
    # Command line: the model registers its own options first, then the seed
    # and the input/output alignment specifiers are added.
    parser = argparse.ArgumentParser()
    AliStretchModel.add_args(parser)
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument("ali_rspecifier", help="align RSpecifier")
    parser.add_argument('ali_wspecifier', help='WSpecifier for straighted ali')
    args = parser.parse_args()
    logger.info(vars(args))

    # 'ark:-' is replaced by the binary standard stream for that direction.
    if args.ali_rspecifier == 'ark:-':
        args.ali_rspecifier = sys.stdin.buffer
    if args.ali_wspecifier == 'ark:-':
        args.ali_wspecifier = sys.stdout.buffer

    np.random.seed(args.seed)
    model = AliStretchModel.build_from_disk(args)

    with kaldi_io.open_or_fd(args.ali_wspecifier, mode='wb') as f_out:
        # Stays 0 when the input yields nothing, so the final log line still
        # has a value to report.
        i = 0
        for i, (utt, ali) in enumerate(
                kaldi_io.read_vec_int_ark(args.ali_rspecifier), start=1):
            logger.debug(f'Process {utt}')
            s_ali = model(ali)
            logger.debug(f'Generated {s_ali.shape} ali from {ali.shape}')
            kaldi_io.write_vec_int(f_out, s_ali, key=utt)
    logger.info(f"Done. Processed {i} utterance")