# Register the script's command-line options on the option parser `po`
# (created before this section; presumably a Kaldi ParseOptions -- confirm
# against the code above), parse the command line, and read the two required
# positional arguments.

# Framing / spectrogram options.
po.register_int("frame-shift", 10,
                "Length of frame shift in ms "
                "default is 10ms")
po.register_int("nfft", 256,
                "Number of DFT points "
                "default is 256")
po.register_int("arma-order", 5,
                "Length of ARMA window that will be applied "
                "to the spectrogram")

# LTSV options.
po.register_int("ltsv-ctx-window", 50,
                "Context window for LTSV computation "
                "default is 50")
po.register_float("threshold", 0.01,
                  "Parameter for sigmoid scaling in LTSV "
                  "default is 0.01")
po.register_float("slope", 0.001,
                  "Parameter for sigmoid scaling in LTSV "
                  "default is 0.001")
po.register_bool("sigmoid-scale", True,
                 "Apply sigmoid scaling in LTSV "
                 "default is True")

# DCT options.
po.register_int("dct-num-cep", 5,
                "DCT number of coefficitents "
                "default is 5")
po.register_int("dct-ctx-window", 30,
                "DCT context window "
                "default is 30")

# Debugging aid.
po.register_bool("test-plot", False,
                 "Produces a plot for testing "
                 "default is False")

opts = po.parse_args()

# Exactly two positional arguments are required.
if po.num_args() != 2:
    po.print_usage()
    # Exit with a non-zero status so calling scripts can detect the usage
    # error; a bare sys.exit() would report success (status 0).
    sys.exit(1)

wav_rspecifier = po.get_arg(1)
feats_wspecifier = po.get_arg(2)
"Length of ARMA window that will be applied to the spectrogram", ) po.register_int( "ltsv-ctx-window", 50, "Context window for LTSV computation (default: 50)", ) po.register_float( "threshold", 0.01, "Parameter for sigmoid scaling in LTSV (default: 0.01)", ) po.register_float( "slope", 0.001, "Parameter for sigmoid scaling in LTSV (default: 0.001)") po.register_bool("sigmoid-scale", True, "Apply sigmoid scaling in LTSV (default: True)") po.register_int("dct-num-cep", 5, "DCT number of coefficitents (default: 5)") po.register_int("dct-ctx-window", 30, "DCT context window (default: 30)") po.register_bool("test-plot", False, "Produces a plot for testing (default: False)") opts = po.parse_args() if po.num_args() != 2: po.print_usage() sys.exit() wav_rspecifier = po.get_arg(1) feats_wspecifier = po.get_arg(2)
.format(__version__), level=logging.INFO) usage = """Decode features using GMM-based model. Usage: gmm-decode-faster.py [options] model-in fst-in features-rspecifier words-wspecifier [alignments-wspecifier [lattice-wspecifier]] Note: lattices, if output, will just be linear sequences; use gmm-latgen-faster if you want "real" lattices. """ po = ParseOptions(usage) decoder_opts = FasterDecoderOptions() decoder_opts.register(po, True) po.register_float("acoustic-scale", 0.1, "Scaling factor for acoustic likelihoods") po.register_bool("allow-partial", True, "Produce output even when final state was not reached") po.register_str("word-symbol-table", "", "Symbol table for words [for debug output]"); opts = po.parse_args() if po.num_args() < 4 or po.num_args() > 6: po.print_usage() sys.exit() model_rxfilename = po.get_arg(1) fst_rxfilename = po.get_arg(2) feature_rspecifier = po.get_arg(3) words_wspecifier = po.get_arg(4) alignment_wspecifier = po.get_opt_arg(5) lattice_wspecifier = po.get_opt_arg(6)
return num_success != 0 if __name__ == '__main__': usage = """Create MFCC feature files. Usage: compute-mfcc-feats [options...] <wav-rspecifier> <feats-wspecifier> """ po = ParseOptions(usage) mfcc_opts = MfccOptions() mfcc_opts.register(po) po.register_bool( "subtract-mean", False, "Subtract mean of each feature" "file [CMS]; not recommended to do it this way.") po.register_float( "vtln-warp", 1.0, "Vtln warp factor (only applicable " "if vtln-map not specified)") po.register_str( "vtln-map", "", "Map from utterance or speaker-id to " "vtln warp factor (rspecifier)") po.register_str( "utt2spk", "", "Utterance to speaker-id map rspecifier" "(if doing VTLN and you have warps per speaker)") po.register_int( "channel", -1, "Channel to extract (-1 -> expect mono, " "0 -> left, 1 -> right)") po.register_float( "min-duration", 0.0, "Minimum duration of segments "
Posterior-formatted posterior: <uttid> [[(0,0.1), (1,0.89), (5,0.01)], [(1,0,9), (5,0.1)], ... [(0,0.8), (1,0.2)]] ... Usage: feat-to-post.py [options] feature_rspecifier posteriors_wspecifier e.g. feat-to-post scp:feats.scp ark:post.ark """ po = ParseOptions(usage) po.register_int("top-n", 10, "only keep highest N posteriors per frame, 10 by default") po.register_bool("rescale", False, "rescale top N posteriors to let summation equals to 1, false by default") opts = po.parse_args() if (po.num_args() != 2): po.print_usage() sys.exit() feature_rspecifier = po.get_arg(1) posterior_wspecifier = po.get_arg(2) isSuccess = feat_to_post(feature_rspecifier, posterior_wspecifier, opts.top_n, opts.rescale) if not isSuccess: sys.exit()
usage = """Copy matrices, or archives of matrices (e.g. features or transforms) Also see copy-feats which has other format options Usage: copy-matrix [options] <matrix-in-rspecifier> <matrix-out-wspecifier> or copy-matrix [options] <matrix-in-rxfilename> <matrix-out-wxfilename> e.g. copy-matrix --binary=false 1.mat - copy-matrix ark:2.trans ark,t:- """ po = ParseOptions(usage) po.register_bool( "binary", True, "Write in binary mode (only relevant if output is a wxfilename)") po.register_float( "scale", 1.0, "This option can be used to scale the matrices being copied.") po.register_bool( "apply-log", False, "This option can be used to apply log on the matrices. Must be avoided if matrix has negative quantities." ) po.register_bool("apply-exp", False, "This option can be used to apply exp on the matrices") po.register_float( "apply-power", 1.0, "This option can be used to apply a power on the matrices") po.register_bool( "apply-softmax-per-row", False,
if __name__ == '__main__':
    # Script entry point: configure logging, parse the command line, and run
    # post_to_count on the given input/output specifiers.

    # Configure log messages to look like Kaldi messages
    from kaldi import __version__
    # Rename the INFO level (numeric value 20) so records are labelled "LOG".
    logging.addLevelName(logging.INFO, "LOG")
    logging.basicConfig(format="%(levelname)s (%(module)s[{}]:%(funcName)s():"
                               "%(filename)s:%(lineno)s) %(message)s"
                               .format(__version__),
                        level=logging.INFO)

    usage = """Compute the counts of *feature-formatted* posterior for each mixture. If --normalize=True and --per-utt=False, the counts will be averaged by the number of utterances. Usage: post-count.py [options] feature_rspecifier posteriors_wspecifier e.g. post-count scp:feats.scp ark,t:count.txt """

    po = ParseOptions(usage)
    po.register_bool("normalize", False,
                     "normalize the counts, False by default")
    po.register_bool("per-utt", False,
                     "Count per utterance, False by default")
    opts = po.parse_args()

    # Exactly two positional arguments are required.
    if po.num_args() != 2:
        po.print_usage()
        # Non-zero status so calling scripts can detect the usage error;
        # a bare sys.exit() would report success (status 0).
        sys.exit(1)

    feature_rspecifier = po.get_arg(1)
    posterior_wspecifier = po.get_arg(2)

    is_success = post_to_count(feature_rspecifier, posterior_wspecifier,
                               normalize=opts.normalize,
                               per_utt=opts.per_utt)
    if not is_success:
        # Propagate the failure to the caller through the exit status.
        sys.exit(1)