Exemple #1
0
        "ltsv-ctx-window",
        50,
        "Context window for LTSV computation (default: 50)",
    )
    # Sigmoid-scaling options for the LTSV computation.
    po.register_float(
        "threshold",
        0.01,
        "Parameter for sigmoid scaling in LTSV (default: 0.01)",
    )
    po.register_float(
        "slope", 0.001,
        "Parameter for sigmoid scaling in LTSV (default: 0.001)")
    po.register_bool("sigmoid-scale", True,
                     "Apply sigmoid scaling in LTSV (default: True)")
    # DCT options.
    po.register_int("dct-num-cep", 5,
                    "DCT number of coefficitents (default: 5)")
    po.register_int("dct-ctx-window", 30, "DCT context window (default: 30)")
    po.register_bool("test-plot", False,
                     "Produces a plot for testing (default: False)")

    opts = po.parse_args()

    # Exactly two positional arguments are required.  A usage error must be
    # reported with a non-zero status so calling scripts can detect it.
    if po.num_args() != 2:
        po.print_usage()
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    feats_wspecifier = po.get_arg(2)

    compute_vad(wav_rspecifier, feats_wspecifier, opts)
Exemple #2
0
        "%(filename)s:%(lineno)s) %(message)s".format(__version__),
        level=logging.INFO)
    usage = """Use Principal component analysis for dimension reduction.
  For the details, Please refer to website:
  https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

  Usage: pca-vector.py [options] <vector-rspecifier> <vector-wspecifier

  e.g.
      pca-vector.py scp:data/train/ivector.scp ark:data/train/low_dim_vector.ark

  see also: two-dim-vector-visual.py
  """
    po = ParseOptions(usage)
    po.register_int(
        "output-dim", 2, "dimension of the output vectors."
        " For visualization, only 2 is allowed in this program. (2 by default)"
    )
    opts = po.parse_args()
    if (po.num_args() != 2):
        po.print_usage()
        sys.exit()

    vector_rspecifier = po.get_arg(1)
    vector_wspecifier = po.get_arg(2)
    isSuccess = pca_vector(vector_rspecifier,
                           vector_wspecifier,
                           output_dim=opts.output_dim)
    if not isSuccess:
        sys.exit()
Exemple #3
0
   Posterior-formatted posterior:
     <uttid> [[(0,0.1), (1,0.89), (5,0.01)],
              [(1,0,9), (5,0.1)],
                ...
              [(0,0.8), (1,0.2)]]
       ... 

  Usage: feat-to-post.py [options] feature_rspecifier posteriors_wspecifier

  e.g.
      feat-to-post scp:feats.scp ark:post.ark

  """
  po = ParseOptions(usage)
  po.register_int("top-n", 10,
                  "only keep highest N posteriors per frame, 10 by default")
  po.register_bool("rescale", False,
                   "rescale top N posteriors to let summation equals to 1, false by default")
  opts = po.parse_args()

  # Exactly two positional arguments are required.  Exit non-zero on a usage
  # error so the calling shell does not mistake it for success.
  if po.num_args() != 2:
    po.print_usage()
    sys.exit(1)

  feature_rspecifier = po.get_arg(1)
  posterior_wspecifier = po.get_arg(2)
  is_success = feat_to_post(feature_rspecifier, posterior_wspecifier,
                            opts.top_n, opts.rescale)
  # A falsy result from feat_to_post is treated as failure and is propagated
  # to the caller through the exit status.
  if not is_success:
    sys.exit(1)
Exemple #4
0
    Note: lattices, if output, will just be linear sequences;
          use gmm-latgen-faster if you want "real" lattices.
    """
    po = ParseOptions(usage)
    # Presumably adds the decoder's own options to `po`.
    # NOTE(review): the meaning of the second argument (True) is not visible
    # here -- confirm against FasterDecoderOptions.register.
    decoder_opts = FasterDecoderOptions()
    decoder_opts.register(po, True)
    po.register_float("acoustic-scale", 0.1,
                      "Scaling factor for acoustic likelihoods")
    po.register_bool("allow-partial", True,
                     "Produce output even when final state was not reached")
    po.register_str("word-symbol-table", "",
                    "Symbol table for words [for debug output]")
    opts = po.parse_args()

    # Four positional arguments are mandatory, the fifth and sixth optional.
    # Exit non-zero on a usage error so callers can detect it.
    if not 4 <= po.num_args() <= 6:
        po.print_usage()
        sys.exit(1)

    model_rxfilename = po.get_arg(1)
    fst_rxfilename = po.get_arg(2)
    feature_rspecifier = po.get_arg(3)
    words_wspecifier = po.get_arg(4)
    # Optional outputs: fetched with get_opt_arg because they may be absent.
    alignment_wspecifier = po.get_opt_arg(5)
    lattice_wspecifier = po.get_opt_arg(6)

    gmm_decode_faster(model_rxfilename, fst_rxfilename,
                      feature_rspecifier, words_wspecifier,
                      alignment_wspecifier, lattice_wspecifier,
                      opts.word_symbol_table, opts.acoustic_scale,
                      opts.allow_partial, decoder_opts)
Exemple #5
0
    # Configure log messages to look like Kaldi messages: level 20
    # (logging.INFO) is displayed as "LOG" and the package version is
    # embedded in the record prefix.
    from kaldi import __version__
    logging.addLevelName(20, 'LOG')
    logging.basicConfig(
        format='%(levelname)s (%(module)s[{}]:%(funcName)s():'
        '%(filename)s:%(lineno)s) %(message)s'.format(__version__),
        level=logging.INFO)

    usage = """Extract segments from a large audio file in WAV format.
    Usage:
        extract-segments [options] <wav-rspecifier> <segments-file> <wav-wspecifier>
    """
    po = ParseOptions(usage)
    po.register_float(
        "min-segment-length", 0.1, "Minimum segment length "
        "in seconds (reject shorter segments)")
    po.register_float(
        "max_overshoot", 0.5, "End segments overshooting audio "
        "by less than this (in seconds) are truncated, "
        "else rejected.")

    opts = po.parse_args()

    # Exactly three positional arguments are required.  Exit non-zero on a
    # usage error so the calling shell does not mistake it for success.
    if po.num_args() != 3:
        po.print_usage()
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    segments_rxfilename = po.get_arg(2)
    wav_wspecifier = po.get_arg(3)

    extract_segments(wav_rspecifier, segments_rxfilename, wav_wspecifier, opts)
Exemple #6
0
    opts = po.parse_args()

    # Two positional arguments are mandatory: the input and the output.
    if po.num_args() != 2:
        po.print_usage()
        sys.exit(1)

    # apply-log, apply-exp and apply-softmax-per-row are mutually exclusive:
    # reject the command line as soon as any two of them are set.
    if sum(map(bool, (opts.apply_log, opts.apply_exp,
                      opts.apply_softmax_per_row))) > 1:
        print(
            "Only one of apply-log, apply-exp and apply-softmax-per-row can be given",
            file=sys.stderr)
        sys.exit(1)

    matrix_in_fn = po.get_arg(1)
    matrix_out_fn = po.get_arg(2)

    # Each side is either a table specifier or a plain file name; the first
    # element of the classification result says which.
    in_is_rspecifier = (
        classify_rspecifier(matrix_in_fn)[0] != RspecifierType.NO_SPECIFIER)
    out_is_wspecifier = (
        classify_wspecifier(matrix_out_fn)[0] != WspecifierType.NO_SPECIFIER)

    # Input and output must be of the same kind.
    if in_is_rspecifier != out_is_wspecifier:
        print("Cannot mix archives with regular files (copying matrices)",
              file=sys.stderr)
        sys.exit(1)

    if not in_is_rspecifier:
        mat = read_matrix(matrix_in_fn)
        if opts.scale != 1.0:
    sw02005-B sw02005 B
    interpreted as <utterance-id> <call-id> <side> and for each <call-id>
    that has two sides, does the 'only-the-louder' computation, else does
    per-utterance stats in the normal way.
    Note: loudness is judged by the first feature component, either energy or c0
    only applicable to MFCCs or PLPs (this code could be modified to handle filterbanks).

    Usage: compute-cmvn-stats-two-channel [options] <reco2file-and-channel> <feats-rspecifier> <stats-wspecifier>
    e.g.: compute-cmvn-stats-two-channel data/train_unseg/reco2file_and_channel scp:data/train_unseg/feats.scp ark,t:-
    """

    po = ParseOptions(usage)
    po.register_float(
        "quieter_channel_weight",
        0.01,
        "For the quieter channel, apply this weight to the stats, "
        "so that we still get stats if one channel always dominates.",
    )
    opts = po.parse_args()

    # The tool takes exactly three positional arguments.
    if po.num_args() != 3:
        po.print_usage()
        sys.exit(1)

    # Positional arguments are 1-based and fetched in order.
    reco2chan_rxfilename, feats_rspec, stats_wspec = (
        po.get_arg(position) for position in (1, 2, 3))

    compute_cmvn_stats_two_channel(
        reco2chan_rxfilename, feats_rspec, stats_wspec, opts)
Exemple #8
0
if __name__ == '__main__':
  # Configure log messages to look like Kaldi messages: level 20
  # (logging.INFO) is displayed as "LOG" and the package version is embedded
  # in the record prefix.
  from kaldi import __version__
  logging.addLevelName(20, "LOG")
  logging.basicConfig(format="%(levelname)s (%(module)s[{}]:%(funcName)s():"
                             "%(filename)s:%(lineno)s) %(message)s"
                             .format(__version__), level=logging.INFO)
  usage = """save the visualization plot of 2-dimensional vectors to hardisk.

  Usage: two-dim-vector-visual.py [options] <vector-rspecifier> <utt2spk-rxfilename> <figure-rxfilename>

  e.g.
      two-dim-vector-visual.py scp:data/train/2d_vectors.scp data/train/utt2spk data/train/2d_vectors.png
  """
  po = ParseOptions(usage)
  opts = po.parse_args()

  # Exactly three positional arguments are required.  Exit non-zero on a
  # usage error so the calling shell does not mistake it for success.
  if po.num_args() != 3:
    po.print_usage()
    sys.exit(1)

  vector_rspecifier = po.get_arg(1)
  utt2spk_rxfilename = po.get_arg(2)
  figure_rxfilename = po.get_arg(3)
  is_success = two_dim_vector_visual(vector_rspecifier,
                                     utt2spk_rxfilename,
                                     figure_rxfilename)
  # A falsy result from two_dim_vector_visual is treated as failure and is
  # propagated to the caller through the exit status.
  if not is_success:
    sys.exit(1)
    po = ParseOptions(usage)
    po.register_int(
        "n-clusters", 8,
        "The number of clusters to form as well as the number of centroids to generate. default=8"
    )
    po.register_int(
        "random-state", 0,
        "Determines random number generation for centroid initialization and random reassignment. "
        "Use an int to make the randomness deterministic. ")
    po.register_int("batch-size", 6, "Size of the mini batches.")
    po.register_int(
        "max-iter", 100,
        "Maximum number of iterations over the complete dataset before stopping independently of "
        "any early stopping criterion heuristics.")
    opts = po.parse_args()

    # Exactly two positional arguments are required.  Exit non-zero on a
    # usage error so the calling shell does not mistake it for success.
    if po.num_args() != 2:
        po.print_usage()
        sys.exit(1)

    vector_rspecifier = po.get_arg(1)
    utt2clusterid_rxfilename = po.get_arg(2)
    is_success = kmeans_vector(vector_rspecifier,
                               utt2clusterid_rxfilename,
                               n_clusters=opts.n_clusters,
                               random_state=opts.random_state,
                               batch_size=opts.batch_size,
                               max_iter=opts.max_iter)
    # A falsy result from kmeans_vector is treated as failure and is
    # propagated to the caller through the exit status.
    if not is_success:
        sys.exit(1)