예제 #1
0
def tsne_vector(vector_rspecifier,
                vector_wspecifier,
                output_dim=2,
                perplexity=30,
                learning_rate=200.0,
                n_iter=1000,
                distance='euclidean',
                verbose=0):
    """Embed the vectors of a table into a low dimension with t-SNE.

    Every (utterance id, vector) pair is read from ``vector_rspecifier`` in a
    single pass, all vectors are embedded jointly, and each embedded vector
    is written to ``vector_wspecifier`` under its original utterance id.

    Args:
        vector_rspecifier: Specifier handed to ``SequentialVectorReader``.
        vector_wspecifier: Specifier handed to ``VectorWriter``.
        output_dim: Forwarded to ``TSNE`` as ``n_components``.
        perplexity: Forwarded to ``TSNE``.
        learning_rate: Forwarded to ``TSNE``.
        n_iter: Accepted for interface compatibility.
            NOTE(review): this value is not forwarded to ``TSNE`` here; the
            matching keyword differs between scikit-learn versions -- confirm
            which one to use before wiring it through.
        distance: Forwarded to ``TSNE`` as ``metric``.
        verbose: Forwarded to ``TSNE``.

    Returns:
        True once all embedded vectors have been written.
    """
    # Remember the keys during the one and only read pass: the input is not
    # reopened afterwards, so non-seekable sources (pipes, stdin) work too.
    uttids = []
    vectors = []
    with SequentialVectorReader(vector_rspecifier) as vector_reader:
        for uttid, vector in vector_reader:
            uttids.append(uttid)
            vectors.append(vector.numpy())
    # Stack the row vectors; row i belongs to uttids[i].
    vectors = np.array(vectors)
    tsne = TSNE(n_components=output_dim,
                perplexity=perplexity,
                learning_rate=learning_rate,
                metric=distance,
                verbose=verbose)
    # fit_transform returns one low-dimensional row per input row, same order.
    low_dim_vectors = tsne.fit_transform(vectors)
    with VectorWriter(vector_wspecifier) as vector_writer:
        for uttid, low_dim_vector in zip(uttids, low_dim_vectors):
            vector_writer[uttid] = low_dim_vector
    return True
예제 #2
0
 def extract_embedding_to_hardisk(self, test_loader, embed_wspecifier):
     """Compute an embedding per loader item and write it to a vector table.

     The model is switched to eval mode and run without gradient tracking.
     The embedding of the i-th item yielded by ``test_loader`` is stored
     under ``test_loader.dataset.uttids[i]``.

     Arguments:
         test_loader -- iterable of ``(features, _)`` pairs exposing
             ``dataset.uttids``
         embed_wspecifier -- specifier handed to ``VectorWriter``
     """
     banner = '>> Extracting utternace embeddings and write it to {}...'
     print(banner.format(embed_wspecifier))
     utt_keys = test_loader.dataset.uttids
     self.model.eval()
     with torch.no_grad(), VectorWriter(embed_wspecifier) as vector_writer:
         # NOTE(review): keys are indexed by the loader's iteration index,
         # which presumably assumes one utterance per batch -- confirm.
         for idx, (features, _) in enumerate(test_loader):
             on_device = features.to(self.root_device)
             embedding = self.model.get_embed(on_device)
             as_array = embedding.squeeze().cpu().data.numpy()
             vector_writer[utt_keys[idx]] = as_array
     print(">> finish extracting utterance embeddings")
예제 #3
0
class PosteriorWriter():
    """Writes per-utterance posteriors as flat vectors through a VectorWriter.

    The instance owns the underlying writer. Either call ``close()`` when
    done or use the instance as a context manager, which closes the writer
    on exit even if an exception was raised inside the ``with`` body.
    """

    def __init__(self, wxspecifier):
        """Open the underlying vector writer.

        Arguments:
            wxspecifier {string} -- Specifier handed to ``VectorWriter``
        """
        self.posterior_writer = VectorWriter(wxspecifier)

    def __enter__(self):
        """Return the writer itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying writer; never suppresses exceptions."""
        self.close()
        return False

    def write(self, utt_id, counts, posteriors, indices):
        """Writes posteriors to disk in KALDI format.

        The stored vector is the concatenation
        ``[number of frames, counts, posteriors, indices]``.

        Arguments:
            utt_id {string} -- Utterance ID to be written to scp file
            counts {Tensor} -- Tensor containing the numbers of selected posteriors for each frame
            posteriors {Tensor} -- Flattened Tensor containing all posteriors
            indices {Tensor} -- Flattened Tensor containing all Gaussian indices
        """

        counts = counts.numpy()
        posteriors = posteriors.numpy()
        indices = indices.numpy()
        # The number of frames is the number of per-frame counts; it is
        # stored first, as a one-element array, ahead of the three payloads.
        nframes = np.array([counts.size])
        datavector = np.hstack([nframes, counts, posteriors, indices])
        datavector = Vector(datavector)
        self.posterior_writer.write(utt_id, datavector)

    def close(self):
        """Close the underlying vector writer."""
        self.posterior_writer.close()
예제 #4
0
def post_to_count(feature_rspecifier, cnt_wspecifier, normalize=False, per_utt=False):
  """Write frame-averaged feature rows as count vectors.

  Each matrix read from ``feature_rspecifier`` is averaged over axis 0.

  Args:
      feature_rspecifier: Specifier handed to ``SequentialMatrixReader``.
      cnt_wspecifier: Specifier handed to ``VectorWriter``.
      normalize: In aggregate mode, divide the summed per-utterance averages
          by the number of utterances. Ignored when ``per_utt`` is true.
      per_utt: If true, write one averaged vector per utterance under the
          utterance id. Otherwise write a single vector, the sum of all
          per-utterance averages, keyed by the utterance count as a string.

  Returns:
      True on success. False in aggregate mode when the input table held no
      utterances; nothing is written in that case.
  """
  with SequentialMatrixReader(feature_rspecifier) as feature_reader, \
          VectorWriter(cnt_wspecifier) as cnt_writer:
    if per_utt:
      for uttid, feat in feature_reader:
        cnt_writer[uttid] = Vector(feat.numpy().mean(axis=0))
      return True

    vec = 0
    num_done = 0
    for _, feat in feature_reader:
      vec = vec + feat.numpy().mean(axis=0)
      num_done += 1
    if num_done == 0:
      # Empty input: there is nothing to average, and dividing by the
      # utterance count would raise ZeroDivisionError.
      return False
    if normalize:
      vec = vec / num_done
    cnt_writer[str(num_done)] = Vector(vec)
  return True
예제 #5
0
def pca_vector(vector_rspecifier, vector_wspecifier, output_dim=2):
    """Project the vectors of a table onto their leading principal components.

    All (utterance id, vector) pairs are read from ``vector_rspecifier``, a
    PCA with ``output_dim`` components is fitted on them, the explained
    variance ratio is logged, and every projected vector is written to
    ``vector_wspecifier`` under its original utterance id.

    Returns:
        True once all projected vectors have been written.
    """
    keys = []
    rows = []
    with SequentialVectorReader(vector_rspecifier) as vector_reader:
        for key, value in vector_reader:
            keys.append(key)
            rows.append(value.numpy())

    # One row per utterance; row order matches ``keys``.
    data = np.array(rows)
    pca = PCA(n_components=output_dim)
    projected = pca.fit_transform(data)

    message = "The variance explained ratio for each dim of the dim-reduced vectors is {}"
    logging.info(message.format(pca.explained_variance_ratio_))

    with VectorWriter(vector_wspecifier) as vector_writer:
        for row_index, row in enumerate(projected):
            vector_writer[keys[row_index]] = row
    return True
예제 #6
0
def feat_to_count(feature_rspecifier,
                  cnt_wspecifier,
                  normalize=False,
                  per_utt=False):
    """Write frame-averaged feature rows as count vectors.

    Each matrix read from ``feature_rspecifier`` is averaged over axis 0.

    Args:
        feature_rspecifier: Specifier handed to ``SequentialMatrixReader``.
        cnt_wspecifier: Specifier handed to ``VectorWriter``.
        normalize: In aggregate mode, divide the summed per-utterance
            averages by the number of utterances. Ignored when ``per_utt``
            is true.
        per_utt: If true, write one averaged vector per utterance under the
            utterance id. Otherwise write a single vector, the sum of all
            per-utterance averages, keyed by the utterance count as a string.

    Returns:
        True on success. False in aggregate mode when the input table held
        no utterances; nothing is written in that case.
    """
    with SequentialMatrixReader(feature_rspecifier) as feature_reader, \
            VectorWriter(cnt_wspecifier) as cnt_writer:
        if per_utt:
            for uttid, feat in feature_reader:
                cnt_writer[uttid] = Vector(feat.numpy().mean(axis=0))
            return True

        vec = 0
        num_done = 0
        for _, feat in feature_reader:
            vec = vec + feat.numpy().mean(axis=0)
            num_done += 1
        if num_done == 0:
            # Empty input: there is nothing to average, and dividing by the
            # utterance count would raise ZeroDivisionError.
            return False
        if normalize:
            vec = vec / num_done
        cnt_writer[str(num_done)] = Vector(vec)
    return True
예제 #7
0
def compute_vad(wav_rspecifier, feats_wspecifier, opts):
    """This function computes the vad based on ltsv features.

    The output is written in the file denoted by feats_wspecifier,
    and if the test_plot flag is set, it produces a plot.

    For every utterance the selected channel is turned into a spectrogram,
    passed through ARMA smoothing, the LTSV computation and a DCT, and the
    result is written as one vector under the utterance key. Utterances
    shorter than ``opts.min_duration`` or lacking the requested channel are
    reported on stderr and skipped.

    Args:
        wav_rspecifier: Kaldi specifier for reading wav files.
        feats_wspecifier: Kaldi wspecifier for writing feature files.
        opts: Options. See main function for list of options

    Returns:
        True if computation was successful for at least one file.
        False otherwise.
    """

    num_utts, num_success = 0, 0
    with SequentialWaveReader(wav_rspecifier) as reader, \
         VectorWriter(feats_wspecifier) as writer:

        # Counting from 1 leaves num_utts equal to the number of utterances
        # seen once the loop ends (and 0 if the table was empty).
        for num_utts, (key, wave) in enumerate(reader, 1):
            if wave.duration < opts.min_duration:
                print(
                    "File: {} is too short ({} sec): "
                    "producing no output.".format(key, wave.duration),
                    file=sys.stderr,
                )
                continue

            num_chan = wave.data().num_rows
            if opts.channel >= num_chan:
                # Fill in the placeholders; without .format() the literal
                # braces were printed instead of the offending values.
                print(
                    "File with id {} has {} channels but you specified "
                    "channel {}, producing no output.".format(
                        key, num_chan, opts.channel),
                    file=sys.stderr,
                )
                continue

            # A channel of -1 selects the first channel.
            channel = 0 if opts.channel == -1 else opts.channel

            # frame_window and frame_shift are scaled by 1e-3 and the
            # sampling frequency to obtain lengths in samples.
            fr_length_samples = int(opts.frame_window * wave.samp_freq *
                                    (10**(-3)))
            fr_shift_samples = int(opts.frame_shift * wave.samp_freq *
                                   (10**(-3)))

            assert opts.nfft >= fr_length_samples, (
                "nfft must be at least the frame length in samples")

            wav_data = np.squeeze(wave.data()[channel].numpy())

            sample_freqs, segment_times, spec = signal.spectrogram(
                wav_data,
                fs=wave.samp_freq,
                nperseg=fr_length_samples,
                nfft=opts.nfft,
                noverlap=fr_length_samples - fr_shift_samples,
                scaling="spectrum",
                mode="psd",
            )

            # Transpose the spectrogram before handing it to the ARMA step.
            specT = np.transpose(spec)

            spect_n = ARMA.ApplyARMA(specT, opts.arma_order)

            ltsv_f = LTSV.ApplyLTSV(
                spect_n,
                opts.ltsv_ctx_window,
                opts.threshold,
                opts.slope,
                opts.sigmoid_scale,
            )

            vad_feat = DCTF.ApplyDCT(opts.dct_num_cep, opts.dct_ctx_window,
                                     ltsv_f)

            if opts.test_plot:
                show_plot(
                    key,
                    segment_times,
                    sample_freqs,
                    spec,
                    wave.duration,
                    wav_data,
                    vad_feat,
                )

            writer[key] = Vector(vad_feat)
            num_success += 1

            if num_utts % 10 == 0:
                print("Processed {} utterances".format(num_utts),
                      file=sys.stderr)

    print(
        "Done {} out of {} utterances".format(num_success, num_utts),
        file=sys.stderr,
    )

    return num_success != 0
예제 #8
0
def compute_vad(wav_rspecifier, feats_wspecifier, opts):
    """This function computes the vad based on ltsv features.

    The output is written in the file denoted by feats_wspecifier,
    and if the test_plot flag is set, it produces a plot.

    Utterances that are too short, lack the requested channel, or fail
    during feature computation are reported on stderr and skipped.

    Args:
        wav_rspecifier: An ark or scp file as in Kaldi, that contains the
            input audio.
        feats_wspecifier: An ark or scp file as in Kaldi, that receives the
            computed features.
        opts: Options. See main function for list of options

    Returns:
        True if features were written for at least one utterance,
        False otherwise.
    """

    num_utts, num_success = 0, 0
    with SequentialWaveReader(wav_rspecifier) as reader, \
           VectorWriter(feats_wspecifier) as writer:

        # Counting from 1 leaves num_utts equal to the number of utterances
        # seen once the loop ends (and 0 if the table was empty).
        for num_utts, (key, wave) in enumerate(reader, 1):
            if wave.duration < opts.min_duration:
                print("File: {} is too short ({} sec): producing no output.".
                      format(key, wave.duration),
                      file=sys.stderr)
                continue

            num_chan = wave.data().num_rows
            if opts.channel >= num_chan:
                # Fill in the placeholders; without .format() the literal
                # braces were printed instead of the offending values.
                print(
                    "File with id {} has {} channels but you specified "
                    "channel {}, producing no output.".format(
                        key, num_chan, opts.channel),
                    file=sys.stderr)
                continue
            # A channel of -1 selects the first channel.
            channel = 0 if opts.channel == -1 else opts.channel

            # frame_window and frame_shift are scaled by 1e-3 and the
            # sampling frequency to obtain lengths in samples.
            fr_length_samples = int(opts.frame_window * wave.samp_freq *
                                    (10**(-3)))
            fr_shift_samples = int(opts.frame_shift * wave.samp_freq *
                                   (10**(-3)))

            try:

                wav_data = np.squeeze(wave.data()[channel].numpy())

                sample_freqs, segment_times, spec = signal.spectrogram(
                    wav_data,
                    fs=wave.samp_freq,
                    nperseg=fr_length_samples,
                    nfft=opts.nfft,
                    noverlap=fr_length_samples - fr_shift_samples,
                    scaling='spectrum',
                    mode='psd')

                specT = np.transpose(spec)

                spect_n = ARMA.ApplyARMA(specT, opts.arma_order)

                ltsv_f = LTSV.ApplyLTSV(spect_n, opts.ltsv_ctx_window,
                                        opts.threshold, opts.slope,
                                        opts.sigmoid_scale)

                vad_feat = DCTF.ApplyDCT(opts.dct_num_cep, opts.dct_ctx_window,
                                         ltsv_f)

                feats = Vector(vad_feat)

                if opts.test_plot:
                    show_plot(segment_times, sample_freqs, spec, wave,
                              wav_data, vad_feat)

            except Exception as err:
                # Best effort per utterance: report and move on. Catching
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit still stop the run.
                print("Failed to compute features for utterance",
                      key,
                      "({})".format(err),
                      file=sys.stderr)
                continue

            writer[key] = feats
            num_success += 1

            if num_utts % 10 == 0:
                print("Processed {} utterances".format(num_utts),
                      file=sys.stderr)

    print("Done {} out of {} utterances".format(num_success, num_utts),
          file=sys.stderr)

    return num_success != 0
예제 #9
0
 def __init__(self, wxspecifier):
     """Open a ``VectorWriter`` on ``wxspecifier`` and keep it on the instance."""
     writer = VectorWriter(wxspecifier)
     self.posterior_writer = writer