def tsne_vector(vector_rspecifier, vector_wspecifier, output_dim=2,
                perplexity=30, learning_rate=200.0, n_iter=1000,
                distance='euclidean', verbose=0):
    """Reduce the vectors of a Kaldi table with t-SNE and write them back.

    Every (utterance id, vector) pair is read from ``vector_rspecifier``,
    the vectors are stacked row-wise, embedded with ``TSNE`` and each
    resulting row is written to ``vector_wspecifier`` under the utterance id
    it was read with.

    Args:
        vector_rspecifier: Kaldi rspecifier of the input vectors.
        vector_wspecifier: Kaldi wspecifier for the embedded vectors.
        output_dim: Passed to ``TSNE`` as ``n_components``.
        perplexity: Passed to ``TSNE`` as ``perplexity``.
        learning_rate: Passed to ``TSNE`` as ``learning_rate``.
        n_iter: Accepted for interface compatibility.
            NOTE(review): this value is not forwarded to ``TSNE`` (the
            original code never forwarded it either); the keyword name
            differs between scikit-learn releases - confirm the installed
            version before wiring it through.
        distance: Passed to ``TSNE`` as ``metric``.
        verbose: Passed to ``TSNE`` as ``verbose``.

    Returns:
        True once all embedded vectors have been written.
    """
    uttids = []
    vectors = []
    # Read the table exactly once and remember the ids alongside the
    # vectors: a second pass over the rspecifier is not possible for
    # non-rewindable inputs (pipes, stdin) and could pair ids with the
    # wrong rows if the two passes disagreed.
    with SequentialVectorReader(vector_rspecifier) as vector_reader:
        for uttid, vector in vector_reader:
            uttids.append(uttid)
            vectors.append(vector.numpy())

    # One row per utterance, in reading order.
    vectors = np.array(vectors)

    tsne = TSNE(n_components=output_dim,
                perplexity=perplexity,
                learning_rate=learning_rate,
                metric=distance,
                verbose=verbose)
    # Row i of the result corresponds to uttids[i].
    low_dim_vectors = tsne.fit_transform(vectors)

    with VectorWriter(vector_wspecifier) as vector_writer:
        for uttid, low_dim_vector in zip(uttids, low_dim_vectors):
            vector_writer[uttid] = low_dim_vector

    return True
def extract_embedding_to_hardisk(self, test_loader, embed_wspecifier):
    """Run the model over ``test_loader`` and store one embedding per item.

    The model is switched to eval mode and gradients are disabled. For the
    i-th item yielded by ``test_loader`` the features are moved to
    ``self.root_device``, embedded with ``self.model.get_embed`` and the
    squeezed result is written to ``embed_wspecifier`` under
    ``test_loader.dataset.uttids[i]``.

    NOTE(review): ids are looked up by the loader's iteration index, which
    presumably relies on one utterance per batch in dataset order - confirm
    against the loader configuration.

    Args:
        test_loader: Iterable of ``(feat2d, _)`` pairs whose ``dataset``
            attribute exposes ``uttids``.
        embed_wspecifier: Kaldi wspecifier the embeddings are written to.
    """
    banner = '>> Extracting utternace embeddings and write it to {}...'
    print(banner.format(embed_wspecifier))

    utterance_ids = test_loader.dataset.uttids
    self.model.eval()

    with torch.no_grad(), VectorWriter(embed_wspecifier) as embed_writer:
        for index, (features, _) in enumerate(test_loader):
            on_device = features.to(self.root_device)
            embedding = self.model.get_embed(on_device)
            as_array = embedding.squeeze().cpu().data.numpy()
            embed_writer[utterance_ids[index]] = as_array

    print(">> finish extracting utterance embeddings")
class PosteriorWriter():
    """Write per-utterance posteriors as flat vectors through a VectorWriter.

    The instance can be used as a context manager, in which case the
    underlying writer is closed when the ``with`` block is left (also when
    it is left through an exception). Calling ``close()`` explicitly keeps
    working as before.
    """

    def __init__(self, wxspecifier):
        """Create the underlying ``VectorWriter`` for ``wxspecifier``."""
        self.posterior_writer = VectorWriter(wxspecifier)

    def __enter__(self):
        """Return the writer itself so it can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying writer; exceptions are not suppressed."""
        self.close()

    def write(self, utt_id, counts, posteriors, indices):
        """Writes posteriors to disk in KALDI format.

        The stored vector is the concatenation of: the number of entries in
        ``counts``, ``counts`` itself, ``posteriors`` and ``indices``.

        Arguments:
            utt_id {string} -- Utterance ID to be written to scp file
            counts {Tensor} -- Tensor containing the numbers of selected
                posteriors for each frame
            posteriors {Tensor} -- Flattened Tensor containing all posteriors
            indices {Tensor} -- Flattened Tensor containing all Gaussian
                indices
        """
        counts = counts.numpy()
        posteriors = posteriors.numpy()
        indices = indices.numpy()
        # Leading element: how many entries ``counts`` has, so a reader can
        # split the flat vector again.
        nframes = np.array([counts.size])
        datavector = np.hstack([nframes, counts, posteriors, indices])
        self.posterior_writer.write(utt_id, Vector(datavector))

    def close(self):
        """Close the underlying writer."""
        self.posterior_writer.close()
def post_to_count(feature_rspecifier, cnt_wspecifier, normalize=False,
                  per_utt=False):
    """Write column means of the matrices in a Kaldi table.

    With ``per_utt`` set, the mean over axis 0 of every matrix is written
    under that matrix's own utterance id. Otherwise the per-matrix means are
    summed over the whole table (and divided by the number of matrices when
    ``normalize`` is set) and the single result is written under the key
    ``str(number_of_matrices)``.

    Args:
        feature_rspecifier: Kaldi rspecifier of the input matrices.
        cnt_wspecifier: Kaldi wspecifier for the output vectors.
        normalize: Divide the accumulated sum by the number of matrices.
            Only used when ``per_utt`` is False.
        per_utt: Write one vector per utterance instead of one global vector.

    Returns:
        True once the output has been written.
    """
    with SequentialMatrixReader(feature_rspecifier) as feature_reader, \
            VectorWriter(cnt_wspecifier) as cnt_writer:
        if per_utt:
            for uttid, feat in feature_reader:
                cnt_writer[uttid] = Vector(feat.numpy().mean(axis=0))
        else:
            vec = 0
            num_done = 0
            for _, feat in feature_reader:
                vec = vec + feat.numpy().mean(axis=0)
                num_done += 1
            # An empty table leaves num_done at 0; skip the division so the
            # normalised path no longer dies with ZeroDivisionError and
            # behaves like the un-normalised one.
            if normalize and num_done > 0:
                vec = vec / num_done
            cnt_writer[str(num_done)] = Vector(vec)
    return True
def pca_vector(vector_rspecifier, vector_wspecifier, output_dim=2):
    """Project the vectors of a Kaldi table with PCA and write them back.

    All (utterance id, vector) pairs are read from ``vector_rspecifier``,
    stacked row-wise, reduced to ``output_dim`` components with ``PCA`` and
    written to ``vector_wspecifier`` under their original utterance ids. The
    explained variance ratio of the fitted components is logged at INFO
    level.

    Args:
        vector_rspecifier: Kaldi rspecifier of the input vectors.
        vector_wspecifier: Kaldi wspecifier for the reduced vectors.
        output_dim: Passed to ``PCA`` as ``n_components``.

    Returns:
        True once all reduced vectors have been written.
    """
    keys, rows = [], []
    with SequentialVectorReader(vector_rspecifier) as reader:
        for key, vec in reader:
            keys.append(key)
            rows.append(vec.numpy())

    # One row per utterance, in reading order.
    data = np.array(rows)

    reducer = PCA(n_components=output_dim)
    projected = reducer.fit_transform(data)

    message = ("The variance explained ratio for each dim of the "
               "dim-reduced vectors is {}")
    logging.info(message.format(reducer.explained_variance_ratio_))

    with VectorWriter(vector_wspecifier) as writer:
        for key, row in zip(keys, projected):
            writer[key] = row

    return True
def feat_to_count(feature_rspecifier, cnt_wspecifier, normalize=False,
                  per_utt=False):
    """Write column means of the feature matrices in a Kaldi table.

    With ``per_utt`` set, the mean over axis 0 of every matrix is written
    under that matrix's own utterance id. Otherwise the per-matrix means are
    summed over the whole table (and divided by the number of matrices when
    ``normalize`` is set) and the single result is written under the key
    ``str(number_of_matrices)``.

    Args:
        feature_rspecifier: Kaldi rspecifier of the input feature matrices.
        cnt_wspecifier: Kaldi wspecifier for the output vectors.
        normalize: Divide the accumulated sum by the number of matrices.
            Only used when ``per_utt`` is False.
        per_utt: Write one vector per utterance instead of one global vector.

    Returns:
        True once the output has been written.
    """
    with SequentialMatrixReader(feature_rspecifier) as feature_reader, \
            VectorWriter(cnt_wspecifier) as cnt_writer:
        if per_utt:
            for uttid, feat in feature_reader:
                cnt_writer[uttid] = Vector(feat.numpy().mean(axis=0))
        else:
            vec = 0
            num_done = 0
            for _, feat in feature_reader:
                vec = vec + feat.numpy().mean(axis=0)
                num_done += 1
            # An empty table leaves num_done at 0; skip the division so the
            # normalised path no longer dies with ZeroDivisionError and
            # behaves like the un-normalised one.
            if normalize and num_done > 0:
                vec = vec / num_done
            cnt_writer[str(num_done)] = Vector(vec)
    return True
def compute_vad(wav_rspecifier, feats_wspecifier, opts):
    """This function computes the vad based on ltsv features.

    The output is written in the file denoted by feats_wspecifier, and if
    the test_plot flag is set, it produces a plot.

    Utterances shorter than ``opts.min_duration`` or with fewer channels
    than ``opts.channel`` requires are reported on stderr and skipped.

    Args:
        wav_rspecifier: Kaldi specifier for reading wav files.
        feats_wspecifier: Kaldi wspecifier for writing feature files.
        opts: Options. See main function for list of options

    Returns:
        True if computation was successful for at least one file.
        False otherwise.

    Raises:
        AssertionError: If ``opts.nfft`` is smaller than the frame length
            in samples.
    """
    num_utts, num_success = 0, 0
    with SequentialWaveReader(wav_rspecifier) as reader, \
            VectorWriter(feats_wspecifier) as writer:
        for num_utts, (key, wave) in enumerate(reader, 1):
            if wave.duration < opts.min_duration:
                print(
                    "File: {} is too short ({} sec): "
                    "producing no output.".format(key, wave.duration),
                    file=sys.stderr,
                )
                continue

            num_chan = wave.data().num_rows
            if opts.channel >= num_chan:
                # The placeholders were previously printed verbatim because
                # the message was never formatted.
                print(
                    "File with id {} has {} channels but you specified "
                    "channel {}, producing no output.".format(
                        key, num_chan, opts.channel),
                    file=sys.stderr,
                )
                continue
            # -1 selects the first channel.
            channel = 0 if opts.channel == -1 else opts.channel

            # Window and shift scaled by 1e-3 times the sampling frequency
            # (presumably given in milliseconds - confirm against the option
            # definitions).
            fr_length_samples = int(opts.frame_window * wave.samp_freq
                                    * (10**(-3)))
            fr_shift_samples = int(opts.frame_shift * wave.samp_freq
                                   * (10**(-3)))

            assert opts.nfft >= fr_length_samples, (
                "nfft must be at least the frame length in samples")

            wav_data = np.squeeze(wave.data()[channel].numpy())

            sample_freqs, segment_times, spec = signal.spectrogram(
                wav_data,
                fs=wave.samp_freq,
                nperseg=fr_length_samples,
                nfft=opts.nfft,
                noverlap=fr_length_samples - fr_shift_samples,
                scaling="spectrum",
                mode="psd",
            )

            # The spectrogram is transposed before ARMA is applied.
            specT = np.transpose(spec)

            spect_n = ARMA.ApplyARMA(specT, opts.arma_order)

            ltsv_f = LTSV.ApplyLTSV(
                spect_n,
                opts.ltsv_ctx_window,
                opts.threshold,
                opts.slope,
                opts.sigmoid_scale,
            )

            vad_feat = DCTF.ApplyDCT(opts.dct_num_cep, opts.dct_ctx_window,
                                     ltsv_f)

            if opts.test_plot:
                show_plot(
                    key,
                    segment_times,
                    sample_freqs,
                    spec,
                    wave.duration,
                    wav_data,
                    vad_feat,
                )

            writer[key] = Vector(vad_feat)
            num_success += 1

            if num_utts % 10 == 0:
                print("Processed {} utterances".format(num_utts),
                      file=sys.stderr)

    print(
        "Done {} out of {} utterances".format(num_success, num_utts),
        file=sys.stderr,
    )

    return num_success != 0
def compute_vad(wav_rspecifier, feats_wspecifier, opts):
    """This function computes the vad based on ltsv features.

    The output is written in the file denoted by feats_wspecifier, and if
    the test_plot flag is set, it produces a plot.

    Utterances that are shorter than ``opts.min_duration``, that have fewer
    channels than ``opts.channel`` requires, or whose feature computation
    raises an exception are reported on stderr and skipped.

    Args:
        wav_rspecifier: An ark or scp file as in Kaldi, that contains the
            input audio
        feats_wspecifier: An ark or scp file as in Kaldi, that the computed
            vad features are written to
        opts: Options. See main function for list of options

    Returns:
        True if features were written for at least one utterance,
        False otherwise.
    """
    num_utts, num_success = 0, 0
    with SequentialWaveReader(wav_rspecifier) as reader, \
            VectorWriter(feats_wspecifier) as writer:
        for num_utts, (key, wave) in enumerate(reader, 1):
            if wave.duration < opts.min_duration:
                print("File: {} is too short ({} sec): producing no output.".
                      format(key, wave.duration), file=sys.stderr)
                continue

            num_chan = wave.data().num_rows
            if opts.channel >= num_chan:
                # The placeholders were previously printed verbatim because
                # the message was never formatted.
                print(
                    "File with id {} has {} channels but you specified "
                    "channel {}, producing no output.".format(
                        key, num_chan, opts.channel),
                    file=sys.stderr)
                continue
            # -1 selects the first channel.
            channel = 0 if opts.channel == -1 else opts.channel

            # Window and shift scaled by 1e-3 times the sampling frequency
            # (presumably given in milliseconds - confirm against the option
            # definitions).
            fr_length_samples = int(opts.frame_window * wave.samp_freq
                                    * (10**(-3)))
            fr_shift_samples = int(opts.frame_shift * wave.samp_freq
                                   * (10**(-3)))

            try:
                wav_data = np.squeeze(wave.data()[channel].numpy())

                sample_freqs, segment_times, spec = signal.spectrogram(
                    wav_data,
                    fs=wave.samp_freq,
                    nperseg=fr_length_samples,
                    nfft=opts.nfft,
                    noverlap=fr_length_samples - fr_shift_samples,
                    scaling='spectrum',
                    mode='psd')

                # The spectrogram is transposed before ARMA is applied.
                specT = np.transpose(spec)

                spect_n = ARMA.ApplyARMA(specT, opts.arma_order)

                ltsv_f = LTSV.ApplyLTSV(spect_n, opts.ltsv_ctx_window,
                                        opts.threshold, opts.slope,
                                        opts.sigmoid_scale)

                vad_feat = DCTF.ApplyDCT(opts.dct_num_cep,
                                         opts.dct_ctx_window, ltsv_f)

                feats = Vector(vad_feat)

                if opts.test_plot:
                    show_plot(segment_times, sample_freqs, spec, wave,
                              wav_data, vad_feat)
            except Exception:
                # Best effort per utterance: report and move on. Only
                # ordinary errors are caught so that Ctrl-C and interpreter
                # exit are no longer swallowed.
                print("Failed to compute features for utterance", key,
                      file=sys.stderr)
                continue

            writer[key] = feats
            num_success += 1

            if num_utts % 10 == 0:
                print("Processed {} utterances".format(num_utts),
                      file=sys.stderr)

    print("Done {} out of {} utterances".format(num_success, num_utts),
          file=sys.stderr)

    return num_success != 0
def __init__(self, wxspecifier):
    """Create a ``VectorWriter`` for ``wxspecifier`` and keep it on the instance.

    Args:
        wxspecifier: Specifier handed unchanged to ``VectorWriter``.
    """
    writer = VectorWriter(wxspecifier)
    self.posterior_writer = writer