def main_():
    """Rescale the variance implied by accumulated statistics for selected dimensions.

    Each input matrix must have exactly two rows. Row 0 is read as the
    first-order sums with the frame count stored in its last column, and
    row 1 as the second-order sums. For every dimension named by
    ``dim_for_scaling`` the second-order sum ``x2`` is replaced by
    ``scale * x2 + (1 - scale) * count * mean**2``, which multiplies
    ``x2 / count - mean**2`` by ``scale`` while leaving the mean untouched.
    All other entries are copied through unchanged.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("scale", help="varをscale倍するよう、二次統計量をいじる", type=float)
    parser.add_argument("dim_for_scaling", help="33-52,3 (same as select-feats)", type=str)
    parser.add_argument("stats_rspecifier", help="in", type=str)
    parser.add_argument("stats_wspecifier", help="out", type=str)
    opts = parser.parse_args()

    target_dims = FeatureOperator.parse_dim_specifier(opts.dim_for_scaling)
    factor = opts.scale

    with kaldi_io.open_or_fd(opts.stats_wspecifier, "wb") as sink:
        for speaker, stats_in in kaldi_io.read_mat_ark(opts.stats_rspecifier):
            num_rows, _ = stats_in.shape
            assert num_rows == 2
            num_frames = stats_in[0, -1]
            # Work on a copy so every read below sees the original statistics.
            stats_out = np.array(stats_in)
            for dim in target_dims:
                first_order = stats_in[0, dim]
                second_order = stats_in[1, dim]
                mu = first_order / num_frames
                rescaled = factor * second_order + (1 - factor) * num_frames * mu * mu
                assert rescaled > 0
                stats_out[1, dim] = rescaled
            kaldi_io.write_mat(sink, stats_out, key=speaker)
def main_():
    """Convert an archive of float vectors into an archive of one-column matrices.

    Every vector read from ``vec_rspecifier`` is reshaped to ``(-1, 1)`` and
    written to ``feat_wspecifier`` under the same key.
    """
    parser = argparse.ArgumentParser()
    for positional in ("vec_rspecifier", "feat_wspecifier"):
        parser.add_argument(positional, type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feat_wspecifier, mode="wb") as sink:
        for utt_id, vector in kaldi_io.read_vec_flt_ark(opts.vec_rspecifier):
            column = vector.reshape(-1, 1)
            kaldi_io.write_mat(sink, column, key=utt_id)
def main_():
    """Convert an alignment archive into an archive of one-column float32 matrices.

    Every alignment read from ``ali_rspecifier`` is reshaped to ``(-1, 1)``,
    cast to float32 and written to ``feat_wspecifier`` under the same key.
    """
    parser = argparse.ArgumentParser()
    for positional in ("ali_rspecifier", "feat_wspecifier"):
        parser.add_argument(positional, type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feat_wspecifier, mode="wb") as sink:
        for utt_id, alignment in kaldi_io.read_ali_ark(opts.ali_rspecifier):
            column = alignment.reshape(-1, 1)
            as_float = column.astype("float32")
            kaldi_io.write_mat(sink, as_float, key=utt_id)
def main_():
    """Write the element-wise absolute value of every feature matrix.

    Matrices are read from ``feature_rspecifier`` and written to
    ``feature_wspecifier`` under their original keys.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            magnitude = np.abs(matrix)
            kaldi_io.write_mat(sink, magnitude, key=utt_id)
def main_():
    """Exponentiate every feature value: each output element is ``base ** value``.

    ``--base`` defaults to ``math.e``. Matrices are read from
    ``feature_rspecifier`` and written to ``feature_wspecifier`` under their
    original keys.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("--base", type=float, default=math.e)
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            kaldi_io.write_mat(sink, opts.base ** matrix, key=utt_id)
def main_():
    """Binarize feature matrices at a fixed threshold of 0.5.

    Elements greater than or equal to the threshold become 1.0 and all
    others become 0.0. Matrices keep their original keys.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    cutoff = 0.5
    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            is_on = matrix >= cutoff
            binary = np.where(is_on, 1.0, 0.0)
            kaldi_io.write_mat(sink, binary, key=utt_id)
def main_():
    """Emit an all-zero matrix for every input utterance.

    Each output has as many rows as the corresponding input matrix and
    ``--dim`` columns (default 1), and is written under the same key.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dim", default=1, type=int)
    parser.add_argument("feat_rspecifier", type=str)
    parser.add_argument("feat_wspecifier", type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feat_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feat_rspecifier):
            # Only the number of rows of the input is used.
            blank = np.zeros((matrix.shape[0], opts.dim))
            kaldi_io.write_mat(sink, blank, key=utt_id)
def main_():
    """Decimate every feature matrix along axis 0.

    Applies ``signal.decimate`` with the factor given by ``--decimate``
    (default 2) and writes the result under the original key.
    NOTE(review): ``signal`` is presumably ``scipy.signal`` -- confirm against
    the file's imports.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("--decimate", default=2, type=int)
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    factor = opts.decimate
    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            reduced = signal.decimate(matrix, factor, axis=0)
            kaldi_io.write_mat(sink, reduced, key=utt_id)
def main_():
    """Clamp feature values to the range given by ``--min`` / ``--max``.

    Values at or above ``--max`` (default 1) are replaced by ``--max``, then
    values at or below ``--min`` (default -1) are replaced by ``--min``.
    Both comparisons are made against the original values, and the lower
    bound is applied last.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("--min", type=float, default=-1)
    parser.add_argument("--max", type=float, default=1)
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            capped = np.where(matrix >= opts.max, opts.max, matrix)
            clamped = np.where(matrix <= opts.min, opts.min, capped)
            kaldi_io.write_mat(sink, clamped, key=utt_id)
def main_():
    """Smooth every feature column with a centred moving average.

    Each column of each input matrix is convolved with a uniform kernel of
    length ``--window`` (default 3). Samples outside the utterance count as
    zero, so values near the edges are attenuated. The output always has
    the same shape as the input, including utterances that are shorter than
    the window.
    """
    ap = argparse.ArgumentParser(usage="")
    ap.add_argument("--window", default=3, type=int)
    ap.add_argument("feature_rspecifier", help="input feat")
    ap.add_argument("feature_wspecifier", help="output feat")
    args = ap.parse_args()

    window = args.window
    if window < 1:
        # A zero or negative window has no meaningful averaging kernel.
        ap.error("--window must be a positive integer")

    avg_mask = np.ones(window) / window
    # np.convolve(..., 'same') returns max(len(signal), len(kernel)) samples,
    # which does not fit the output buffer when an utterance has fewer frames
    # than the window. Slicing the centred part out of the 'full' result gives
    # the same values for long utterances and the right length for short ones.
    start = (window - 1) // 2

    with kaldi_io.open_or_fd(args.feature_wspecifier, "wb") as feature_writer:
        for key, feats in kaldi_io.read_mat_ark(args.feature_rspecifier):
            num_frames = feats.shape[0]
            dim = feats.shape[1]
            feats_avg = np.zeros(feats.shape, dtype=float)
            for d in range(dim):
                full = np.convolve(feats[:, d], avg_mask, "full")
                feats_avg[:, d] = full[start:start + num_frames]
            kaldi_io.write_mat(feature_writer, feats_avg, key=key)
def main_():
    """Binarize feature matrices at ``--thresh`` (default 0.5).

    Elements greater than or equal to the threshold become 1 and elements
    below it become 0. When ``--write_int`` is true (the default) the input
    must have exactly one column, and the result is written as a flat int32
    vector. Otherwise it is written as a matrix.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("--write_int", type=str_to_bool, default=True)
    parser.add_argument("--thresh", type=float, default=0.5)
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            raised = np.where(matrix >= opts.thresh, 1, matrix)
            binary = np.where(matrix < opts.thresh, 0, raised)

            if not opts.write_int:
                kaldi_io.write_mat(sink, binary, key=utt_id)
                continue

            assert binary.shape[1] == 1
            labels = binary.astype("int32").reshape(-1)
            kaldi_io.write_vec_int(sink, labels, key=utt_id)
def main_():
    """Overwrite a block of columns in the source features with a second stream.

    For each utterance in ``feat_src_rspecifier`` the matching matrix is
    fetched from ``feat_tgt_rspecifier`` via ``FO.read_next_feat`` and copied
    over the columns starting at ``tgt_start_dim``. The patched copy is
    written to ``feat_wspecifier`` under the same key.
    NOTE(review): ``FO.read_next_feat`` is assumed to return a 2-D matrix with
    as many rows as the source utterance -- confirm against its definition.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("feat_src_rspecifier", type=str)
    parser.add_argument("feat_tgt_rspecifier", type=str)
    parser.add_argument("tgt_start_dim", type=int)
    parser.add_argument("feat_wspecifier", type=str)
    opts = parser.parse_args()

    first_col = opts.tgt_start_dim
    with kaldi_io.open_or_fd(opts.feat_wspecifier, mode="wb") as sink, \
            kaldi_io.open_or_fd(opts.feat_tgt_rspecifier, mode="rb") as tgt_stream:
        for utt_id, source in kaldi_io.read_mat_ark(opts.feat_src_rspecifier):
            patched = source.copy()
            num_frames, _ = patched.shape
            replacement = FO.read_next_feat(tgt_stream, key=utt_id, num_frame=num_frames)
            _, width = replacement.shape
            patched[:, first_col:first_col + width] = replacement
            kaldi_io.write_mat(sink, patched, key=utt_id)
def main_():
    """Replace feature values with a floor wherever the ``uv`` mask is below 0.5.

    The ``uv`` archive is read in lock-step with the feature archive: for
    every feature matrix the next key and matrix are pulled from
    ``uv_rspecifier``. Where the mask value is at least 0.5 the feature value
    is kept. Elsewhere it is replaced by ``--floor_value`` (default 0).

    Raises:
        AssertionError: if the next key in the ``uv`` archive differs from the
            key of the current feature matrix.
    """
    ap = argparse.ArgumentParser(usage="")
    ap.add_argument("--floor_value", default=0, type=float)
    ap.add_argument("uv_rspecifier", help="input uv")
    ap.add_argument("feature_rspecifier", help="input feat")
    ap.add_argument("feature_wspecifier", help="output feat")
    args = ap.parse_args()

    thresh = 0.5
    floor = args.floor_value
    with kaldi_io.open_or_fd(args.uv_rspecifier, "rb") as uv_reader:
        with kaldi_io.open_or_fd(args.feature_wspecifier, "wb") as feature_writer:
            for key, feats in kaldi_io.read_mat_ark(args.feature_rspecifier):
                # The key read advances the uv stream, so it must not live
                # inside an `assert`: under `python -O` the statement (and the
                # read) would be removed and the following read_mat would
                # start at the wrong position.
                uv_key = kaldi_io.read_key(uv_reader)
                if uv_key != key:
                    raise AssertionError(
                        "uv key %r does not match feature key %r" % (uv_key, key)
                    )
                uv = kaldi_io.read_mat(uv_reader)
                new_feats = np.where(uv >= thresh, feats, floor)
                kaldi_io.write_mat(feature_writer, new_feats, key=key)
def main_():
    """Turn mean/variance statistics into de-normalisation parameters.

    Each input matrix must have exactly two rows: row 0 is read as the mean
    and row 1 as the variance. The output matrix has the square root of the
    variance in row 0 and the mean in row 1, and keeps the original key.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("meanvar_rspecifier", help="in", type=str)
    parser.add_argument("denormscale_wspecifier", help="out", type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.denormscale_wspecifier, "wb") as sink:
        for speaker, stats in kaldi_io.read_mat_ark(opts.meanvar_rspecifier):
            num_rows, _ = stats.shape
            assert num_rows == 2
            std_dev = np.sqrt(stats[1, :])
            params = np.empty_like(stats)
            params[0] = std_dev
            params[1] = stats[0, :]
            kaldi_io.write_mat(sink, params, key=speaker)
def main_():
    """Pack ``.npy`` files listed in a text file into a feature archive.

    ``npy_list`` is a text file (or ``-`` for standard input) with one
    ``<utt> <path-to-npy>`` pair per line, separated by any run of spaces or
    tabs. Blank lines are ignored. Each array is loaded with ``np.load``,
    transposed when ``--trans`` is true (the default), and written to
    ``feat_wspecifier`` under the utterance key.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--trans", type=str_to_bool, default=True)
    ap.add_argument("npy_list", type=str)
    ap.add_argument("feat_wspecifier", type=str)
    args = ap.parse_args()

    npy_fp = sys.stdin if args.npy_list == "-" else open(args.npy_list, mode="rt")
    try:
        with kaldi_io.open_or_fd(args.feat_wspecifier, "wb") as feat_writer:
            for line in npy_fp:
                # split() with no argument collapses repeated spaces/tabs, so
                # column-aligned lists no longer produce empty extra fields.
                fields = line.split()
                if not fields:
                    continue
                assert len(fields) == 2, (
                    "expected '<utt> <npy-file>', got %r" % line
                )
                utt, npy_file = fields
                data = np.load(npy_file)
                if args.trans:
                    data = data.T
                kaldi_io.write_mat(feat_writer, data, key=utt)
    finally:
        # Release the list handle even when loading or writing fails part-way.
        npy_fp.close()
def main_():
    """Expand per-dimension scale/bias vectors into a ``D x (D+1)`` transform matrix.

    Each input matrix must have exactly two rows: row 0 is read as the scale
    and row 1 as the bias. The output has the scale on the main diagonal, the
    bias in the last column and zeros elsewhere, and keeps the original key.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("scalevec_rspecifier", help="in", type=str)
    parser.add_argument("trans_wspecifier", help="out", type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.trans_wspecifier, "wb") as sink:
        for speaker, params in kaldi_io.read_mat_ark(opts.scalevec_rspecifier):
            num_rows, num_dims = params.shape
            assert num_rows == 2
            affine = np.zeros((num_dims, num_dims + 1), dtype=float)
            scale = params[0, :]
            bias = params[1, :]
            assert isinstance(scale, np.ndarray)
            diag = np.arange(num_dims)
            affine[diag, diag] = scale
            affine[:, -1] = bias
            kaldi_io.write_mat(sink, affine, key=speaker)
def main_():
    """Drop the feature frames whose ``uv`` mask value is below 0.5.

    The ``uv`` archive is read in lock-step with the feature archive: for
    every feature matrix the next key and matrix are pulled from
    ``uv_rspecifier``. Rows whose mask contains a value below 0.5 are removed
    from the features. Utterances left with no rows are not written.

    Raises:
        AssertionError: if the next key in the ``uv`` archive differs from the
            key of the current feature matrix.
    """
    ap = argparse.ArgumentParser(usage="")
    ap.add_argument("uv_rspecifier", help="input uv")
    ap.add_argument("feature_rspecifier", help="input feat")
    ap.add_argument("feature_wspecifier", help="output feat")
    args = ap.parse_args()

    thresh = 0.5
    with kaldi_io.open_or_fd(args.uv_rspecifier, "rb") as uv_reader:
        with kaldi_io.open_or_fd(args.feature_wspecifier, "wb") as feature_writer:
            for key, feats in kaldi_io.read_mat_ark(args.feature_rspecifier):
                # The key read advances the uv stream, so it must not live
                # inside an `assert`: under `python -O` the statement (and the
                # read) would be removed and the following read_mat would
                # start at the wrong position.
                uv_key = kaldi_io.read_key(uv_reader)
                if uv_key != key:
                    raise AssertionError(
                        "uv key %r does not match feature key %r" % (uv_key, key)
                    )
                uv = kaldi_io.read_mat(uv_reader)
                # First element of the np.where tuple holds the row indices.
                idx = np.where(uv < thresh)
                new_feats = np.delete(feats, idx[0], axis=0)
                if new_feats.shape[0] < 1:
                    continue
                kaldi_io.write_mat(feature_writer, new_feats, key=key)
def main_():
    """Turn mean/variance statistics into normalisation scale and offset.

    Each input matrix must have exactly two rows: row 0 is read as the mean
    and row 1 as the variance. The output has ``1 / sqrt(var)`` in row 0 and
    ``-(mean * scale)`` in row 1, and keeps the original key.
    """
    parser = argparse.ArgumentParser(usage="")
    parser.add_argument("meanvar_rspecifier", help="in", type=str)
    parser.add_argument("normscale_wspecifier", help="out", type=str)
    opts = parser.parse_args()

    with kaldi_io.open_or_fd(opts.normscale_wspecifier, "wb") as sink:
        for speaker, stats in kaldi_io.read_mat_ark(opts.meanvar_rspecifier):
            num_rows, _ = stats.shape
            assert num_rows == 2
            mu = stats[0, :]
            variance = stats[1, :]
            assert isinstance(variance, np.ndarray)
            inv_std = 1.0 / np.sqrt(variance)
            shift = -(mu * inv_std)
            params = np.empty_like(stats)
            params[0] = inv_std
            params[1] = shift
            kaldi_io.write_mat(sink, params, key=speaker)
mix_file = os.path.join(wav_dir, 'mix', file_name) s1_file = os.path.join(wav_dir, 's1', file_name) s2_file = os.path.join(wav_dir, 's2', file_name) mix_wav = audioread(mix_file, samp_rate=8000) s1_wav = audioread(s1_file, samp_rate=8000) s2_wav = audioread(s2_file, samp_rate=8000) s1_gender = gender_dict[key.split('_')[0][0:3]] s2_gender = gender_dict[key.split('_')[2][0:3]] gender = np.array([s1_gender, s2_gender]).astype(np.int32) kaldi_io.write_vec_int(gender_f, gender, key=key) mix_stft = stft(mix_wav, size=256, shift=64) mix_abs = np.abs(mix_stft) mix_angle = np.angle(mix_stft) s1_stft = stft(s1_wav, size=256, shift=64) s1_abs = np.abs(s1_stft) s1_angle = np.angle(s1_stft) s2_stft = stft(s2_wav, size=256, shift=64) s2_abs = np.abs(s2_stft) s2_angle = np.angle(s2_stft) mix_data = np.concatenate((mix_abs, mix_angle), axis=1) s1_data = np.concatenate((s1_abs, s1_angle), axis=1) s2_data = np.concatenate((s2_abs, s2_angle), axis=1) feats = np.concatenate((mix_data, s1_data, s2_data), axis=0).astype(np.float32) kaldi_io.write_mat(feats_f, feats, key=key) feats_f.close() gender_f.close()
#!/usr/bin/env python
"""Add a constant to every element of every feature matrix in an archive."""
import argparse

import numpy as np

from utils import kaldi_io

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--const", type=float, default=0)
    parser.add_argument("feat_rspecifier", type=str)
    parser.add_argument("feat_wspecifier", type=str)
    opts = parser.parse_args()

    # Matrices are streamed one at a time and keep their original keys.
    with kaldi_io.open_or_fd(opts.feat_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feat_rspecifier):
            kaldi_io.write_mat(sink, matrix + opts.const, key=utt_id)
s1_angle = np.angle(s1_stft) s2_stft = stft(s2_wav, size=size, shift=shift).astype(np.complex64) s2_abs = np.abs(s2_stft) s2_angle = np.angle(s2_stft) s3_stft = stft(s3_wav, size=size, shift=shift).astype(np.complex64) s3_abs = np.abs(s3_stft) s3_angle = np.angle(s3_stft) num_frames = mix_stft.shape[0] log_mix_magn = np.log(mix_abs + 1) write_lenf.write(key + ' ' + str(num_frames) + '\n') for i in range(num_frames): feat_mean, feat_variance = moving_average( feat_mean, feat_variance, log_mix_magn[i], decay) mix_data = np.concatenate((mix_abs, mix_angle), axis=1) s1_data = np.concatenate((s1_abs, s1_angle), axis=1) s2_data = np.concatenate((s2_abs, s2_angle), axis=1) s3_data = np.concatenate((s3_abs, s3_angle), axis=1) feats = np.concatenate((mix_data, s1_data, s2_data, s3_data), axis=0).astype(np.float32) write_mat(write_ff, feats, key=key) if (i_line + 1) % 1000 == 0: print('processed %d sentence' % (i_line + 1)) write_ff.close() write_gf.close() write_lenf.close() mean_variance = np.stack((feat_mean, feat_variance), axis=1) np.savetxt(mean_var_file, mean_variance) print('finished task for directory ' + i_type)
stftparser.add_argument('-power', default=False, action="store_true") stftparser.set_defaults(extractfeat=extractstft) rawparser = subparsers.add_parser('raw') rawparser.add_argument('-hop_length', type=int, default=1024) rawparser.add_argument('-frame_length', type=int, default=2048) rawparser.set_defaults(extractfeat=extractraw) waveletparser = subparsers.add_parser('wave') waveletparser.add_argument('-level', default=10, type=int) waveletparser.add_argument('-type', default='db4', type=str) waveletparser.set_defaults(extractfeat=extractwavelet) args = parser.parse_args() argsdict = vars(args) # Just for TQDM, usually its not that large anyway for line in tqdm(args.wavfilelist, ascii=True): assert os.path.exists(line) and not line.endswith('scp'), "Passed only .scp file, you need to cat it e.g. python featextract.py `cat FILE`" y, sr = librosa.load(line, sr=None, mono=not args.nomono) # Stereo if y.ndim > 1: feat = np.array([args.extractfeat(i, sr, **argsdict) for i in y]) else: feat = args.extractfeat(y, sr, **argsdict) # Transpose feat, nsamples to nsamples, feat feat = np.vstack(feat).transpose() filename = os.path.splitext(os.path.basename(line.strip()))[0] key = args.prefix + "_" + filename if args.prefix != '' else filename kaldi_io.write_mat(args.out, feat, key=key)
#!/usr/bin/env python
"""Multiply every element of every feature matrix in an archive by a scale."""
import argparse

import numpy as np

from utils import kaldi_io

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("scale", type=float)
    parser.add_argument("feat_rspecifier", type=str)
    parser.add_argument("feat_wspecifier", type=str)
    opts = parser.parse_args()

    # Matrices are streamed one at a time and keep their original keys.
    with kaldi_io.open_or_fd(opts.feat_wspecifier, "wb") as sink:
        for utt_id, matrix in kaldi_io.read_mat_ark(opts.feat_rspecifier):
            kaldi_io.write_mat(sink, matrix * opts.scale, key=utt_id)
def write_concated_feats(writer, feat_buf: list, key: str):
    """Write the buffered feature chunks as one matrix and empty the buffer.

    Args:
        writer: Open output handle passed straight to ``kaldi_io.write_mat``.
        feat_buf: Arrays to stack row-wise with ``np.vstack``. The list is
            cleared in place after the write.
        key: Key under which the stacked matrix is written.
    """
    stacked = np.vstack(feat_buf)
    as_float = stacked.astype(float)
    kaldi_io.write_mat(writer, as_float, key=key)
    # Empty the caller's list in place so it can be reused for the next key.
    del feat_buf[:]
def write_(writer, mat, utt, *, is_int=False):
    """Write ``mat`` under key ``utt`` in the requested Kaldi format.

    Args:
        writer: Open output handle passed straight to the ``kaldi_io`` writer.
        mat: Data to write.
        utt: Key under which the data is written.
        is_int: When true the data goes through ``kaldi_io.write_vec_int``.
            Otherwise it goes through ``kaldi_io.write_mat``.
    """
    emit = kaldi_io.write_vec_int if is_int else kaldi_io.write_mat
    emit(writer, mat, key=utt)