Ejemplo n.º 1
0
def main_():
    """Write a per-sample 0/1 mask for every wav file named in a list.

    Command line:
        wav_list        Text file with one ``<utt> <wav_file>`` pair per line
                        (any run of whitespace separates the fields), or
                        ``-`` to read the list from stdin.
        ali_rspecifier  Output destination, opened for binary writing
                        (despite the "rspecifier" name it is written to).
        --hop_size      Forwarded to ``split`` as ``hop_length`` (default 110).
        --fft_size      Forwarded to ``split`` as ``frame_length``
                        (default 2048).

    For each utterance an int32 array shaped like the waveform is written
    with ``kaldi_io.write_vec_int``: samples inside any ``[start, end)``
    interval returned by ``split`` are 1, everything else is 0.

    Raises:
        ValueError: if a list line does not hold exactly two fields.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--hop_size", type=int, default=110)
    ap.add_argument("--fft_size", type=int, default=2048)
    ap.add_argument("wav_list", type=str)
    ap.add_argument("ali_rspecifier", type=str)

    args = ap.parse_args()

    from_stdin = args.wav_list == "-"
    wavlist_fp = sys.stdin if from_stdin else open(args.wav_list, mode='rt')

    try:
        with kaldi_io.open_or_fd(args.ali_rspecifier,
                                 mode="wb") as ali_writer:
            for line in wavlist_fp:
                # split() with no argument tolerates tabs and repeated blanks.
                fields = line.split()
                if len(fields) != 2:
                    raise ValueError(
                        "expected '<utt> <wav_file>', got: {!r}".format(line))
                utt, wav_file = fields

                # NOTE(review): `read`/`split` are imported elsewhere in the
                # file; presumably scipy.io.wavfile.read and
                # librosa.effects.split operating on mono audio -- confirm.
                _, wav = read(wav_file)
                wav = wav.astype("float")

                voiced = np.zeros_like(wav, dtype=np.int32)
                intervals = split(wav,
                                  frame_length=args.fft_size,
                                  hop_length=args.hop_size)
                for sidx, eidx in intervals:
                    voiced[sidx:eidx] = 1
                kaldi_io.write_vec_int(ali_writer, voiced, key=utt)
    finally:
        # Close the list even when processing fails, but never close the
        # process-wide stdin, which this function does not own.
        if not from_stdin:
            wavlist_fp.close()
Ejemplo n.º 2
0
def main_():
    """Convert single-column feature matrices into integer vectors.

    Command line:
        feat_rspecifier  Source read with ``kaldi_io.read_mat_ark``.
        ali_wspecifier   Destination opened for binary writing.

    Every matrix read must have exactly one column; that column is squeezed
    to a 1-D array, cast to integers and written under the same utterance
    key with ``kaldi_io.write_vec_int``.
    """
    ap = ArgumentParser()
    ap.add_argument("feat_rspecifier", type=str)
    ap.add_argument("ali_wspecifier", type=str)

    args = ap.parse_args()

    with kaldi_io.open_or_fd(args.ali_wspecifier, mode="wb") as ali_writer:
        for utt, feat in kaldi_io.read_mat_ark(args.feat_rspecifier):
            _, dim = feat.shape
            assert dim == 1, "expected a single-column matrix"
            # `np.int` was only an alias of the builtin `int`; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so use `int`.
            ali = feat.squeeze(1).astype(int)
            kaldi_io.write_vec_int(ali_writer, ali, key=utt)
Ejemplo n.º 3
0
def main_():
    """Write the 1-D arrays stored in ``.npy`` files as integer vectors.

    Command line:
        npy_list        Text file with one ``<utt> <npy_file>`` pair per line
                        (any run of whitespace separates the fields), or
                        ``-`` to read the list from stdin.
        ali_wspecifier  Destination opened for binary writing.

    Each array is loaded with ``np.load``, must be one-dimensional, and is
    written under its utterance key with ``kaldi_io.write_vec_int``.

    Raises:
        ValueError: if a list line does not hold exactly two fields.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("npy_list", type=str)
    ap.add_argument("ali_wspecifier", type=str)

    args = ap.parse_args()

    from_stdin = args.npy_list == "-"
    npy_fp = sys.stdin if from_stdin else open(args.npy_list, mode='rt')

    try:
        with kaldi_io.open_or_fd(args.ali_wspecifier, "wb") as ali_writer:
            for line in npy_fp:
                # split() with no argument tolerates tabs and repeated blanks.
                fields = line.split()
                if len(fields) != 2:
                    raise ValueError(
                        "expected '<utt> <npy_file>', got: {!r}".format(line))
                utt, npy_file = fields
                data = np.load(npy_file)
                assert len(data.shape) == 1
                kaldi_io.write_vec_int(ali_writer, data, key=utt)
    finally:
        # Close the list even when processing fails, but never close the
        # process-wide stdin, which this function does not own.
        if not from_stdin:
            npy_fp.close()
Ejemplo n.º 4
0
def main_():
    """Binarize feature matrices against a threshold and write them out.

    Command line:
        feature_rspecifier  Source read with ``kaldi_io.read_mat_ark``.
        feature_wspecifier  Destination opened for binary writing.
        --thresh            Threshold (default 0.5): entries ``>= thresh``
                            become 1, entries ``< thresh`` become 0.
        --write_int         Parsed by ``str_to_bool`` (default True). When
                            true, each matrix must have one column and is
                            written as a flat int32 vector; otherwise the
                            binarized matrix is written with ``write_mat``.

    Entries that compare neither ``>=`` nor ``<`` the threshold (NaN) are
    passed through unchanged by the thresholding step.
    """
    parser = argparse.ArgumentParser(usage="")

    # optional flags
    parser.add_argument("--write_int", type=str_to_bool, default=True)
    parser.add_argument("--thresh", type=float, default=0.5)
    # positional arguments
    parser.add_argument("feature_rspecifier", help="input feat")
    parser.add_argument("feature_wspecifier", help="output feat")

    opts = parser.parse_args()
    threshold = opts.thresh

    with kaldi_io.open_or_fd(opts.feature_wspecifier, "wb") as feature_writer:
        for key, feats in kaldi_io.read_mat_ark(opts.feature_rspecifier):
            at_or_above = np.where(feats >= threshold, 1, feats)
            binarized = np.where(feats < threshold, 0, at_or_above)

            if not opts.write_int:
                kaldi_io.write_mat(feature_writer, binarized, key=key)
                continue

            assert binarized.shape[1] == 1
            flat = binarized.astype("int32").reshape(binarized.size)
            kaldi_io.write_vec_int(feature_writer, flat, key=key)
Ejemplo n.º 5
0
def main_():
    """Dump the raw samples of every wav file in a list as integer vectors.

    Command line:
        wav_list        Text file with one ``<utt> <wav_file>`` pair per line
                        (any run of whitespace separates the fields), or
                        ``-`` to read the list from stdin.
        ali_rspecifier  Output destination, opened for binary writing
                        (despite the "rspecifier" name it is written to).

    All frames of each file are read with the ``wave`` module, reinterpreted
    as int16 and written under the utterance key with
    ``kaldi_io.write_vec_int``.

    Raises:
        ValueError: if a list line does not hold exactly two fields.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("wav_list", type=str)
    ap.add_argument("ali_rspecifier", type=str)

    args = ap.parse_args()

    from_stdin = args.wav_list == "-"
    wavlist_fp = sys.stdin if from_stdin else open(args.wav_list, mode='rt')

    try:
        with kaldi_io.open_or_fd(args.ali_rspecifier,
                                 mode="wb") as ali_writer:
            for line in wavlist_fp:
                # split() with no argument tolerates tabs and repeated blanks.
                fields = line.split()
                if len(fields) != 2:
                    raise ValueError(
                        "expected '<utt> <wav_file>', got: {!r}".format(line))
                utt, wav_file = fields

                with wave.open(wav_file, 'r') as wr:
                    data = wr.readframes(wr.getnframes())
                # NOTE(review): the bytes are viewed as int16 regardless of
                # the file's sample width or channel count -- confirm that
                # inputs are 16-bit (and mono, if one value per frame is
                # expected).
                wav = np.frombuffer(data, dtype=np.int16)
                kaldi_io.write_vec_int(ali_writer, wav, key=utt)
    finally:
        # Close the list even when processing fails, but never close the
        # process-wide stdin, which this function does not own.
        if not from_stdin:
            wavlist_fp.close()
Ejemplo n.º 6
0
def write_(writer, mat, utt, *, is_int=False):
    """Write ``mat`` to ``writer`` under the key ``utt``.

    Args:
        writer: Destination handed to the ``kaldi_io`` writer function.
        mat: Data to write.
        utt: Key stored with the entry.
        is_int: Keyword-only. When true the data goes through
            ``kaldi_io.write_vec_int``; otherwise through
            ``kaldi_io.write_mat``.
    """
    emit = kaldi_io.write_vec_int if is_int else kaldi_io.write_mat
    emit(writer, mat, key=utt)
Ejemplo n.º 7
0
# Walk the keys listed in test_list (one per line) and, for each, load the
# mixture and its two sources, write the gender labels, and build
# STFT-derived arrays.
with open(test_list, 'r') as f:
    for line in f.readlines():
        key = line.strip()
        file_name = key + '.wav'
        # The mixture and both sources share one file name under the
        # 'mix', 's1' and 's2' sub-directories of wav_dir.
        mix_file = os.path.join(wav_dir, 'mix', file_name)
        s1_file = os.path.join(wav_dir, 's1', file_name)
        s2_file = os.path.join(wav_dir, 's2', file_name)

        # NOTE(review): audioread is defined elsewhere; samp_rate=8000 is
        # presumably the expected sampling rate in Hz -- confirm.
        mix_wav = audioread(mix_file, samp_rate=8000)
        s1_wav = audioread(s1_file, samp_rate=8000)
        s2_wav = audioread(s2_file, samp_rate=8000)

        # gender_dict is indexed by the first three characters of the 1st and
        # 3rd underscore-separated fields of the key (presumably speaker IDs
        # embedded in the file name -- verify against the dataset naming).
        s1_gender = gender_dict[key.split('_')[0][0:3]]
        s2_gender = gender_dict[key.split('_')[2][0:3]]
        gender = np.array([s1_gender, s2_gender]).astype(np.int32)
        # Store both labels as one int32 vector under the utterance key.
        kaldi_io.write_vec_int(gender_f, gender, key=key)

        # For each signal keep np.abs and np.angle of its STFT (magnitude and
        # phase, assuming stft returns a complex array -- confirm).
        mix_stft = stft(mix_wav, size=256, shift=64)
        mix_abs = np.abs(mix_stft)
        mix_angle = np.angle(mix_stft)
        s1_stft = stft(s1_wav, size=256, shift=64)
        s1_abs = np.abs(s1_stft)
        s1_angle = np.angle(s1_stft)
        s2_stft = stft(s2_wav, size=256, shift=64)
        s2_abs = np.abs(s2_stft)
        s2_angle = np.angle(s2_stft)

        # Join the abs and angle arrays of each signal along axis 1.
        mix_data = np.concatenate((mix_abs, mix_angle), axis=1)
        s1_data = np.concatenate((s1_abs, s1_angle), axis=1)
        s2_data = np.concatenate((s2_abs, s2_angle), axis=1)
        feats = np.concatenate((mix_data, s1_data, s2_data),
Ejemplo n.º 8
0
            # The mixture and its three sources share the name key + '.wav'
            # under the 'mix', 's1', 's2' and 's3' sub-directories of
            # wav_dir/i_type.
            mix_file = os.path.join(wav_dir, i_type, 'mix', key + '.wav')
            s1_file = os.path.join(wav_dir, i_type, 's1', key + '.wav')
            s2_file = os.path.join(wav_dir, i_type, 's2', key + '.wav')
            s3_file = os.path.join(wav_dir, i_type, 's3', key + '.wav')

            # NOTE(review): audioread and fs8k are defined outside this
            # fragment; fs8k is presumably 8000 Hz -- confirm.
            mix_wav = audioread(mix_file, samp_rate=fs8k)
            s1_wav = audioread(s1_file, samp_rate=fs8k)
            s2_wav = audioread(s2_file, samp_rate=fs8k)
            s3_wav = audioread(s3_file, samp_rate=fs8k)

            # gender_dict is indexed by the first three characters of the
            # 1st, 3rd and 5th underscore-separated fields of the key
            # (presumably speaker IDs embedded in the file name -- verify
            # against the dataset naming).
            s1_gender = gender_dict[key.split('_')[0][0:3]]
            s2_gender = gender_dict[key.split('_')[2][0:3]]
            s3_gender = gender_dict[key.split('_')[4][0:3]]
            gender = np.array([s1_gender, s2_gender,
                               s3_gender]).astype(np.int32)
            # Store the three labels as one int32 vector under the key.
            write_vec_int(write_gf, gender, key=key)

            # Each STFT is cast to complex64 before np.abs and np.angle are
            # taken from it.
            mix_stft = stft(mix_wav, size=size,
                            shift=shift).astype(np.complex64)
            mix_abs = np.abs(mix_stft)
            mix_angle = np.angle(mix_stft)
            s1_stft = stft(s1_wav, size=size, shift=shift).astype(np.complex64)
            s1_abs = np.abs(s1_stft)
            s1_angle = np.angle(s1_stft)
            s2_stft = stft(s2_wav, size=size, shift=shift).astype(np.complex64)
            s2_abs = np.abs(s2_stft)
            s2_angle = np.angle(s2_stft)
            s3_stft = stft(s3_wav, size=size, shift=shift).astype(np.complex64)
            s3_abs = np.abs(s3_stft)
            s3_angle = np.angle(s3_stft)