def main():
    """Convert the feature matrices listed in an scp file into wav files.

    Command-line options select the sampling frequency, the mel frequency
    range, the FFT / shift / window lengths, the number of mel bins and the
    window type.  The two positional arguments are the input ``scp`` and
    the output directory.

    Every ``(utt_id, matrix)`` pair read from the scp is passed through
    ``logmelspc_to_linearspc`` and then ``griffin_lim``; the result is
    written to ``<outdir>/<utt_id>.wav`` as 16-bit integer samples.

    Raises:
        OSError: if the output directory cannot be created.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--fs', type=int, default=22050,
                        help='Sampling frequency')
    parser.add_argument('--fmax', type=int, default=None, nargs='?',
                        help='Maximum frequency')
    parser.add_argument('--fmin', type=int, default=None, nargs='?',
                        help='Minimum frequency')
    parser.add_argument('--n_fft', type=int, default=1024,
                        help='FFT length in point')
    parser.add_argument('--n_shift', type=int, default=512,
                        help='Shift length in point')
    parser.add_argument('--win_length', type=int, default=None, nargs='?',
                        help='Analisys window length in point')
    parser.add_argument('--n_mels', type=int, default=80,
                        help='Number of mel basis')
    parser.add_argument('--window', type=str, default='hann',
                        choices=['hann', 'hamming'],
                        help='Type of window')
    parser.add_argument('scp', type=str,
                        help='Feat scp files')
    parser.add_argument('outdir', type=str,
                        help='Output directory')
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")

    # load scp
    reader = kaldi_io_py.read_mat_scp(args.scp)

    # Make sure the output directory exists.  Creating it unconditionally
    # and tolerating "already exists" avoids the check-then-create race
    # when several jobs share one output directory.
    try:
        os.makedirs(args.outdir)
    except OSError:
        if not os.path.isdir(args.outdir):
            raise

    # Scale factor from floating-point samples to the int16 range.
    int16_max = np.iinfo(np.int16).max

    # reconstruct a waveform for every utterance and write it as a wav file
    for idx, (utt_id, lmspc) in enumerate(reader, 1):
        spc = logmelspc_to_linearspc(
            lmspc,
            fs=args.fs,
            n_mels=args.n_mels,
            n_fft=args.n_fft,
            fmin=args.fmin,
            fmax=args.fmax)
        y = griffin_lim(
            spc,
            n_fft=args.n_fft,
            n_shift=args.n_shift,
            win_length=args.win_length,
            window=args.window)
        logging.info("(%d) %s", idx, utt_id)
        # NOTE(review): assumes y lies within [-1, 1]; values outside that
        # range would wrap around in the int16 cast -- TODO confirm.
        write(os.path.join(args.outdir, "%s.wav" % utt_id),
              args.fs,
              (y * int16_max).astype(np.int16))
Beispiel #2
0
def test_voxforge_feats():
    """Check that kaldi_io_py and kaldi_io read the same voxforge features.

    The test is skipped when ``kaldi_io`` cannot be imported or when the
    voxforge ``feats.scp`` has not been generated.  Otherwise every matrix
    yielded by ``kaldi_io_py.read_mat_scp`` is compared, by key, against
    the one returned by ``kaldi_io.RandomAccessBaseFloatMatrixReader``.
    """
    import kaldi_io_py
    pytest.importorskip("kaldi_io")
    import kaldi_io

    scp_path = "egs/voxforge/asr1/data/tr_it/feats.scp"
    # The existence check has to use the plain file path: the "scp:" prefix
    # belongs to the rspecifier, not to the file name, so testing the
    # prefixed string would never match the file on disk.
    if not os.path.exists(scp_path):
        pytest.skip("voxforge scp has not been created")
    train_scp = "scp:" + scp_path

    r1 = kaldi_io_py.read_mat_scp(train_scp)
    r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp)

    for k, v1 in r1:
        k = str(k)
        print(k)
        v2 = r2[k]
        assert v1.shape == v2.shape
        numpy.testing.assert_allclose(v1, v2, atol=1e-5)
Beispiel #3
0
# coding: utf-8
import numpy
import kaldi_io
import kaldi_io_py
import lazy_io

# One feature table, opened through three different readers.
train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp"

# r1 is iterated as (key, matrix) pairs; r2 and r3 are indexed by key.
r1 = kaldi_io_py.read_mat_scp(train_scp)
r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp)
r3 = lazy_io.read_dict_scp(train_scp)

for raw_key, v1 in r1:
    k = str(raw_key)
    print(k)
    # Fetch the same utterance from the two keyed readers.
    v2, v3 = r2[k], r3[k]
    # Shapes must agree before the element-wise comparison is attempted.
    assert v1.shape == v2.shape
    assert v1.shape == v3.shape
    # kaldi_io gets a small absolute tolerance; lazy_io gets none.
    numpy.testing.assert_allclose(v1, v2, atol=1e-5)
    numpy.testing.assert_allclose(v1, v3, atol=0)

Beispiel #4
0
                    type=str,
                    required=True,
                    help='output file')
parser.add_argument('--cmvn', type=str, help='cmvn file')
args = parser.parse_args()

#ark='ark: copy-feats --print-args=false scp:%s ark:- | ' % (args.input)
#print(args.input)

# Read the global mean / standard deviation used for normalization.
# They are only loaded when a cmvn file was given on the command line.
if args.cmvn:
    with h5py.File(args.cmvn, 'r') as f:
        # Dataset.value is deprecated and was removed in h5py 3.0;
        # indexing with an empty tuple reads the whole dataset and works
        # on every h5py version.
        cmvn_mean = f['mean'][()]
        cmvn_dev = f['stddev'][()]

generator = kaldi_io_py.read_mat_scp(args.input)
with h5py.File(args.output, 'w') as f:
    for key, mat in generator:
        if args.cmvn:
            # Normalize every frame: subtract the global mean, then divide
            # by the global standard deviation.  Both steps must be applied
            # to the same value; assigning them one after the other would
            # discard the mean subtraction.  Broadcasting applies the
            # per-dimension statistics to all rows at once, and the result
            # is float64, matching np.ndarray's default dtype.
            mean = cmvn_mean.reshape(mat.shape[1], )
            dev = cmvn_dev.reshape(mat.shape[1], )
            cp = (np.asarray(mat, dtype=np.float64) - mean) / dev
            f.create_dataset(key,
                             data=cp,
                             compression='gzip',
                             compression_opts=9)
        else:
            f.create_dataset(key,
                             data=mat,
                             compression='gzip',
Beispiel #5
0
    # Path of the "<speaker> <index>" table written below.
    spk_file = os.path.join(args.outdir, 'speakers')

    # The table is opened for writing first, then the feats list for reading.
    with open(spk_file, 'w') as table_f, open(args.feats, 'r') as feats_f:
        for entry in feats_f.readlines():
            # First field of the line, with '_DT' removed and the last
            # three non-whitespace characters stripped.
            first_field = entry.split()[0]
            spk = re.sub('\S\S\S$', "", re.sub('_DT', "", first_field))
            # NOTE(review): an index is assigned only to names already
            # present in `speakers`; if `speakers` starts out empty nothing
            # is ever written -- confirm whether `not in` was intended.
            if spk not in speakers:
                continue
            speakers[spk] = spk_num
            table_f.write("{0} {1}\n".format(spk, spk_num))
            spk_num += 1

    # Store every feature matrix, with its label and speaker index, in one
    # HDF5 file inside the output directory.
    hdf_file = os.path.join(args.outdir, 'data.h5')
    generator = kaldi_io_py.read_mat_scp(args.feats)

    with h5py.File(hdf_file, 'w') as hdf:
        for key, mat in generator:
            # One group per key, holding 'data', 'label' and 'speaker'.
            hdf.create_group(key)
            hdf.create_dataset(key + '/data', data=mat,
                               compression='gzip', compression_opts=9)
            # Keys containing '_DT' get label 0, all others label 1
            # (original note: "1 if speaker is deaf").
            label = 0 if '_DT' in key else 1
            # Speaker name: the key with '_DT' removed and the last three
            # non-whitespace characters stripped.
            spk = re.sub(r'\S\S\S$', "", re.sub('_DT', "", key))
            # Raises KeyError when the speaker has no entry in `speakers`.
            spk_label = speakers[spk]
            hdf.create_dataset(key + '/label', data=label)
            hdf.create_dataset(key + '/speaker', data=spk_label)