Example #1
0
def test_librispeech():
    """Synthesize audio from a trained WaveNet primed with Librispeech.

    Restores the latest checkpoint (if any), primes a zero-initialized
    buffer with one batch of real audio, then autoregressively samples
    the remaining ``total_length - prime_length`` samples using a sliding
    window of ``sequence_length`` inputs, and finally writes one WAV file
    per batch item.
    """
    batch_size = 24
    filter_length = 2
    n_stages = 7
    n_layers_per_stage = 9
    n_hidden = 48
    n_skip = 384
    total_length = 16000
    sequence_length = get_sequence_length(n_stages, n_layers_per_stage)
    prime_length = sequence_length
    ckpt_path = 'wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length, n_stages,
        n_layers_per_stage, n_hidden, n_skip)

    dataset = librispeech.get_dataset()
    batch = next(librispeech.batch_generator(dataset, batch_size,
                                             prime_length))[0]

    # Context-manage the session so its resources are released even if
    # synthesis raises; the previous version leaked the session.
    with tf.Session() as sess:
        net = create_wavenet(batch_size=batch_size,
                             filter_length=filter_length,
                             n_hidden=n_hidden,
                             n_skip=n_skip,
                             n_layers_per_stage=n_layers_per_stage,
                             n_stages=n_stages,
                             shift=False)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        saver = tf.train.Saver()
        # Look up the checkpoint once instead of twice.
        ckpt = tf.train.latest_checkpoint(ckpt_path)
        if ckpt is not None:
            saver.restore(sess, ckpt)
        else:
            print('Could not find checkpoint')

        synth = np.zeros([batch_size, total_length], dtype=np.float32)
        synth[:, :prime_length] = batch

        print('Synthesize...')
        for sample_i in range(0, total_length - prime_length):
            print('{}/{}/{}'.format(sample_i, prime_length, total_length),
                  end='\r')
            # Feed the current sliding window; the network returns one
            # categorical distribution per window position.
            probs = sess.run(net["probs"],
                             feed_dict={
                                 net["X"]:
                                 synth[:, sample_i:sample_i + sequence_length]
                             })
            idxs = sample_categorical(probs)
            idxs = idxs.reshape((batch_size, sequence_length))
            if sample_i == 0:
                # First step: replace the primer region with the model's
                # reconstruction of the whole window (mu-law indices are
                # centered by subtracting 128 before inversion).
                audio = wnu.inv_mu_law_numpy(idxs - 128)
                synth[:, :prime_length] = audio
            else:
                # Later steps: only the last window position is new.
                audio = wnu.inv_mu_law_numpy(idxs[:, -1] - 128)
                synth[:, prime_length + sample_i] = audio

    for i in range(batch_size):
        wavfile.write('synthesis-{}.wav'.format(i), 16000, synth[i])
Example #2
0
def test_librispeech():
    """Synthesize audio from a trained WaveNet primed with Librispeech.

    Restores the latest checkpoint (if any), primes a zero-initialized
    buffer with one batch of real audio, autoregressively generates the
    remaining samples over a sliding input window, and writes one WAV
    file per batch item.
    """
    batch_size = 24
    filter_length = 2
    n_stages = 7
    n_layers_per_stage = 9
    n_hidden = 48
    n_skip = 384
    total_length = 16000
    sequence_length = get_sequence_length(n_stages, n_layers_per_stage)
    prime_length = sequence_length
    ckpt_path = 'wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length, n_stages,
        n_layers_per_stage, n_hidden, n_skip)

    dataset = librispeech.get_dataset()
    batch = next(
        librispeech.batch_generator(dataset, batch_size, prime_length))[0]

    # The session is now a context manager so it is always closed;
    # the previous version created it with tf.Session() and leaked it.
    with tf.Session() as sess:
        net = create_wavenet(
            batch_size=batch_size,
            filter_length=filter_length,
            n_hidden=n_hidden,
            n_skip=n_skip,
            n_layers_per_stage=n_layers_per_stage,
            n_stages=n_stages,
            shift=False)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        saver = tf.train.Saver()
        # Resolve the checkpoint path once rather than twice.
        ckpt = tf.train.latest_checkpoint(ckpt_path)
        if ckpt is not None:
            saver.restore(sess, ckpt)
        else:
            print('Could not find checkpoint')

        synth = np.zeros([batch_size, total_length], dtype=np.float32)
        synth[:, :prime_length] = batch

        print('Synthesize...')
        for sample_i in range(0, total_length - prime_length):
            print('{}/{}/{}'.format(sample_i, prime_length, total_length), end='\r')
            probs = sess.run(
                net["probs"],
                feed_dict={net["X"]: synth[:, sample_i:sample_i + sequence_length]})
            idxs = sample_categorical(probs)
            idxs = idxs.reshape((batch_size, sequence_length))
            if sample_i == 0:
                # First iteration rewrites the whole primer with the
                # model's reconstruction of the window.
                audio = wnu.inv_mu_law_numpy(idxs - 128)
                synth[:, :prime_length] = audio
            else:
                # Afterwards only the final window position is a new sample.
                audio = wnu.inv_mu_law_numpy(idxs[:, -1] - 128)
                synth[:, prime_length + sample_i] = audio

    for i in range(batch_size):
        wavfile.write('synthesis-{}.wav'.format(i), 16000, synth[i])
Example #3
0
def test_librispeech():
    """Synthesize audio with the incremental (queued) generation model.

    Restores the latest checkpoint (if any), feeds the primer one sample
    at a time to warm up the model's internal queues (``push_ops``), then
    autoregressively generates the rest of the signal and writes one WAV
    file per batch item.
    """
    prime_length = 6144
    total_length = 16000 * 3
    batch_size = 32
    n_stages = 6
    n_layers_per_stage = 9
    n_hidden = 32
    filter_length = 2
    n_skip = 256
    onehot = False

    sequence_length = get_sequence_length(n_stages, n_layers_per_stage)
    ckpt_path = 'vctk-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length,
        n_stages, n_layers_per_stage, n_hidden, n_skip)

    dataset = librispeech.get_dataset()
    batch = next(librispeech.batch_generator(dataset,
                                             batch_size, prime_length))[0]

    with tf.Graph().as_default(), tf.Session() as sess:
        net = create_generation_model(batch_size=batch_size,
                                      filter_length=filter_length,
                                      n_hidden=n_hidden,
                                      n_skip=n_skip,
                                      n_layers_per_stage=n_layers_per_stage,
                                      n_stages=n_stages,
                                      onehot=onehot)
        saver = tf.train.Saver()
        if tf.train.latest_checkpoint(ckpt_path) is not None:
            saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        else:
            print('Could not find checkpoint')
        sess.run(net['init_ops'])
        synth = np.zeros([batch_size, total_length], dtype=np.float32)
        synth[:, :prime_length] = batch

        print('Synthesize...')
        for sample_i in range(total_length - 1):
            print('{}/{}/{}'.format(sample_i, prime_length, total_length),
                  end='\r')
            # Feeding sample `sample_i` yields the distribution for
            # sample `sample_i + 1`; push_ops advances the model queues.
            probs = sess.run(
                [net["probs"], net["push_ops"]],
                feed_dict={net["X"]: synth[:, [sample_i]]})[0]
            idxs = sample_categorical(probs)
            audio = wnu.inv_mu_law_numpy(idxs - 128)
            # The prediction targets index sample_i + 1, so generation
            # must start once that index falls outside the primer.  The
            # previous test (`sample_i >= prime_length`) was off by one:
            # synth[:, prime_length] was never written, and that zero
            # sample was fed back into the model on the next step.
            if sample_i + 1 >= prime_length:
                synth[:, sample_i + 1] = audio

        for i in range(batch_size):
            wavfile.write('synthesis-{}.wav'.format(i),
                          16000, synth[i])
Example #4
0
def test_librispeech():
    """Synthesize audio with the incremental (queued) generation model.

    Restores the latest checkpoint (if any), primes the model's internal
    queues one sample at a time via ``push_ops``, then autoregressively
    generates the remaining signal and writes one WAV file per batch item.
    """
    prime_length = 6144
    total_length = 16000 * 3
    batch_size = 32
    n_stages = 6
    n_layers_per_stage = 9
    n_hidden = 32
    filter_length = 2
    n_skip = 256
    onehot = False

    sequence_length = get_sequence_length(n_stages, n_layers_per_stage)
    ckpt_path = 'vctk-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length,
        n_stages, n_layers_per_stage, n_hidden, n_skip)

    dataset = librispeech.get_dataset()
    batch = next(librispeech.batch_generator(dataset,
                                             batch_size, prime_length))[0]

    with tf.Graph().as_default(), tf.Session() as sess:
        net = create_generation_model(batch_size=batch_size,
                                      filter_length=filter_length,
                                      n_hidden=n_hidden,
                                      n_skip=n_skip,
                                      n_layers_per_stage=n_layers_per_stage,
                                      n_stages=n_stages,
                                      onehot=onehot)
        saver = tf.train.Saver()
        if tf.train.latest_checkpoint(ckpt_path) is not None:
            saver.restore(sess, tf.train.latest_checkpoint(ckpt_path))
        else:
            print('Could not find checkpoint')
        sess.run(net['init_ops'])
        synth = np.zeros([batch_size, total_length], dtype=np.float32)
        synth[:, :prime_length] = batch

        print('Synthesize...')
        for sample_i in range(total_length - 1):
            print('{}/{}/{}'.format(sample_i, prime_length, total_length),
                  end='\r')
            # Feeding sample `sample_i` produces the distribution for
            # sample `sample_i + 1`; push_ops advances the model queues.
            probs = sess.run(
                [net["probs"], net["push_ops"]],
                feed_dict={net["X"]: synth[:, [sample_i]]})[0]
            idxs = sample_categorical(probs)
            audio = wnu.inv_mu_law_numpy(idxs - 128)
            # Start writing as soon as the *predicted* index leaves the
            # primer region.  The old `sample_i >= prime_length` check was
            # off by one: synth[:, prime_length] stayed zero and that zero
            # was fed back into the model.
            if sample_i + 1 >= prime_length:
                synth[:, sample_i + 1] = audio

        for i in range(batch_size):
            wavfile.write('synthesis-{}.wav'.format(i),
                          16000, synth[i])
Example #5
0
def train_librispeech():
    """Train the WaveNet generation model on the Librispeech corpus.

    Resumes from the latest checkpoint under ``ckpt_path`` when one
    exists, then iterates over epochs of fixed-length audio windows,
    printing the loss each step and writing summaries plus a checkpoint
    every 100 iterations.
    """
    dataset = librispeech.get_dataset(convert_to_wav=False)
    it_i = 0
    n_epochs = 10000

    # Architecture hyperparameters (must match any checkpoint restored).
    batch_size = 32
    n_stages = 6
    n_layers_per_stage = 9
    n_hidden = 32
    filter_length = 2
    n_skip = 256

    sequence_length = get_sequence_length(n_stages, n_layers_per_stage)
    ckpt_path = 'vctk-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length,
        n_stages, n_layers_per_stage, n_hidden, n_skip)
    with tf.Graph().as_default() as g, tf.Session(graph=g) as sess:
        net = create_generation_model(
            n_stages=n_stages,
            n_layers_per_stage=n_layers_per_stage,
            n_hidden=n_hidden,
            batch_size=batch_size,
            n_skip=n_skip,
            filter_length=filter_length)
        batch = librispeech.batch_generator
        opt = tf.train.AdamOptimizer(
            learning_rate=0.00001).minimize(net['loss'])
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(ckpt_path)
        latest = tf.train.latest_checkpoint(ckpt_path)
        if latest is not None:
            saver.restore(sess, latest)
        for epoch_i in range(n_epochs):
            for batch_xs, batch_hs in batch(dataset, batch_size, sequence_length):
                # One optimization step; fetch the loss for monitoring.
                loss, _ = sess.run(
                    [net['loss'], opt],
                    feed_dict={net['X']: batch_xs})
                print(loss)
                if it_i % 100 == 0:
                    # Periodically log summaries and snapshot the model.
                    summary = sess.run(
                        net['summaries'],
                        feed_dict={net['X']: batch_xs})
                    writer.add_summary(summary, it_i)
                    saver.save(
                        sess,
                        os.path.join(ckpt_path, 'model.ckpt'),
                        global_step=it_i)
                it_i += 1
Example #6
0
def test_librispeech_batch_generator():
    """The batch generator yields audio shaped (batch_size, 6144)."""
    dataset = librispeech.get_dataset()
    first_batch = next(librispeech.batch_generator(dataset, batch_size=32))
    assert first_batch[0].shape == (32, 6144)
Example #7
0
def test_librispeech_dataset():
    """The dataset built with WAV conversion has the expected item count."""
    dataset = librispeech.get_dataset(convert_to_wav=True)
    assert len(dataset) == 106717
Example #8
0
def test_librispeech_batch():
    """Each batch pairs (32, 6144) audio with a length-32 companion array."""
    dataset = librispeech.get_dataset()
    first_batch = next(librispeech.batch_generator(dataset, batch_size=32))
    assert first_batch[0].shape == (32, 6144)
    assert first_batch[1].shape == (32,)
Example #9
0
def test_librispeech_dataset():
    """The default Librispeech dataset contains 106717 items."""
    dataset = librispeech.get_dataset()
    assert len(dataset) == 106717