Example #1
def test():
    # ==== Test: batch mixer (conclusion: capacity should be larger to ensure good mixing) ====
    x, y = read('./dataset/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
    sv = tf.train.Supervisor()
    with sv.managed_session() as sess:
        for _ in range(200):
            x_, y_ = sess.run([x, y])
            print(y_)


    # ===== Read binary ====
    features = read_whole_features('./dataset/vcc2016/bin/Training Set/SF1/*001.bin')

    sv = tf.train.Supervisor()
    with sv.managed_session() as sess:
        features = sess.run(features)

    y = pw2wav(features)
    librosa.output.write_wav('test1.wav', y, 16000)  # TODO fs should be specified externally.


    # ==== Direct read =====
    f = './dataset/vcc2016/bin/Training Set/SF1/100001.bin'
    features = np.fromfile(f, np.float32)
    features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1])  # sp, ap, f0, en, speaker

    y = pw2wav(features)
    librosa.output.write_wav('test2.wav', y, 16000)
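
`pw2wav` is imported from elsewhere in the repository and is not shown in these snippets. As a rough sketch, assuming the frame layout hinted at by the reshape above (513 spectral-envelope bins, 513 aperiodicity bins, then f0, energy, and speaker id) and the `pyworld` vocoder, it could look like the following; the normalization details are an assumption, not the original code:

import numpy as np
import pyworld as pw  # assumed dependency

def pw2wav(features, feat_dim=513, fs=16000):
    # Hypothetical reconstruction of a waveform from WORLD features.
    # Accepts either the dict produced by read_whole_features or the raw
    # [T, 1029] array from the "direct read" branch above.
    if isinstance(features, np.ndarray):
        features = {
            'sp': features[:, :feat_dim],
            'ap': features[:, feat_dim:feat_dim * 2],
            'f0': features[:, feat_dim * 2],
            'en': features[:, feat_dim * 2 + 1],
        }
    en = np.reshape(features['en'], [-1, 1])
    sp = np.power(10., features['sp']) * en  # undo the assumed log/energy normalization
    return pw.synthesize(
        features['f0'].astype(np.float64).copy(order='C'),
        sp.astype(np.float64).copy(order='C'),
        features['ap'].astype(np.float64).copy(order='C'),
        fs,
    )

Note that Example #5 below unpacks two return values from `pw2wav`, so that variant evidently also returns the processed spectral envelope alongside the waveform.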
Example #2
def test():
    # ==== Test: batch mixer (conclusion: capacity should be larger to ensure good mixing) ====
    #x, y = read('./dataset/savee/bin/*/*/*.bin', 1, min_after_dequeue=1024, capacity=2048)
    #sv = tf.train.Supervisor()
    #with sv.managed_session() as sess:
    #    for _ in range(200):
    #        x_, y_ = sess.run([x, y])
    #        print(y_)


    # ===== Read binary ====
    features = read_whole_features('./dataset/savee/bin/Training Set/SNeu/*.bin')

    sv = tf.train.Supervisor()
    with sv.managed_session() as sess:
        features = sess.run(features)

    y = pw2wav(features)
    sf.write('test1.wav', y, 22050)  # TODO fs should be specified externally.


    # ==== Direct read =====
    f = './dataset/savee/bin/Training Set/SNeu/n01.bin'
    features = np.fromfile(f, np.float32)
    features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1])  # sp, ap, f0, en, speaker

    y = pw2wav(features)
    sf.write('test2.wav', y, 22050)
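
`read_whole_features` is likewise defined elsewhere in the repository. A sketch of how it might be written with the TF1 queue runners these examples rely on; the dict keys and the SP_DIM/FEAT_DIM constants follow the 513*2 + 1 + 1 + 1 layout above, but the exact implementation is an assumption:

import tensorflow as tf

SP_DIM = 513
FEAT_DIM = SP_DIM * 2 + 1 + 1 + 1  # sp, ap, f0, en, speaker

def read_whole_features(file_pattern, num_epochs=1):
    # Hypothetical: read each .bin file whole and split it into named features.
    files = tf.gfile.Glob(file_pattern)
    filename_queue = tf.train.string_input_producer(files, num_epochs=num_epochs)
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    value = tf.decode_raw(value, tf.float32)
    value = tf.reshape(value, [-1, FEAT_DIM])
    return {
        'sp': value[:, :SP_DIM],
        'ap': value[:, SP_DIM:2 * SP_DIM],
        'f0': value[:, SP_DIM * 2],
        'en': value[:, SP_DIM * 2 + 1],
        'speaker': tf.cast(value[:, SP_DIM * 2 + 2], tf.int64),
        'filename': key,
    }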
Example #3
def main():
    logdir, ckpt = os.path.split(args.checkpoint)
    arch = tf.gfile.Glob(os.path.join(
        logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[
        1,
    ])
    y_t = y_t_id * tf.ones(shape=[
        tf.shape(x)[0],
    ], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation)
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])})
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:  # input queue exhausted
                break
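
`convert_f0` is also an external helper. It typically performs the standard log-Gaussian pitch transformation, mapping the source speaker's log-f0 statistics onto the target speaker's. A sketch, under the assumption that per-speaker [mean, std] of log-f0 are stored as float32 files under ./etc (the file naming here is illustrative):

import numpy as np
import tensorflow as tf

def convert_f0(f0, src, trg):
    # Hypothetical log-Gaussian f0 conversion from speaker `src` to `trg`.
    # Unvoiced frames (f0 == 0) are passed through unchanged.
    mu_s, std_s = np.fromfile('./etc/{}.npf'.format(src), np.float32)
    mu_t, std_t = np.fromfile('./etc/{}.npf'.format(trg), np.float32)
    lf0 = tf.where(f0 > 1., tf.log(f0), f0)
    lf0 = tf.where(lf0 > 1., (lf0 - mu_s) / std_s * std_t + mu_t, lf0)
    return tf.where(lf0 > 1., tf.exp(lf0), lf0)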
Example #4
def main():
    logdir, ckpt = os.path.split(args.checkpoint)
    arch = tf.gfile.Glob(os.path.join(logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1,])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0],], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation)
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])}
                )
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:  # input queue exhausted
                break
Example #5
def test():
    # ==== Test: batch mixer (conclusion: capacity should be larger to ensure good mixing) ====
    #x, y = read('./dataset/savee/bin/*/*/*.bin', 1, min_after_dequeue=1024, capacity=2048)
    #sv = tf.train.Supervisor()
    #with sv.managed_session() as sess:
    #    for _ in range(200):
    #        x_, y_ = sess.run([x, y])
    #        print(y_)


    # ===== Read binary ====
    features = read_whole_features('./dataset/savee/bin/Test Set/SNeu/DC16.bin')

    sv = tf.train.Supervisor()
    with sv.managed_session() as sess:
        features = sess.run(features)

    y, sp = pw2wav(features)
    sf.write('origin1.wav', y, 22050)  # TODO fs should be specified externally.
    with open('01.bin', 'wb') as fp:
        fp.write(sp)
    """
Example #6
def main(unused_args=None):
    # args(sys.argv)

    if args.model is None:
        raise ValueError(
            '\n  You MUST specify `model`.' +
            '\n    Use `python convert.py --help` to see applicable options.'
        )

    module = import_module(args.module, package=None)
    MODEL = getattr(module, args.model)

    FS = 16000

    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    logdir, ckpt = os.path.split(args.checkpoint)
    if 'VAE' in logdir:
        _path_to_arch, _ = os.path.split(logdir)
    else:
        _path_to_arch = logdir
    arch = tf.gfile.Glob(os.path.join(_path_to_arch, 'architecture*.json'))
    if len(arch) != 1:
        print('WARNING: expected exactly one architecture file, found {}.'.format(len(arch)))
    arch = arch[0]
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nhwc(x)
    y_s = features['speaker']
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[
        1,
    ])
    y_t = y_t_id * tf.ones(shape=[
        tf.shape(x)[0],
    ], dtype=tf.int64)

    f0_t = features['f0']
    #    f0_t = convert_f0(f0_s, args.src, args.trg)
    #    f0_s_convert = tf.cast(f0_s,dtype=tf.int64)
    f0_t_convert = tf.cast(f0_t, dtype=tf.int64)
    machine = MODEL(arch, is_training=False)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t,
                         f0_t_convert)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    output_dir = get_default_output(args.output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        print()
        while True:
            try:
                s_time = time.perf_counter()
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])})
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                print('\rProcessing {}'.format(oFilename), end='')
                e_time = time.perf_counter()
                print('\n')
                print('Time_sp: {}'.format(e_time - s_time))
                print('\n')
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:  # input queue exhausted
                break
            except KeyboardInterrupt:
                break
        print()
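
`Tanhize`, constructed from the xmax/xmin .npf files in every conversion script above, squashes the spectral features into [-1, 1] before encoding and undoes that mapping after decoding. The internals below are an assumption; only the constructor arguments and the forward_process/backward_process method names are taken from the calls above:

import tensorflow as tf

class Tanhize(object):
    # Hypothetical min-max normalizer mapping features into [-1, 1].
    def __init__(self, xmax, xmin):
        self.xmax = xmax
        self.xmin = xmin
        self.xscale = xmax - xmin

    def forward_process(self, x):
        x = (x - self.xmin) / self.xscale
        return tf.clip_by_value(x, 0., 1.) * 2. - 1.

    def backward_process(self, x):
        return (x * .5 + .5) * self.xscale + self.xmin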
Example #7
def main():

    checkpoint = torch.load(args.model_path)
    encoder = Encoder()
    generator = G()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    generator.load_state_dict(checkpoint['generator_state_dict'])
    encoder.cuda()
    generator.cuda()

    FS = 16000
    SPEAKERS = list()
    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    total_sp_speaker = []
    total_speaker = []

    total_features = read_whole_features(args.file_pattern.format(args.src))
    for features in total_features:

        x = normalizer.forward_process(features['sp'])
        x = nh_to_nchw(x)
        y_s = features['speaker']
        #print('????',SPEAKERS.index(args.trg))

        #y_t_id = tf.placeholder(dtype=tf.int64, shape=[1,])
        #y_t = y_t_id * torch.ones(shape=[x.shape[0],], dtype=torch.int64)
        #print(y_t)
        x = Variable(torch.FloatTensor(x).cuda(), requires_grad=False)

        y_t = torch.ones((x.shape[0])).view(-1, 1) * (SPEAKERS.index(args.trg))

        z, _ = encoder(x)
        x_t, _ = generator(z, y_t)  # NOTE: the API yields NHWC format
        x_t = torch.squeeze(x_t)
        #print('x_t.shape',x_t.shape)
        x_t = normalizer.backward_process(x_t)
        #print('backward_process.finish')

        x_s, _ = generator(z, y_s)
        x_s = torch.squeeze(x_s)
        x_s = normalizer.backward_process(x_s)

        f0_s = features['f0']
        #print(f0_s.shape)
        f0_t = convert_f0(f0_s, args.src, args.trg)

        #output_dir = get_default_output(args.output_dir)
        output_dir = args.output_dir
        features['sp'] = x_t.cpu().data.numpy()
        features['f0'] = f0_t
        #print('=-=-=-=-=-=')
        y = pw2wav(features)

        oFilename = make_output_wav_name(output_dir, features['filename'])
        print(f'\r Processing {oFilename}', end=' ')

        # exist_ok guards against a race with another process creating the directory
        os.makedirs(os.path.dirname(oFilename), exist_ok=True)

        sf.write(oFilename, y, FS)
        #print('2: ',features['sp'].shape)
        #print('3: ',features['f0'].shape)

    print('\n==finish==')
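
The `nh_to_nchw` / `nh_to_nhwc` helpers used above are simple shape adapters from a [frames, bins] feature matrix to the 4-D layout the convolutional encoder expects. The exact axis placement in the original code is unknown; a sketch in the TF1 style of the other examples:

import tensorflow as tf

def nh_to_nchw(x):
    # Hypothetical: [N, H] -> [N, C=1, H, W=1]
    return tf.expand_dims(tf.expand_dims(x, 1), -1)

def nh_to_nhwc(x):
    # Hypothetical: [N, H] -> [N, H, W=1, C=1]
    return tf.expand_dims(tf.expand_dims(x, -1), -1)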
def main():
    logdir, ckpt = os.path.split(args.checkpoint)
    print('logdir:', logdir)
    print('ckpt:', ckpt)
    arch = args.arch
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))
    print "features shape:", features['sp'].shape
    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[
        1,
    ])
    y_t = y_t_id * tf.ones(shape=[
        tf.shape(x)[0],
    ], dtype=tf.int64)

    machine = MODEL(arch, args, False, False)
    with tf.variable_scope("encoder"):
        z, _ = machine.encoder(x, True)
    with tf.variable_scope("generator"):
        x_t = machine.generator(z, y_t,
                                True)  # NOTE: the API yields NHWC format
    x_t = tf.transpose(x_t, [0, 2, 3, 1])  # transpose the generator output back to NHWC
    print("x_t shape:", x_t.get_shape().as_list())
    x_t = tf.squeeze(x_t)

    x_t = normalizer.backward_process(x_t)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)
    print("output_dir########:", output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    print "logdir:", logdir
    with sv.managed_session() as sess:
        #sess.run(tf.global_variables_initializer())
        #load(saver, sess, logdir, ckpt=ckpt)
        ckpt = os.path.join(logdir, ckpt)
        saver.restore(sess, ckpt)

        while True:
            try:
                print("Excuting")

                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])})
                print("Excuting.")
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                print("Excuting..")
                sf.write(
                    os.path.join(
                        output_dir,
                        os.path.splitext(os.path.split(
                            feat['filename'])[-1])[0] + '.wav'), y, FS)
                print("Excuted")

            except tf.errors.OutOfRangeError:  # input queue exhausted
                break
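
`make_output_wav_name` and `get_default_output` are the remaining undefined helpers; the last example inlines the same filename logic with os.path.split/os.path.splitext. A sketch of what they might look like (the timestamped default directory is purely an assumption):

import os
from datetime import datetime

def make_output_wav_name(output_dir, filename):
    # Hypothetical: map an input .bin path (possibly bytes coming out of a
    # TF string tensor) to '<output_dir>/<basename>.wav'.
    if isinstance(filename, bytes):
        filename = filename.decode('utf8')
    basename = os.path.splitext(os.path.basename(filename))[0]
    return os.path.join(output_dir, basename + '.wav')

def get_default_output(output_dir=None):
    # Hypothetical: fall back to a timestamped directory when none is given.
    if not output_dir:
        output_dir = os.path.join('output', datetime.now().strftime('%Y%m%d-%H%M%S'))
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    return output_dir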