Beispiel #1
0
def main():
    """Convert the source speaker's utterances to the target speaker.

    Loads the architecture JSON found next to ``args.checkpoint``, builds the
    encode/decode graph, restores the checkpoint and then runs the graph in a
    loop, writing one WAV file per fetched utterance into the output
    directory.

    Relies on the module-level names ``args``, ``MODEL``, ``SPEAKERS`` and
    ``FS``.

    Raises:
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    import traceback  # local: only needed to report unexpected failures

    logdir, ckpt = os.path.split(args.checkpoint)
    arch = tf.gfile.Glob(os.path.join(
        logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    # A single target-speaker id is fed at run time and repeated once per
    # row of `x` (first dimension of `x`).
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0]], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation): reconstruction with the source speaker.
    # These ops are built but never fetched below.
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    # Loop-invariant; resolving it here also makes an unknown target speaker
    # fail loudly instead of silently ending the conversion loop.
    target_feed = {y_t_id: np.asarray([SPEAKERS.index(args.trg)])}

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t], feed_dict=target_feed)
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:
                # Presumably the input pipeline signals exhaustion this way
                # (TODO confirm against read_whole_features): normal exit.
                break
            except Exception:
                # Keep the original best-effort behaviour (stop converting),
                # but do not hide the reason.  KeyboardInterrupt/SystemExit
                # are no longer swallowed.
                traceback.print_exc()
                break
Beispiel #2
0
def main():
    """Run voice conversion from ``args.src`` to ``args.trg`` and save WAVs.

    Steps: read the architecture JSON located beside ``args.checkpoint``,
    build the encoder/decoder graph, restore the checkpoint, then fetch
    utterances one at a time and write each converted waveform to the output
    directory.

    Uses the module-level names ``args``, ``MODEL``, ``SPEAKERS`` and ``FS``.

    Raises:
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    import traceback  # local: only needed to report unexpected failures

    logdir, ckpt = os.path.split(args.checkpoint)
    arch = tf.gfile.Glob(os.path.join(logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    # One target-speaker id is fed per run and tiled along the first
    # dimension of `x`.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0]], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation): decode with the source speaker label.
    # Built into the graph but not fetched in the loop below.
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    # Computed once: the value never changes between iterations, and an
    # unknown target speaker now raises instead of silently ending the loop.
    target_feed = {y_t_id: np.asarray([SPEAKERS.index(args.trg)])}

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict=target_feed
                )
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:
                # Presumably raised when the input pipeline is exhausted
                # (TODO confirm against read_whole_features): normal exit.
                break
            except Exception:
                # Still stop on any failure, as before, but report it rather
                # than swallowing it; KeyboardInterrupt now propagates.
                traceback.print_exc()
                break
Beispiel #3
0
    def sample(self):
        """Build the graph ops that convert source utterances to the target.

        Reads features for ``self.args.src``, encodes the normalized 'sp'
        feature, generates with the target speaker id looked up in
        ``SPEAKERS`` and maps the result back through the normalizer.

        Side effect: sets ``self.reuse`` to False before building the
        encoder/generator and to True afterwards.

        Returns:
            dict with keys ``'features'`` (the value returned by
            ``read_whole_features``), ``'x_t'`` (de-normalized generator
            output) and ``'f0_t'`` (result of ``convert_f0``).
        """
        with tf.name_scope("sample"):
            normalizer = Tanhize(
                xmax=np.fromfile('./etc/xmax.npf'),
                xmin=np.fromfile('./etc/xmin.npf'),
            )
            features = read_whole_features(
                self.args.file_pattern.format(self.args.src))
            x = normalizer.forward_process(features['sp'])
            x = nh_to_nchw(x)
            # The target speaker id is baked into the graph as a constant,
            # repeated once per row of `x`.
            y_t = SPEAKERS.index(self.args.trg) * tf.ones(
                shape=[tf.shape(x)[0]], dtype=tf.int64)
            self.reuse = False
            z, _ = self.encoder(x, False)
            x_t = self.generator(z, y_t, False)
            self.reuse = True
            # Move axis 1 to the end (presumably NCHW -> NHWC; confirm
            # against the generator's output layout).
            x_t = tf.transpose(x_t, [0, 2, 3, 1])
            # Single-argument print(): same output on Python 2 and 3.
            print("x_t shape: %s" % (x_t.get_shape().as_list(),))
            x_t = tf.squeeze(x_t)
            x_t = normalizer.backward_process(x_t)

            f0_s = features['f0']
            f0_t = convert_f0(f0_s, self.args.src, self.args.trg)
        sample = dict()
        sample['features'] = features
        sample['x_t'] = x_t
        sample['f0_t'] = f0_t
        return sample
Beispiel #4
0
    def sample(self):
        """Build conversion ops for one hard-coded test utterance.

        Reads a single binary feature file from disk at graph-construction
        time, splits it into feature columns, runs the (already created)
        encoder and generator with variable reuse, and de-normalizes the
        output.

        Returns:
            dict with keys ``'features'`` (dict of tensors: 'sp', 'ap', 'f0',
            'en', 'speaker', 'filename'), ``'x_t'`` (de-normalized generator
            output) and ``'f0_t'`` (result of ``convert_f0``).
        """
        with tf.name_scope("sample"):
            normalizer = Tanhize(
                xmax=np.fromfile('./etc/xmax.npf'),
                xmin=np.fromfile('./etc/xmin.npf'),
            )
            # Row layout: SP_DIM 'sp' columns, SP_DIM 'ap' columns, then one
            # column each for f0, en and speaker (2 * 513 + 3 == 1029).
            FEAT_DIM = 1029
            SP_DIM = 513
            # NOTE(review): `reues` looks like a typo for `reuse`; kept as is
            # because changing the attribute name could alter how
            # encoder/generator behave. Confirm and fix at the class level.
            self.reues = True
            files = "./dataset/vcc2016/bin/Testing Set/SF1/200005.bin"
            # File stem ("200005") used as the utterance name.
            key = tf.cast(os.path.split(files)[-1].split('.')[0], tf.string)
            with open(files, 'rb') as f:
                value = f.read()
            value = tf.decode_raw(value, tf.float32)
            value = tf.reshape(value, [-1, FEAT_DIM])
            features = dict()
            features['sp'] = value[:, :SP_DIM]
            features['ap'] = value[:, SP_DIM:2 * SP_DIM]
            features['f0'] = value[:, SP_DIM * 2]
            features['en'] = value[:, SP_DIM * 2 + 1]
            features['speaker'] = tf.cast(value[:, SP_DIM * 2 + 2], tf.int64)
            features['filename'] = key
            # Unlike the inverse step below, the forward normalization does
            # not use `normalizer`: 'sp' is clipped to [0, 1] and rescaled to
            # [-1, 1].
            x = tf.clip_by_value(features['sp'], 0., 1.) * 2. - 1.
            x = nh_to_nchw(x)
            # Constant target speaker id, repeated once per row of `x`.
            y_t = SPEAKERS.index(self.args.trg) * tf.ones(
                shape=[tf.shape(x)[0]], dtype=tf.int64)
            with tf.variable_scope("encoder", reuse=True):
                z, _ = self.encoder(x, False, True)
            with tf.variable_scope("generator", reuse=True):
                x_t = self.generator(z, y_t, False, True)
            # Move axis 1 to the end (presumably NCHW -> NHWC; confirm
            # against the generator's output layout).
            x_t = tf.transpose(x_t, [0, 2, 3, 1])
            # Single-argument print(): same output on Python 2 and 3.
            print("x_t shape: %s" % (x_t.get_shape().as_list(),))
            x_t = tf.squeeze(x_t)
            x_t = normalizer.backward_process(x_t)

            f0_s = features['f0']
            f0_t = convert_f0(f0_s, self.args.src, self.args.trg)
        sample = dict()
        sample['features'] = features
        sample['x_t'] = x_t
        sample['f0_t'] = f0_t
        return sample
Beispiel #5
0
def main(unused_args=None):
    """Convert source-speaker utterances with an f0-conditioned decoder.

    Imports the model class named by ``args.module`` / ``args.model``, loads
    the speaker list and the architecture JSON, builds the encode/decode
    graph, restores ``args.checkpoint`` and writes one WAV per fetched
    utterance, printing the per-utterance processing time.

    Args:
        unused_args: ignored; accepted so the function can be used as an
            app-style entry point.

    Raises:
        ValueError: if ``args.model`` is not set.
        FileNotFoundError: if no ``architecture*.json`` file is found.
    """
    if args.model is None:
        raise ValueError(
            '\n  You MUST specify `model`.'
            '\n    Use `python convert.py --help` to see applicable options.'
        )

    module = import_module(args.module, package=None)
    MODEL = getattr(module, args.model)

    FS = 16000

    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    logdir, ckpt = os.path.split(args.checkpoint)
    # When the log directory path contains 'VAE', the architecture file is
    # looked up one directory level higher.
    if 'VAE' in logdir:
        _path_to_arch, _ = os.path.split(logdir)
    else:
        _path_to_arch = logdir
    arch_files = tf.gfile.Glob(
        os.path.join(_path_to_arch, 'architecture*.json'))
    if not arch_files:
        # Previously this case printed a misleading "more than 1" warning
        # and then failed with a bare IndexError.
        raise FileNotFoundError(
            'No architecture*.json found in {!r}'.format(_path_to_arch))
    if len(arch_files) > 1:
        print('WARNING: found more than 1 architecture files!')
    with open(arch_files[0]) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nhwc(x)
    # A single target-speaker id is fed at run time and repeated once per
    # row of `x`.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0]], dtype=tf.int64)

    # The source f0 is used unchanged (no convert_f0 here); an int64 cast of
    # it is passed to the decoder as an extra input.
    f0_t = features['f0']
    f0_t_convert = tf.cast(f0_t, dtype=tf.int64)
    machine = MODEL(arch, is_training=False)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t,
                         f0_t_convert)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    output_dir = get_default_output(args.output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        print()
        # The loop itself only stops on Ctrl-C; any other exception raised
        # while running the graph propagates out to managed_session.
        while True:
            try:
                s_time = time.perf_counter()
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])})
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                print('\rProcessing {}'.format(oFilename), end='')
                # Timing covers graph execution and synthesis, not the write.
                e_time = time.perf_counter()
                print('\n')
                print('Time_sp: {}'.format(e_time - s_time))
                print('\n')
                sf.write(oFilename, y, FS)
            except KeyboardInterrupt:
                break
        print()
Beispiel #6
0
def main():
    """Convert every source utterance with the PyTorch encoder/generator.

    Loads encoder and generator weights from ``args.model_path``, moves both
    to the GPU, then for each feature dict yielded by
    ``read_whole_features`` generates the target-speaker spectrum, converts
    f0, synthesizes a waveform and writes it under ``args.output_dir``.

    Raises:
        ValueError: if ``args.trg`` is not present in the speaker list.
        OSError: if the output directory cannot be created.
    """
    checkpoint = torch.load(args.model_path)
    encoder = Encoder()
    generator = G()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    generator.load_state_dict(checkpoint['generator_state_dict'])
    encoder.cuda()
    generator.cuda()
    # NOTE(review): neither module is switched to eval() here; if they
    # contain dropout/batch-norm layers, inference runs in training mode.
    # Left unchanged - confirm intent before altering.

    FS = 16000
    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    # Loop-invariant values, resolved once up front.
    target_id = SPEAKERS.index(args.trg)
    output_dir = args.output_dir

    total_features = read_whole_features(args.file_pattern.format(args.src))
    for features in total_features:

        x = normalizer.forward_process(features['sp'])
        x = nh_to_nchw(x)
        y_s = features['speaker']
        x = Variable(torch.FloatTensor(x).cuda(), requires_grad=False)

        # Column vector holding the target speaker id, one row per row of x.
        y_t = torch.ones((x.shape[0])).view(-1, 1) * target_id

        z, _ = encoder(x)
        x_t, _ = generator(z, y_t)  # NOTE: the API yields NHWC format
        x_t = torch.squeeze(x_t)
        x_t = normalizer.backward_process(x_t)

        # Source-speaker reconstruction; computed but not used afterwards.
        # Kept so the generator is invoked exactly as before.
        x_s, _ = generator(z, y_s)
        x_s = torch.squeeze(x_s)
        x_s = normalizer.backward_process(x_s)

        f0_s = features['f0']
        f0_t = convert_f0(f0_s, args.src, args.trg)

        # Overwrite the source features in place with the converted ones
        # before synthesis.
        features['sp'] = x_t.cpu().data.numpy()
        features['f0'] = f0_t
        y = pw2wav(features)

        oFilename = make_output_wav_name(output_dir, features['filename'])
        print(f'\r Processing {oFilename}', end=' ')

        # exist_ok avoids the check-then-create race; a real failure now
        # surfaces here instead of being printed as 'error' and ignored.
        out_dir = os.path.dirname(oFilename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        sf.write(oFilename, y, FS)

    print('\n==finish==')
def main():
    """Convert source utterances using an explicit encoder/generator graph.

    Loads the architecture JSON from ``args.arch``, builds the encoder and
    generator under their variable scopes, restores ``args.checkpoint`` and
    writes one ``<utterance>.wav`` per fetched utterance into the output
    directory.

    Uses the module-level names ``args``, ``MODEL``, ``SPEAKERS`` and ``FS``.

    Raises:
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    import traceback  # local: only needed to report unexpected failures

    # All print() calls take a single pre-formatted string so the output is
    # the same on Python 2 and Python 3.
    logdir, ckpt = os.path.split(args.checkpoint)
    print('logdir: %s' % logdir)
    print('ckpt: %s' % ckpt)
    arch = args.arch
    with open(arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))
    print("features shape: %s" % (features['sp'].shape,))
    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']
    # One target-speaker id is fed per run and tiled along the first
    # dimension of `x`.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0]], dtype=tf.int64)

    machine = MODEL(arch, args, False, False)
    with tf.variable_scope("encoder"):
        z, _ = machine.encoder(x, True)
    with tf.variable_scope("generator"):
        x_t = machine.generator(z, y_t,
                                True)  # NOTE: the API yields NHWC format
    # Transpose the generator output. The previous code transposed the
    # input `x`, which discarded the conversion result and wrote the source
    # spectrum back out.
    x_t = tf.transpose(x_t, [0, 2, 3, 1])
    print("x_t shape: %s" % (x_t.get_shape().as_list(),))
    x_t = tf.squeeze(x_t)

    x_t = normalizer.backward_process(x_t)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)
    print("output_dir########: %s" % output_dir)

    # Loop-invariant; resolving it here makes an unknown target speaker
    # raise instead of silently ending the conversion loop.
    target_feed = {y_t_id: np.asarray([SPEAKERS.index(args.trg)])}

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    print("logdir: %s" % logdir)
    with sv.managed_session() as sess:
        # Restore directly from the full checkpoint path.
        ckpt = os.path.join(logdir, ckpt)
        saver.restore(sess, ckpt)

        while True:
            try:
                print("Excuting")

                feat, f0, sp = sess.run(
                    [features, f0_t, x_t], feed_dict=target_feed)
                print("Excuting.")
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                print("Excuting..")
                # Output name: source file's base name with a .wav extension.
                stem = os.path.splitext(os.path.split(feat['filename'])[-1])[0]
                sf.write(os.path.join(output_dir, stem + '.wav'), y, FS)
                print("Excuted")

            except tf.errors.OutOfRangeError:
                # Presumably raised when the input pipeline is exhausted
                # (TODO confirm against read_whole_features): normal exit.
                break
            except Exception:
                # Still stop on any failure, as before, but report it rather
                # than swallowing it; KeyboardInterrupt now propagates.
                traceback.print_exc()
                break