def main():
    """Decode the features of ``args.src`` with speaker label ``args.trg``.

    The model is built from the first ``architecture*.json`` found in the
    directory of ``args.checkpoint``, the checkpoint is loaded, and features
    are then fetched in a loop; each fetched item is turned into a waveform
    with ``pw2wav`` and written with ``sf.write``.

    Raises:
        IOError: if no ``architecture*.json`` exists next to the checkpoint.
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    logdir, ckpt = os.path.split(args.checkpoint)

    arch_files = tf.gfile.Glob(os.path.join(logdir, 'architecture*.json'))
    if not arch_files:
        raise IOError('No architecture*.json found in {}'.format(logdir))
    with open(arch_files[0]) as fp:  # should only be 1 file
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']

    # The target speaker id is fed at run time as a length-1 vector and
    # multiplied into a vector of ones with one entry per row of x.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1, ])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0], ], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation): decode with the source speaker labels.
    # The result is never fetched below; it only adds ops to the graph.
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    # Loop-invariant; looked up once so an unknown target speaker fails
    # loudly here instead of being swallowed inside the loop.
    target_id = np.asarray([SPEAKERS.index(args.trg)])

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: target_id},
                )
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:
                # Presumably how read_whole_features signals that the input
                # is exhausted (queue-based reader) -- normal termination.
                break
            except Exception as err:
                # Any other failure still ends the loop, as before, but is
                # reported. KeyboardInterrupt/SystemExit now propagate.
                print('Conversion stopped early: {}'.format(err))
                break
def main():
    """Decode the features of ``args.src`` with speaker label ``args.trg``.

    Loads the architecture description stored beside ``args.checkpoint``,
    builds the encode/decode graph, restores the checkpoint and then fetches
    features repeatedly, writing one waveform (via ``pw2wav`` and
    ``sf.write``) per fetched item.

    Raises:
        IOError: if no ``architecture*.json`` exists next to the checkpoint.
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    logdir, ckpt = os.path.split(args.checkpoint)

    candidates = tf.gfile.Glob(os.path.join(logdir, 'architecture*.json'))
    if not candidates:
        raise IOError('No architecture*.json found in {}'.format(logdir))
    with open(candidates[0]) as fp:  # should only be 1 file
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)
    y_s = features['speaker']

    # Target speaker id: fed as a length-1 vector, then multiplied into a
    # vector of ones that has one entry per row of x.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1, ])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0], ], dtype=tf.int64)

    machine = MODEL(arch)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    # For sanity check (validation): same latent code decoded with the
    # source speaker labels. Not fetched by the loop below.
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)

    # Resolved once, outside the loop: an unknown target speaker raises
    # ValueError here rather than silently producing no output.
    target_id = np.asarray([SPEAKERS.index(args.trg)])

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: target_id},
                )
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:
                # Normal end of input -- assumes read_whole_features is a
                # queue-based reader that raises this when drained.
                break
            except Exception as err:
                # Still stop on any other error (original behaviour), but
                # say why. KeyboardInterrupt/SystemExit are not caught.
                print('Conversion stopped early: {}'.format(err))
                break
def sample(self):
    """Build the ops that decode ``self.args.src`` features as ``self.args.trg``.

    Everything is created under the ``"sample"`` name scope.

    Returns:
        dict with three entries: ``'features'`` (what
        ``read_whole_features`` returned), ``'x_t'`` (the de-normalised
        generator output) and ``'f0_t'`` (the result of ``convert_f0``).
    """
    with tf.name_scope("sample"):
        normalizer = Tanhize(
            xmax=np.fromfile('./etc/xmax.npf'),
            xmin=np.fromfile('./etc/xmin.npf'),
        )

        source_pattern = self.args.file_pattern.format(self.args.src)
        features = read_whole_features(source_pattern)

        x = nh_to_nchw(normalizer.forward_process(features['sp']))

        # One copy of the target speaker index per row of x.
        target_index = SPEAKERS.index(self.args.trg)
        row_count = tf.shape(x)[0]
        y_t = target_index * tf.ones(shape=[row_count, ], dtype=tf.int64)

        # self.reuse is switched off only while the encoder and generator
        # are called, then switched back on.
        self.reuse = False
        z, _ = self.encoder(x, False)
        converted = self.generator(z, y_t, False)
        self.reuse = True

        converted = tf.transpose(converted, [0, 2, 3, 1])
        # Same text on Python 2 and 3 as the original print statement.
        print("{} {}".format("x_t shape:", converted.get_shape().as_list()))
        converted = normalizer.backward_process(tf.squeeze(converted))

        f0_t = convert_f0(features['f0'], self.args.src, self.args.trg)

        return {
            'features': features,
            'x_t': converted,
            'f0_t': f0_t,
        }
def sample(self):
    """Build the ops that decode one hard-coded test file as ``self.args.trg``.

    The raw bytes of the file are read eagerly, reinterpreted as float32 and
    reshaped to rows of ``FEAT_DIM`` values, which are then sliced into
    ``sp`` / ``ap`` / ``f0`` / ``en`` / ``speaker`` columns. The encoder and
    generator are called inside variable scopes opened with ``reuse=True``.

    Returns:
        dict with three entries: ``'features'`` (the sliced feature dict),
        ``'x_t'`` (the de-normalised generator output) and ``'f0_t'`` (the
        result of ``convert_f0``).
    """
    with tf.name_scope("sample"):
        normalizer = Tanhize(
            xmax=np.fromfile('./etc/xmax.npf'),
            xmin=np.fromfile('./etc/xmin.npf'),
        )

        # Row layout: SP_DIM sp values, SP_DIM ap values, then f0, en and
        # the speaker id (2 * 513 + 3 == 1029).
        FEAT_DIM = 1029
        SP_DIM = 513

        # NOTE(review): `reues` looks like a typo for `reuse`. Left as-is
        # because nothing visible here reads either attribute -- confirm and
        # rename (or delete) once the encoder/generator code is checked.
        self.reues = True

        files = "./dataset/vcc2016/bin/Testing Set/SF1/200005.bin"
        # File stem ("200005") is used as the 'filename' feature.
        key = tf.cast(os.path.split(files)[-1].split('.')[0], tf.string)

        # The read was missing from the block as received (`value` was used
        # before assignment); the raw bytes are what tf.decode_raw needs.
        with open(files, 'rb') as f:
            value = f.read()
        value = tf.decode_raw(value, tf.float32)
        value = tf.reshape(value, [-1, FEAT_DIM])

        features = dict()
        features['sp'] = value[:, :SP_DIM]
        features['ap'] = value[:, SP_DIM:2 * SP_DIM]
        features['f0'] = value[:, SP_DIM * 2]
        features['en'] = value[:, SP_DIM * 2 + 1]
        features['speaker'] = tf.cast(value[:, SP_DIM * 2 + 2], tf.int64)
        features['filename'] = key

        # Clip sp to [0, 1] and rescale to [-1, 1]; the normalizer is only
        # used for the inverse mapping further down.
        x = tf.clip_by_value(features['sp'], 0., 1.) * 2. - 1.
        x = nh_to_nchw(x)

        # One copy of the target speaker index per row of x.
        y_t = SPEAKERS.index(self.args.trg) * tf.ones(
            shape=[tf.shape(x)[0], ], dtype=tf.int64)

        with tf.variable_scope("encoder", reuse=True):
            z, _ = self.encoder(x, False, True)
        with tf.variable_scope("generator", reuse=True):
            x_t = self.generator(z, y_t, False, True)

        x_t = tf.transpose(x_t, [0, 2, 3, 1])
        # Same text on Python 2 and 3 as the original print statement.
        print("{} {}".format("x_t shape:", x_t.get_shape().as_list()))
        x_t = tf.squeeze(x_t)
        x_t = normalizer.backward_process(x_t)

        f0_t = convert_f0(features['f0'], self.args.src, self.args.trg)

        sample = dict()
        sample['features'] = features
        sample['x_t'] = x_t
        sample['f0_t'] = f0_t
        return sample
def main(unused_args=None):
    """Decode the features of ``args.src`` with speaker label ``args.trg``.

    The model class ``args.model`` is imported from ``args.module`` and built
    from the architecture JSON found beside the checkpoint (one directory
    higher when the checkpoint directory path contains ``'VAE'``). Features
    are then fetched in a loop; each item is converted to a waveform with
    ``pw2wav``, timed, and written with ``sf.write``.

    Args:
        unused_args: ignored.

    Raises:
        ValueError: if ``args.model`` is not set, or ``args.trg`` is not in
            the speaker list.
        FileNotFoundError: if no ``architecture*.json`` file is found.
    """
    # args(sys.argv)
    if args.model is None:
        raise ValueError(
            '\n You MUST specify `model`.'
            '\n Use `python --help` to see applicable options.'
        )

    module = import_module(args.module, package=None)
    MODEL = getattr(module, args.model)

    FS = 16000

    with open(args.speaker_list) as fp:
        SPEAKERS = [line.strip() for line in fp]

    logdir, ckpt = os.path.split(args.checkpoint)

    if 'VAE' in logdir:
        _path_to_arch, _ = os.path.split(logdir)
    else:
        _path_to_arch = logdir

    arch_files = tf.gfile.Glob(
        os.path.join(_path_to_arch, 'architecture*.json'))
    if not arch_files:
        # Previously this case printed the "more than 1" warning and then
        # died with a bare IndexError.
        raise FileNotFoundError(
            'No architecture*.json found in {}'.format(_path_to_arch))
    if len(arch_files) > 1:
        print('WARNING: found more than 1 architecture files!')
    with open(arch_files[0]) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nhwc(x)

    # Target speaker id: fed as a length-1 vector, then multiplied into a
    # vector of ones that has one entry per row of x.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1, ])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0], ], dtype=tf.int64)

    # f0 is passed through unconverted; an int64 cast of it is handed to
    # the decoder as a third input.
    f0_t = features['f0']
    f0_t_convert = tf.cast(f0_t, dtype=tf.int64)

    machine = MODEL(arch, is_training=False)
    z = machine.encode(x)
    x_t = machine.decode(z, y_t, f0_t_convert)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    output_dir = get_default_output(args.output_dir)

    # Loop-invariant feed value, resolved once.
    target_id = np.asarray([SPEAKERS.index(args.trg)])

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        print()
        while True:
            try:
                s_time = time.perf_counter()
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: target_id},
                )
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                print('\rProcessing {}'.format(oFilename), end='')
                # Timing covers the fetch and pw2wav, not the file write.
                e_time = time.perf_counter()
                print('\n')
                print('Time_sp: {}'.format(e_time - s_time))
                print('\n')
                sf.write(oFilename, y, FS)
            except KeyboardInterrupt:
                break
            except tf.errors.OutOfRangeError:
                # End of input made explicit -- assumes read_whole_features
                # is a queue-based reader. Before, this exception escaped
                # the loop and was absorbed by managed_session.
                break
        print()
def main():
    """Decode the features of ``args.src`` with speaker label ``args.trg``.

    Loads encoder and generator weights from ``args.model_path``, moves both
    modules to the GPU, and for every item yielded by
    ``read_whole_features`` replaces its ``'sp'`` and ``'f0'`` entries with
    the generated/converted values, synthesises a waveform with ``pw2wav``
    and writes it under ``args.output_dir``.

    Raises:
        ValueError: if ``args.trg`` is not in the speaker list.
        OSError: if the output directory cannot be created.
    """
    checkpoint = torch.load(args.model_path)
    encoder = Encoder()
    generator = G()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    generator.load_state_dict(checkpoint['generator_state_dict'])
    encoder.cuda()
    generator.cuda()
    # NOTE(review): neither module is switched to eval(); if they contain
    # dropout or batch-norm layers, inference runs in training mode.
    # Confirm that is intended.

    FS = 16000
    with open(args.speaker_list) as fp:
        SPEAKERS = [line.strip() for line in fp]
    # Loop-invariant; an unknown target speaker fails before any work.
    target_index = SPEAKERS.index(args.trg)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    output_dir = args.output_dir
    total_features = read_whole_features(args.file_pattern.format(args.src))

    for features in total_features:
        x = normalizer.forward_process(features['sp'])
        x = nh_to_nchw(x)
        x = Variable(torch.FloatTensor(x).cuda(), requires_grad=False)
        # Column vector holding the target speaker index once per row of x.
        y_t = torch.ones((x.shape[0])).view(-1, 1) * target_index

        # Pure inference: no autograd graph is needed. The former extra
        # "sanity" decode with the source labels was computed and thrown
        # away for every item, so it is gone.
        with torch.no_grad():
            z, _ = encoder(x)
            x_t, _ = generator(z, y_t)  # NOTE: the API yields NHWC format
            x_t = torch.squeeze(x_t)
            x_t = normalizer.backward_process(x_t)

        f0_t = convert_f0(features['f0'], args.src, args.trg)

        features['sp'] = x_t.cpu().data.numpy()
        features['f0'] = f0_t

        y = pw2wav(features)
        oFilename = make_output_wav_name(output_dir, features['filename'])
        print(f'\r Processing {oFilename}', end=' ')

        # exist_ok avoids the check-then-create race without hiding real
        # failures (the old handler printed 'error' and let sf.write fail).
        target_dir = os.path.dirname(oFilename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        sf.write(oFilename, y, FS)

    print('\n==finish==')
def main():
    """Decode the features of ``args.src`` with speaker label ``args.trg``.

    Builds the model from the JSON file ``args.arch``, restores the
    checkpoint ``args.checkpoint`` directly through ``saver.restore`` and
    fetches features in a loop, writing each result to
    ``<output_dir>/<input file stem>.wav``.

    Progress messages are printed in a form that yields the same text on
    Python 2 and Python 3.

    Raises:
        ValueError: if ``args.trg`` is not listed in ``SPEAKERS``.
    """
    logdir, ckpt = os.path.split(args.checkpoint)
    print('{} {}'.format('logdir:', logdir))
    print('{} {}'.format('ckpt:', ckpt))

    with open(args.arch) as fp:
        arch = json.load(fp)

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    features = read_whole_features(args.file_pattern.format(args.src))
    print('{} {}'.format("features shape:", features['sp'].shape))

    x = normalizer.forward_process(features['sp'])
    x = nh_to_nchw(x)

    # Target speaker id: fed as a length-1 vector, then multiplied into a
    # vector of ones that has one entry per row of x.
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1, ])
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0], ], dtype=tf.int64)

    machine = MODEL(arch, args, False, False)
    with tf.variable_scope("encoder"):
        z, _ = machine.encoder(x, True)
    with tf.variable_scope("generator"):
        x_t = machine.generator(z, y_t, True)

    # Transpose the generator output. The original transposed the input
    # `x` here, which discarded the generator result and sent the
    # (normalised, then de-normalised) source features to pw2wav.
    x_t = tf.transpose(x_t, [0, 2, 3, 1])
    print('{} {}'.format("x_t shape:", x_t.get_shape().as_list()))
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)

    output_dir = get_default_output(args.output_dir)
    print('{} {}'.format("output_dir########:", output_dir))

    # Loop-invariant; an unknown target speaker raises ValueError here
    # instead of being swallowed inside the loop.
    target_id = np.asarray([SPEAKERS.index(args.trg)])

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    print('{} {}'.format("logdir:", logdir))
    with sv.managed_session() as sess:
        # load(saver, sess, logdir, ckpt=ckpt) is bypassed: the exact
        # checkpoint path is restored directly.
        ckpt = os.path.join(logdir, ckpt)
        saver.restore(sess, ckpt)
        while True:
            try:
                print("Excuting")
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: target_id},
                )
                print("Excuting.")
                feat.update({'sp': sp, 'f0': f0})
                y = pw2wav(feat)
                print("Excuting..")
                stem = os.path.splitext(
                    os.path.split(feat['filename'])[-1])[0]
                sf.write(os.path.join(output_dir, stem + '.wav'), y, FS)
                print("Excuted")
            except tf.errors.OutOfRangeError:
                # Normal end of input -- assumes read_whole_features is a
                # queue-based reader that raises this when drained.
                break
            except Exception as err:
                # Still stop on any other error (original behaviour), but
                # say why. KeyboardInterrupt/SystemExit are not caught.
                print('Conversion stopped early: {}'.format(err))
                break