# Imports assumed by this snippet; fps, window_size, video_step_size_in_ms,
# mfcc_chunk_size, args, datagen and create_model are defined elsewhere in
# the script.
import subprocess
from os import path

import cv2
import numpy as np
from scipy.io import loadmat
from tqdm import tqdm


def main():
    # Use the last dot-separated component so paths with extra dots still work.
    if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.face)]
    else:
        video_stream = cv2.VideoCapture(args.face)
        length = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Number of frames in the input video: " + str(length))

        frames = []
        while 1:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            frames.append(frame)
            if len(frames) % 2000 == 0:
                print(len(frames))
            if len(frames) * (1. / fps) >= args.max_sec:
                break

        # Keep only the frame at the midpoint of each sliding audio window.
        full_frames = []
        ss = 0.
        es = ss + (window_size / 1000.)
        mid_second = (ss + es) / 2.
        while int(mid_second * fps) < len(frames):
            full_frames.append(frames[int(mid_second * fps)])
            ss += video_step_size_in_ms / 1000.
            es = ss + (window_size / 1000.)
            mid_second = (ss + es) / 2.

    print("Number of frames to be used for inference: " + str(len(full_frames)))

    # Chunk the precomputed MFCCs; one chunk per selected video frame.
    mfccs = loadmat(args.mat)['mfccs']
    mfcc_chunks = []
    i = 0
    time_ms = 0.
    frame_duration = (1. / fps) * 1000.
    mfcc_mags = []
    while i < (len(mfccs[0]) - (mfcc_chunk_size - 1)):
        mfcc_chunks.append(mfccs[:, i:i + mfcc_chunk_size])
        mfcc_mags.append(np.sum(mfccs[:, i:i + mfcc_chunk_size]))
        time_ms += frame_duration
        i = int(time_ms // 10)  # MFCC frames are spaced 10 ms apart
        if (time_ms / 1000.) > args.max_sec:
            break

    print("Length of mfcc chunks: " + str(len(mfcc_chunks)))

    batch_size = args.lipgan_batch_size
    gen = datagen(full_frames.copy(), mfcc_chunks)

    for i, (img_batch, mfcc_batch, frames, coords) in enumerate(
            tqdm(gen, total=int(np.ceil(float(len(mfcc_chunks)) / batch_size)))):
        if i == 0:
            model = create_model(args)
            print("Model Created")
            model.load_weights(args.checkpoint_path)
            print("Model loaded")

            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'),
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        pred = model.predict([img_batch, mfcc_batch])
        pred = pred * 255

        # Paste each predicted mouth region back into its source frame.
        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c
            p = cv2.resize(p, (x2 - x1, y2 - y1))
            f[y1:y2, x1:x2] = p
            out.write(f)

    out.release()

    # Mux the original audio with the generated video.
    command = 'ffmpeg -i {} -i {} -strict -2 -q:v 1 {}'.format(
        args.audio, path.join(args.results_dir, 'result.avi'),
        path.join(args.results_dir, 'result_voice.avi'))
    subprocess.call(command, shell=True)
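# A standalone sketch of the window-midpoint frame selection used in main()
# above, with hypothetical values for the globals (window_size and
# video_step_size_in_ms are defined elsewhere in the script):
fps = 25.0
window_size = 350.0           # ms of audio per window (assumed)
video_step_size_in_ms = 40.0  # hop between windows (assumed; 1 frame at 25 fps)

ss, picked = 0.0, []
while len(picked) < 5:
    es = ss + window_size / 1000.
    mid_second = (ss + es) / 2.           # midpoint of the current window
    picked.append(int(mid_second * fps))  # frame index nearest that midpoint
    ss += video_step_size_in_ms / 1000.
print(picked)  # [4, 5, 6, 7, 8]: one frame per hop, offset by half a window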
parser.add_argument('--all_images', default='filenames.pkl',
                    help='Filename for caching image paths')
args = parser.parse_args()

if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    model = create_model_residual(args, mel_step_size)
else:
    model = create_model(args, mel_step_size)

if args.resume:
    model.load_weights(args.resume)
    print('Resuming from : {}'.format(args.resume))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)

class WeightsSaver(Callback):
    def __init__(self, N, weight_path):
        self.N = N
        self.batch = 0
        self.weight_path = weight_path

    def on_batch_end(self, batch, logs={}):
        # Minimal completion (assumed): save the model weights every N batches.
        self.batch += 1
        if self.batch % self.N == 0:
            self.model.save_weights(self.weight_path)
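# Hypothetical usage of WeightsSaver with Keras' fit_generator; the actual
# training call is not part of this snippet and the values below are
# illustrative only.
saver = WeightsSaver(N=1000, weight_path=path.join(args.logdir, 'weights.h5'))
model.fit_generator(train_datagen, steps_per_epoch=200, epochs=20,
                    callbacks=[saver])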
if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(
        open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    gen = mg.create_model_residual(args)
else:
    gen = mg.create_model(args)
disc = md.create_model(args)
comb = mg.create_combined_model(gen, disc, args)

if args.resume_gen:
    gen.load_weights(args.resume_gen)
    print('Resuming generator from : {}'.format(args.resume_gen))
if args.resume_disc:
    disc.load_weights(args.resume_disc)
    print('Resuming discriminator from : {}'.format(args.resume_disc))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)
comb.summary()
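# mg.create_combined_model is defined elsewhere in the project. A minimal
# sketch of the usual Keras pattern for such a combined model, assuming the
# generator maps (face, audio) to a face and the discriminator scores a
# (face, audio) pair; the shapes, losses and weights below are illustrative,
# not the project's actual values:
from keras.models import Model
from keras.layers import Input

def create_combined_model_sketch(gen, disc,
                                 img_shape=(96, 96, 6),
                                 audio_shape=(12, 35, 1)):
    disc.trainable = False             # freeze D while training G through comb
    img_in = Input(shape=img_shape)
    audio_in = Input(shape=audio_shape)
    fake = gen([img_in, audio_in])     # generated face
    validity = disc([fake, audio_in])  # frozen discriminator's score
    comb = Model([img_in, audio_in], [fake, validity])
    comb.compile(loss=['mae', 'binary_crossentropy'],
                 loss_weights=[1., 0.01], optimizer='adam')
    return comb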
# Imports assumed by this snippet; gen_config, disc_config, gens, h_disc,
# disc_train_data and disc_step come from the surrounding project.
import os
import sys
import time

import numpy as np
import tensorflow as tf
from six.moves import xrange


def al_train():
    with tf.Session() as sess:
        vocab, rev_vocab, dev_set, train_set = gens.prepare_data(gen_config)
        for s in train_set:
            print("al train len: ", len(s))

        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(gen_config.buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        disc_model = h_disc.create_model(sess, disc_config, disc_config.name_model)
        gen_model = gens.create_model(sess, gen_config, forward_only=False,
                                      name_scope=gen_config.name_model)

        current_step = 0
        step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
        gen_loss_summary = tf.Summary()
        disc_loss_summary = tf.Summary()

        while True:
            current_step += 1
            start_time = time.time()

            # Pick a bucket at random, weighted by bucket size.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            print("==================Update Discriminator: %d====================="
                  % current_step)

            # 1. Sample (X, Y) from the real disc_data
            encoder_inputs, decoder_inputs, target_weights, source_inputs, source_outputs = \
                gen_model.get_batch(train_set, bucket_id, gen_config.batch_size)

            # 2. Sample (X, Y) and (X, ^Y) through ^Y ~ G(*|X)
            train_query, train_answer, train_labels = disc_train_data(
                sess, gen_model, vocab, source_inputs, source_outputs,
                encoder_inputs, decoder_inputs, target_weights, bucket_id,
                mc_search=False)

            if current_step % 200 == 0:
                print("train_query: ", len(train_query))
                print("train_answer: ", len(train_answer))
                print("train_labels: ", len(train_labels))
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print("train_answer_sentence: ", train_answer[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output])
                                    for output in train_answer[i]]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Update D using (X, Y) as positive and (X, ^Y) as negative examples
            _, disc_step_loss = disc_step(sess, bucket_id, disc_model, train_query,
                                          train_answer, train_labels,
                                          forward_only=False)
            disc_loss += disc_step_loss / disc_config.steps_per_checkpoint

            print("==================Update Generator: %d========================="
                  % current_step)

            # 1. Sample (X, Y) from the real disc_data
            update_gen_data = gen_model.get_batch(train_set, bucket_id,
                                                  gen_config.batch_size)
            encoder, decoder, weights, source_inputs, source_outputs = update_gen_data

            # 2. Sample (X, Y) and (X, ^Y) through ^Y ~ G(*|X) with Monte Carlo search
            train_query, train_answer, train_labels = disc_train_data(
                sess, gen_model, vocab, source_inputs, source_outputs,
                encoder, decoder, weights, bucket_id, mc_search=True)

            if current_step % 200 == 0:
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print(" ".join([tf.compat.as_str(rev_vocab[output])
                                    for output in train_answer[i]]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Compute reward r for (X, ^Y) using D, based on Monte Carlo search
            reward, _ = disc_step(sess, bucket_id, disc_model, train_query,
                                  train_answer, train_labels, forward_only=True)
            batch_reward += reward / gen_config.steps_per_checkpoint
            print("step_reward: ", reward)

            # 4. Update G on (X, ^Y) using reward r
            gan_adjusted_loss, gen_step_loss, _ = gen_model.step(
                sess, encoder, decoder, weights, bucket_id, forward_only=False,
                reward=reward, up_reward=True, debug=True)
            gen_loss += gen_step_loss / gen_config.steps_per_checkpoint
            print("gen_step_loss: ", gen_step_loss)
            print("gen_step_adjusted_loss: ", gan_adjusted_loss)

            # 5. Teacher forcing: update G on (X, Y)
            t_adjusted_loss, t_step_loss, _ = gen_model.step(
                sess, encoder, decoder, weights, bucket_id, forward_only=False)
            t_loss += t_step_loss / gen_config.steps_per_checkpoint
            print("t_step_loss: ", t_step_loss)
            print("t_adjusted_loss", t_adjusted_loss)

            if current_step % gen_config.steps_per_checkpoint == 0:
                step_time += (time.time() - start_time) / gen_config.steps_per_checkpoint
                print("current_steps: %d, step time: %.4f, disc_loss: %.3f, "
                      "gen_loss: %.3f, t_loss: %.3f, reward: %.3f"
                      % (current_step, step_time, disc_loss, gen_loss, t_loss,
                         batch_reward))

                disc_loss_value = disc_loss_summary.value.add()
                disc_loss_value.tag = disc_config.name_loss
                disc_loss_value.simple_value = float(disc_loss)

                gen_global_steps = sess.run(gen_model.global_step)
                gen_loss_value = gen_loss_summary.value.add()
                gen_loss_value.tag = gen_config.name_loss
                gen_loss_value.simple_value = float(gen_loss)
                t_loss_value = gen_loss_summary.value.add()
                t_loss_value.tag = gen_config.teacher_loss
                t_loss_value.simple_value = float(t_loss)
                batch_reward_value = gen_loss_summary.value.add()
                batch_reward_value.tag = gen_config.reward_name
                batch_reward_value.simple_value = float(batch_reward)

                if current_step % (gen_config.steps_per_checkpoint * 2) == 0:
                    print("current_steps: %d, save disc model" % current_step)
                    disc_ckpt_dir = os.path.abspath(
                        os.path.join(disc_config.train_dir, "checkpoints"))
                    if not os.path.exists(disc_ckpt_dir):
                        os.makedirs(disc_ckpt_dir)
                    disc_model_path = os.path.join(disc_ckpt_dir, "disc.model")
                    disc_model.saver.save(sess, disc_model_path,
                                          global_step=disc_model.global_step)

                    print("current_steps: %d, save gen model" % current_step)
                    gen_ckpt_dir = os.path.abspath(
                        os.path.join(gen_config.train_dir, "checkpoints"))
                    if not os.path.exists(gen_ckpt_dir):
                        os.makedirs(gen_ckpt_dir)
                    gen_model_path = os.path.join(gen_ckpt_dir, "gen.model")
                    gen_model.saver.save(sess, gen_model_path,
                                         global_step=gen_model.global_step)

                step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
                sys.stdout.flush()
def main():
    if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.face)]
    else:
        video_stream = cv2.VideoCapture(args.face)
        length = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Number of frames in the input video: " + str(length))

        frames = []
        while 1:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            frames.append(frame)
            if len(frames) % 2000 == 0:
                print(len(frames))
            if len(frames) * (1. / fps) >= args.max_sec:
                break

        # Keep only the frame at the midpoint of each sliding audio window.
        full_frames = []
        ss = 0.
        es = ss + (window_size / 1000.)
        mid_second = (ss + es) / 2.
        while int(mid_second * fps) < len(frames):
            full_frames.append(frames[int(mid_second * fps)])
            ss += video_step_size_in_ms / 1000.
            es = ss + (window_size / 1000.)
            mid_second = (ss + es) / 2.

    print("Number of frames to be used for inference: " + str(len(full_frames)))

    mfccs = loadmat(args.mat)['mfccs']
    mfcc_chunks = []
    i = 0
    time_ms = 0.
    frame_duration = (1. / fps) * 1000.
    mfcc_mags = []
    while i < (len(mfccs[0]) - (mfcc_chunk_size - 1)):
        mfcc_chunks.append(mfccs[:, i:i + mfcc_chunk_size])
        mfcc_mags.append(np.sum(mfccs[:, i:i + mfcc_chunk_size]))
        time_ms += frame_duration
        i = int(time_ms // 10)  # MFCC frames are spaced 10 ms apart
        if (time_ms / 1000.) > args.max_sec:
            break

    print("Length of mfcc chunks: " + str(len(mfcc_chunks)))

    batch_size = args.lipgan_batch_size
    gen = datagen(full_frames.copy(), mfcc_chunks)

    for i, (img_batch, mfcc_batch, frames, coords) in enumerate(
            tqdm(gen, total=int(np.ceil(float(len(mfcc_chunks)) / batch_size)))):
        if i == 0:
            model = create_model(args)
            print("Model Created")
            model.load_weights(args.checkpoint_path)
            print("Model loaded")

            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'),
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        pred = model.predict([img_batch, mfcc_batch])
        pred = pred * 255

        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c
            # Clamp the box to the frame (this build assumes 1280x720 input).
            x1 = max(x1, 0)
            y1 = max(y1, 0)
            x2 = min(x2, 1280)
            y2 = min(y2, 720)
            p = cv2.resize(p, (x2 - x1, y2 - y1))

            # Feathered blending: alpha falls from 0.9 (mostly original frame)
            # at the patch border to 0 (pure prediction) over the outer 10
            # pixels of each edge.
            alphaArr = np.zeros((p.shape[0], p.shape[1]))
            blends = np.array((0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0))
            # Left edge (first 10 columns)
            for col in range(10):
                alphaArr[:, col] = blends[col]
            # Right edge (last 10 columns)
            j = 0
            for col in reversed(range(p.shape[1] - 10, p.shape[1])):
                alphaArr[:, col] = blends[j]
                j += 1
            # Top edge (first 10 rows; overwrites the corners set above)
            for row in range(10):
                alphaArr[row, :] = blends[row]
            # Bottom edge (last 10 rows)
            j = 0
            for row in reversed(range(p.shape[0] - 10, p.shape[0])):
                alphaArr[row, :] = blends[j]
                j += 1

            # Per-channel alpha blend of the original frame and the prediction
            # (OpenCV frames are BGR, so channel 0 is actually blue despite
            # the variable names).
            pBlendR = (alphaArr * f[y1:y2, x1:x2, 0]) + ((1 - alphaArr) * p[:, :, 0])
            pBlendG = (alphaArr * f[y1:y2, x1:x2, 1]) + ((1 - alphaArr) * p[:, :, 1])
            pBlendB = (alphaArr * f[y1:y2, x1:x2, 2]) + ((1 - alphaArr) * p[:, :, 2])
            f[y1:y2, x1:x2, 0] = pBlendR
            f[y1:y2, x1:x2, 1] = pBlendG
            f[y1:y2, x1:x2, 2] = pBlendB

            out.write(f)

    # Finalize the silent video before muxing the audio track.
    out.release()

    command = 'ffmpeg -i {} -i {} -strict -2 {}'.format(
        args.audio, path.join(args.results_dir, 'result.avi'),
        path.join(args.results_dir, 'result_voice.mp4'))
    subprocess.call(command, shell=True)
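# A vectorized near-equivalent of the edge-feathering loops above (a sketch,
# not part of the original code). It differs only at the corners, where the
# loop version lets the row ramps overwrite the column ramps while this one
# uses the distance to the nearest edge:
import numpy as np

def edge_alpha(h, w, ramp=10):
    """Alpha mask falling from 0.9 at the patch border to 0 in the interior."""
    blends = np.linspace(0.9, 0.0, ramp)  # 0.9, 0.8, ..., 0.0
    rows = np.arange(h)[:, None]
    cols = np.arange(w)[None, :]
    dist = np.minimum(np.minimum(rows, (h - 1) - rows),
                      np.minimum(cols, (w - 1) - cols))  # dist to nearest edge
    alpha = np.zeros((h, w))
    edge = dist < ramp
    alpha[edge] = blends[dist[edge]]
    return alpha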
def main():
    if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.face)]
    else:
        video_stream = cv2.VideoCapture(args.face)
        full_frames = []
        while 1:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            full_frames.append(frame)
            if len(full_frames) % 2000 == 0:
                print(len(full_frames))
            if len(full_frames) * (1. / fps) >= args.max_sec:
                break

    print("Number of frames available for inference: " + str(len(full_frames)))

    # Compute the mel spectrogram directly from the input audio.
    wav = audio.load_wav(args.audio, 16000)
    mel = audio.melspectrogram(wav)
    print(mel.shape)
    if np.isnan(mel.reshape(-1)).sum() > 0:
        raise ValueError('Mel contains nan!')

    # One mel chunk per output video frame.
    mel_chunks = []
    i = 0
    while 1:
        start_idx = int(i * mel_idx_multiplier)
        if start_idx + mel_step_size > len(mel[0]):
            break
        mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
        i += 1
    print("Length of mel chunks: {}".format(len(mel_chunks)))

    batch_size = args.lipgan_batch_size
    gen = datagen(full_frames.copy(), mel_chunks)

    for i, (img_batch, mel_batch, frames, coords) in enumerate(
            tqdm(gen, total=int(np.ceil(float(len(mel_chunks)) / batch_size)))):
        if i == 0:
            model = create_model(args, mel_step_size)
            print("Model Created")
            model.load_weights(args.checkpoint_path)
            print("Model loaded")

            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'),
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        pred = model.predict([img_batch, mel_batch])
        pred = pred * 255

        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c
            p = cv2.resize(p, (x2 - x1, y2 - y1))
            f[y1:y2, x1:x2] = p
            out.write(f)

    out.release()

    command = 'ffmpeg -i {} -i {} -strict -2 -q:v 1 {}'.format(
        args.audio, path.join(args.results_dir, 'result.avi'),
        path.join(args.results_dir, 'result_voice.avi'))
    subprocess.call(command, shell=True)
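# mel_idx_multiplier is defined elsewhere in the script. Assuming the usual
# configuration for this pipeline (16 kHz audio with a 12.5 ms mel hop, i.e.
# 80 mel frames per second -- an assumption, not shown in this snippet), it
# converts a video-frame index into a mel-frame index:
fps = 25.0                       # hypothetical frame rate
mel_idx_multiplier = 80. / fps   # 3.2 mel frames per video frame
for frame_idx in range(3):
    print(frame_idx, int(frame_idx * mel_idx_multiplier))  # 0 0, 1 3, 2 6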
parser.add_argument('--all_images', default='filenames.pkl',
                    help='Filename for caching image paths')
args = parser.parse_args()

if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    gen = mg.create_model_residual(args, mel_step_size)
else:
    gen = mg.create_model(args, mel_step_size)
disc = md.create_model(args, mel_step_size)
comb = mg.create_combined_model(gen, disc, args, mel_step_size)

if args.resume_gen:
    gen.load_weights(args.resume_gen)
    print('Resuming generator from : {}'.format(args.resume_gen))
if args.resume_disc:
    disc.load_weights(args.resume_disc)
    print('Resuming discriminator from : {}'.format(args.resume_disc))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)
comb.summary()
if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(
        open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    model = create_model_residual(args)
else:
    model = create_model(args)

if args.resume:
    model.load_weights(args.resume)
    print('Resuming from : {}'.format(args.resume))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)

class WeightsSaver(Callback):
    def __init__(self, N, weight_path):
        self.N = N
        self.batch = 0
        self.weight_path = weight_path