Example no. 1
def main():
	if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
		full_frames = [cv2.imread(args.face)]
	else:
		video_stream = cv2.VideoCapture(args.face)
		length = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
		print("Number of frames in the input video: " + str(length))
		
		frames = []
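		# Read raw frames from the input video, stopping once args.max_sec seconds of footage have been collected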
		while 1:
			still_reading, frame = video_stream.read()
			if not still_reading:
				video_stream.release()
				break
			frames.append(frame)
			if len(frames) % 2000 == 0: print(len(frames))

			if len(frames) * (1./fps) >= args.max_sec: break

		full_frames = []
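		# Keep one frame per audio window: the frame closest to the midpoint of each window_size-ms window, stepping by video_step_size_in_ms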
		ss = 0.
		es = (ss + (window_size / 1000.))
		mid_second = (ss + es) / 2.

		while int(mid_second * fps) < len(frames):
			full_frames.append(frames[int(mid_second * fps)])

			ss += (video_step_size_in_ms / 1000.)
			es = (ss + (window_size / 1000.))
			mid_second = (ss + es) / 2.

		print ("Number of frames to be used for inference: "+str(len(full_frames)))

	mfccs = loadmat(args.mat)['mfccs']
	mfcc_chunks = []

	i = 0
	time_ms = 0.
	frame_duration = (1./fps) * 1000.
	mfcc_mags = []
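	# Slice an mfcc_chunk_size-column block of MFCCs for each video frame; MFCC columns appear to be spaced 10 ms apart (hence the // 10 below)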
	while (i < (len(mfccs[0])-(mfcc_chunk_size - 1))):
		mfcc_chunks.append(mfccs[:,i:i + mfcc_chunk_size])
		mfcc_mags.append(np.sum(mfccs[:, i:i + mfcc_chunk_size]))
		time_ms += frame_duration
		i = int(time_ms // 10)

		if (time_ms / 1000.) > args.max_sec: break

	print ("Length of mfcc chunks: "+str(len(mfcc_chunks)))

	batch_size = args.lipgan_batch_size
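	# datagen is assumed to yield batches of (cropped face images, MFCC chunks, original frames, face-box coordinates)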
	gen = datagen(full_frames.copy(), mfcc_chunks)

	for i, (img_batch, mfcc_batch, frames, coords) in enumerate(tqdm(gen, 
											total=int(np.ceil(float(len(mfcc_chunks))/batch_size)))):
		if i == 0:
			model = create_model(args)
			print ("Model Created")

			model.load_weights(args.checkpoint_path)
			print ("Model loaded")

			frame_h, frame_w = full_frames[0].shape[:-1]
			out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'), 
									cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h))

		pred = model.predict([img_batch, mfcc_batch])
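		# the generator output appears to lie in [0, 1]; scale it to 8-bit pixel values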
		pred = pred * 255
		
		for p, f, c in zip(pred, frames, coords):
			y1, y2, x1, x2 = c
			p = cv2.resize(p, (x2 - x1, y2 - y1))

			f[y1:y2, x1:x2] = p
			out.write(f)
	out.release()
	command = 'ffmpeg -i {} -i {} -strict -2 -q:v 1 {}'.format(args.audio, path.join(args.results_dir, 'result.avi'), 
														path.join(args.results_dir, 'result_voice.avi'))
	subprocess.call(command, shell=True)
Example no. 2
parser.add_argument('--all_images', default='filenames.pkl', help='Filename for caching image paths')
args = parser.parse_args()

if path.exists(path.join(args.logdir, args.all_images)):
	args.all_images = pickle.load(open(path.join(args.logdir, args.all_images), 'rb'))
else:
	all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
	pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
	args.all_images = all_images
	
print ("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
	model = create_model_residual(args, mel_step_size)
else:
	model = create_model(args, mel_step_size)

if args.resume:
	model.load_weights(args.resume)
	print('Resuming from : {}'.format(args.resume))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)

class WeightsSaver(Callback):
	def __init__(self, N, weight_path):
		self.N = N
		self.batch = 0
		self.weight_path = weight_path

	def on_batch_end(self, batch, logs=None):
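		# (assumed body, not part of the source snippet: save the weights every N batches)
		self.batch += 1
		if self.batch % self.N == 0:
			self.model.save_weights(self.weight_path.format(self.batch))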
Example no. 3
if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(
        open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images,
                open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    gen = mg.create_model_residual(args)
else:
    gen = mg.create_model(args)

disc = md.create_model(args)
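# The combined model presumably stacks the generator and discriminator so the generator can be trained against the adversarial loss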
comb = mg.create_combined_model(gen, disc, args)

if args.resume_gen:
    gen.load_weights(args.resume_gen)
    print('Resuming generator from : {}'.format(args.resume_gen))
if args.resume_disc:
    disc.load_weights(args.resume_disc)
    print('Resuming discriminator from : {}'.format(args.resume_disc))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)

comb.summary()
def al_train():
    with tf.Session() as sess:

        vocab, rev_vocab, dev_set, train_set = gens.prepare_data(gen_config)
        for bucket in train_set:
            print("al train len: ", len(bucket))

        train_bucket_sizes = [
            len(train_set[b]) for b in xrange(len(gen_config.buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))
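        # Cumulative fraction of examples per bucket, used below to pick a bucket with probability proportional to its size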
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        disc_model = h_disc.create_model(sess, disc_config,
                                         disc_config.name_model)
        gen_model = gens.create_model(sess,
                                      gen_config,
                                      forward_only=False,
                                      name_scope=gen_config.name_model)

        current_step = 0
        step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
        gen_loss_summary = tf.Summary()
        disc_loss_summary = tf.Summary()
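        # Adversarial loop: update D on real vs. generated replies, score generated replies with D to obtain a reward, update G with that reward (policy gradient), then apply a teacher-forcing update on the real pair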

        while True:
            current_step += 1
            start_time = time.time()
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            print(
                "==================Update Discriminator: %d====================="
                % current_step)
            # 1. Sample (X, Y) from the real disc_data
            encoder_inputs, decoder_inputs, target_weights, source_inputs, source_outputs = gen_model.get_batch(
                train_set, bucket_id, gen_config.batch_size)

            # 2. Sample (X, Y) and (X, ^Y), where ^Y ~ G(*|X)
            train_query, train_answer, train_labels = disc_train_data(
                sess,
                gen_model,
                vocab,
                source_inputs,
                source_outputs,
                encoder_inputs,
                decoder_inputs,
                target_weights,
                bucket_id,
                mc_search=False)
            if current_step % 200 == 0:
                print("train_query: ", len(train_query))
                print("train_answer: ", len(train_answer))
                print("train_labels: ", len(train_labels))
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print("train_answer_sentence: ", train_answer[i])
                    print(" ".join([
                        tf.compat.as_str(rev_vocab[output])
                        for output in train_answer[i]
                    ]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Update D using (X, Y) as positive examples and (X, ^Y) as negative examples
            _, disc_step_loss = disc_step(sess,
                                          bucket_id,
                                          disc_model,
                                          train_query,
                                          train_answer,
                                          train_labels,
                                          forward_only=False)
            disc_loss += disc_step_loss / disc_config.steps_per_checkpoint

            print(
                "==================Update Generator: %d========================="
                % current_step)
            # 1. Sample (X, Y) from the real disc_data
            update_gen_data = gen_model.get_batch(train_set, bucket_id,
                                                  gen_config.batch_size)
            encoder, decoder, weights, source_inputs, source_outputs = update_gen_data

            # 2. Sample (X, Y) and (X, ^Y), where ^Y ~ G(*|X), using Monte Carlo search
            train_query, train_answer, train_labels = disc_train_data(
                sess,
                gen_model,
                vocab,
                source_inputs,
                source_outputs,
                encoder,
                decoder,
                weights,
                bucket_id,
                mc_search=True)

            if current_step % 200 == 0:
                for i in xrange(len(train_query)):
                    print("label: ", train_labels[i])
                    print(" ".join([
                        tf.compat.as_str(rev_vocab[output])
                        for output in train_answer[i]
                    ]))

            train_query = np.transpose(train_query)
            train_answer = np.transpose(train_answer)

            # 3. Compute reward r for (X, ^Y) using D, based on the Monte Carlo search
            reward, _ = disc_step(sess,
                                  bucket_id,
                                  disc_model,
                                  train_query,
                                  train_answer,
                                  train_labels,
                                  forward_only=True)
            batch_reward += reward / gen_config.steps_per_checkpoint
            print("step_reward: ", reward)

            # 4. Update G on (X, ^Y) using reward r
            gan_adjusted_loss, gen_step_loss, _ = gen_model.step(
                sess,
                encoder,
                decoder,
                weights,
                bucket_id,
                forward_only=False,
                reward=reward,
                up_reward=True,
                debug=True)
            gen_loss += gen_step_loss / gen_config.steps_per_checkpoint

            print("gen_step_loss: ", gen_step_loss)
            print("gen_step_adjusted_loss: ", gan_adjusted_loss)

            # 5. Teacher forcing: update G on (X, Y)
            t_adjusted_loss, t_step_loss, a = gen_model.step(
                sess, encoder, decoder, weights, bucket_id, forward_only=False)
            t_loss += t_step_loss / gen_config.steps_per_checkpoint

            print("t_step_loss: ", t_step_loss)
            print("t_adjusted_loss", t_adjusted_loss)

            if current_step % gen_config.steps_per_checkpoint == 0:

                step_time += (time.time() -
                              start_time) / gen_config.steps_per_checkpoint

                print(
                    "current_steps: %d, step time: %.4f, disc_loss: %.3f, gen_loss: %.3f, t_loss: %.3f, reward: %.3f"
                    % (current_step, step_time, disc_loss, gen_loss, t_loss,
                       batch_reward))

                disc_loss_value = disc_loss_summary.value.add()
                disc_loss_value.tag = disc_config.name_loss
                disc_loss_value.simple_value = float(disc_loss)

                gen_global_steps = sess.run(gen_model.global_step)
                gen_loss_value = gen_loss_summary.value.add()
                gen_loss_value.tag = gen_config.name_loss
                gen_loss_value.simple_value = float(gen_loss)
                t_loss_value = gen_loss_summary.value.add()
                t_loss_value.tag = gen_config.teacher_loss
                t_loss_value.simple_value = float(t_loss)
                batch_reward_value = gen_loss_summary.value.add()
                batch_reward_value.tag = gen_config.reward_name
                batch_reward_value.simple_value = float(batch_reward)

                if current_step % (gen_config.steps_per_checkpoint * 2) == 0:
                    print("current_steps: %d, save disc model" % current_step)
                    disc_ckpt_dir = os.path.abspath(
                        os.path.join(disc_config.train_dir, "checkpoints"))
                    if not os.path.exists(disc_ckpt_dir):
                        os.makedirs(disc_ckpt_dir)
                    disc_model_path = os.path.join(disc_ckpt_dir, "disc.model")
                    disc_model.saver.save(sess,
                                          disc_model_path,
                                          global_step=disc_model.global_step)

                    print("current_steps: %d, save gen model" % current_step)
                    gen_ckpt_dir = os.path.abspath(
                        os.path.join(gen_config.train_dir, "checkpoints"))
                    if not os.path.exists(gen_ckpt_dir):
                        os.makedirs(gen_ckpt_dir)
                    gen_model_path = os.path.join(gen_ckpt_dir, "gen.model")
                    gen_model.saver.save(sess,
                                         gen_model_path,
                                         global_step=gen_model.global_step)

                step_time, disc_loss, gen_loss, t_loss, batch_reward = 0.0, 0.0, 0.0, 0.0, 0.0
                sys.stdout.flush()
def main():
    if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.face)]
    else:
        video_stream = cv2.VideoCapture(args.face)
        length = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Number of frames in the input video: " + str(length))

        frames = []
        while 1:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            frames.append(frame)
            if len(frames) % 2000 == 0: print(len(frames))

            if len(frames) * (1. / fps) >= args.max_sec: break

        full_frames = []
        ss = 0.
        es = (ss + (window_size / 1000.))
        mid_second = (ss + es) / 2.

        while int(mid_second * fps) < len(frames):
            full_frames.append(frames[int(mid_second * fps)])

            ss += (video_step_size_in_ms / 1000.)
            es = (ss + (window_size / 1000.))
            mid_second = (ss + es) / 2.

        print("Number of frames to be used for inference: " +
              str(len(full_frames)))

    mfccs = loadmat(args.mat)['mfccs']
    mfcc_chunks = []

    i = 0
    time_ms = 0.
    frame_duration = (1. / fps) * 1000.
    mfcc_mags = []
    while (i < (len(mfccs[0]) - (mfcc_chunk_size - 1))):
        mfcc_chunks.append(mfccs[:, i:i + mfcc_chunk_size])
        mfcc_mags.append(np.sum(mfccs[:, i:i + mfcc_chunk_size]))
        time_ms += frame_duration
        i = int(time_ms // 10)

        if (time_ms / 1000.) > args.max_sec: break

    print("Length of mfcc chunks: " + str(len(mfcc_chunks)))

    # for frame in full_frames:
    # 	print(np.shape(frame))

    batch_size = args.lipgan_batch_size
    gen = datagen(full_frames.copy(), mfcc_chunks)

    for i, (img_batch, mfcc_batch, frames, coords) in enumerate(
            tqdm(gen,
                 total=int(np.ceil(float(len(mfcc_chunks)) / batch_size)))):
        if i == 0:
            model = create_model(args)
            print("Model Created")

            model.load_weights(args.checkpoint_path)
            print("Model loaded")

            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'),
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        pred = model.predict([img_batch, mfcc_batch])
        pred = pred * 255

        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c

            # Clamp the face box to the frame bounds
            x1 = max(x1, 0)
            y1 = max(y1, 0)
            x2 = min(x2, f.shape[1])
            y2 = min(y2, f.shape[0])

            p = cv2.resize(p, (x2 - x1, y2 - y1))

            # Blending. Make alpha array, with strong alpha at edges
            alphaArr = np.zeros((p.shape[0], p.shape[1]))
            blends = np.array((0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0))
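            # blends[k] is the weight given to the original frame k pixels in from the patch border (0.9 at the edge, fading to 0 towards the interior)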

            # Left edge (first 10 columns)
            for col in range(10):
                alphaArr[:, col] = np.repeat(blends[col], p.shape[0])

            # Right edge (last 10 columns)
            k = 0
            for col in reversed(range(p.shape[1] - 10, p.shape[1])):
                alphaArr[:, col] = np.repeat(blends[k], p.shape[0])
                k = k + 1

            # Top edge (first 10 rows)
            for row in range(10):
                alphaArr[row, :] = np.repeat(blends[row], p.shape[1])

            # Bottom edge (last 10 rows)
            k = 0
            for row in reversed(range(p.shape[0] - 10, p.shape[0])):
                alphaArr[row, :] = np.repeat(blends[k], p.shape[1])
                k = k + 1

            # print(np.shape(p))
            # print(x1,x2,y1,y2)
            # print(np.shape(f[y1:y2,x1:x2,:]))
            pBlendR = (alphaArr * f[y1:y2, x1:x2, 0]) + (
                (1 - alphaArr) * p[:, :, 0])
            pBlendG = (alphaArr * f[y1:y2, x1:x2, 1]) + (
                (1 - alphaArr) * p[:, :, 1])
            pBlendB = (alphaArr * f[y1:y2, x1:x2, 2]) + (
                (1 - alphaArr) * p[:, :, 2])

            f[y1:y2, x1:x2, 0] = pBlendR
            f[y1:y2, x1:x2, 1] = pBlendG
            f[y1:y2, x1:x2, 2] = pBlendB

            #out.write(cv2.rectangle(f,(x1,y1),(x2,y2),(255,0,0)))
            out.write(f)

    out.release()

    command = 'ffmpeg -i {} -i {} -strict -2 {}'.format(
        args.audio, path.join(args.results_dir, 'result.avi'),
        path.join(args.results_dir, 'result_voice.mp4'))
    subprocess.call(command, shell=True)
def main():
    if args.face.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.face)]
    else:
        video_stream = cv2.VideoCapture(args.face)

        full_frames = []
        while 1:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            full_frames.append(frame)
            if len(full_frames) % 2000 == 0: print(len(full_frames))

            if len(full_frames) * (1. / fps) >= args.max_sec: break

        print("Number of frames available for inference: " +
              str(len(full_frames)))

    wav = audio.load_wav(args.audio, 16000)
    mel = audio.melspectrogram(wav)
    print(mel.shape)

    if np.isnan(mel.reshape(-1)).sum() > 0:
        raise ValueError('Mel contains nan!')

    mel_chunks = []
    i = 0
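    # Slice the mel spectrogram into mel_step_size-wide windows, one per output frame; mel_idx_multiplier (defined elsewhere) maps a frame index to its mel column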
    while 1:
        start_idx = int(i * mel_idx_multiplier)
        if start_idx + mel_step_size > len(mel[0]):
            break
        mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
        i += 1

    print("Length of mel chunks: {}".format(len(mel_chunks)))

    batch_size = args.lipgan_batch_size
    gen = datagen(full_frames.copy(), mel_chunks)

    for i, (img_batch, mel_batch, frames, coords) in enumerate(
            tqdm(gen,
                 total=int(np.ceil(float(len(mel_chunks)) / batch_size)))):
        if i == 0:
            model = create_model(args, mel_step_size)
            print("Model Created")

            model.load_weights(args.checkpoint_path)
            print("Model loaded")

            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter(path.join(args.results_dir, 'result.avi'),
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        pred = model.predict([img_batch, mel_batch])
        pred = pred * 255

        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c
            p = cv2.resize(p, (x2 - x1, y2 - y1))

            f[y1:y2, x1:x2] = p
            out.write(f)

    out.release()

    command = 'ffmpeg -i {} -i {} -strict -2 -q:v 1 {}'.format(
        args.audio, path.join(args.results_dir, 'result.avi'),
        path.join(args.results_dir, 'result_voice.avi'))
    subprocess.call(command, shell=True)
Example no. 7
parser.add_argument('--all_images', default='filenames.pkl', help='Filename for caching image paths')
args = parser.parse_args()

if path.exists(path.join(args.logdir, args.all_images)):
	args.all_images = pickle.load(open(path.join(args.logdir, args.all_images), 'rb'))
else:
	all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
	pickle.dump(all_images, open(path.join(args.logdir, args.all_images), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
	args.all_images = all_images

print ("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
	gen = mg.create_model_residual(args, mel_step_size)
else:
	gen = mg.create_model(args, mel_step_size)

disc = md.create_model(args, mel_step_size)
comb = mg.create_combined_model(gen, disc, args, mel_step_size)

if args.resume_gen:
	gen.load_weights(args.resume_gen)
	print('Resuming generator from : {}'.format(args.resume_gen))
if args.resume_disc:
	disc.load_weights(args.resume_disc)
	print('Resuming discriminator from : {}'.format(args.resume_disc))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)

comb.summary()
Example no. 8
if path.exists(path.join(args.logdir, args.all_images)):
    args.all_images = pickle.load(
        open(path.join(args.logdir, args.all_images), 'rb'))
else:
    all_images = glob(path.join("{}/train/*/*/*.jpg".format(args.data_root)))
    pickle.dump(all_images,
                open(path.join(args.logdir, args.all_images), 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)
    args.all_images = all_images

print("Will be training on {} images".format(len(args.all_images)))

if args.model == 'residual':
    model = create_model_residual(args)
else:
    model = create_model(args)

if args.resume:
    model.load_weights(args.resume)
    print('Resuming from : {}'.format(args.resume))

args.batch_size = args.n_gpu * args.batch_size
train_datagen = datagen(args)


class WeightsSaver(Callback):
    def __init__(self, N, weight_path):
        self.N = N
        self.batch = 0
        self.weight_path = weight_path