def famosGeneration(content, noise, templatePatch, bVis=False):
    if opt.multiScale > 0:
        x = netMix(content, noise, templatePatch)
    else:
        x = netMix(content, noise)

    # last 5 channels: 3 raw RGB channels plus 2 blending gates (alpha, beta);
    # the remaining channels are mixing weights over the template patches
    a5 = x[:, -5:]
    A = 4 * nn.functional.tanh(x[:, :-5])  ## smooths probs somehow
    A = nn.functional.softmax(1 * (A - A.detach().max()), dim=1)
    mixed = getTemplateMixImage(A, templatePatch)
    alpha = nn.functional.sigmoid(a5[:, 3:4])
    beta = nn.functional.sigmoid(a5[:, 4:5])
    fake = blend(nn.functional.tanh(a5[:, :3]), mixed, alpha, beta)

    ## call second U-Net to refine further
    if opt.refine:
        a5 = netRefine(torch.cat([content, mixed, fake, a5[:, :3], tvArray(A)], 1), noise)
        alpha = nn.functional.sigmoid(a5[:, 3:4])
        beta = nn.functional.sigmoid(a5[:, 4:5])
        fake = blend(nn.functional.tanh(a5[:, :3]), mixed, alpha, beta)

    if bVis:
        return fake, torch.cat([alpha, beta, (alpha + beta) * 0.5], 1), A, mixed  # alpha
    return fake
def live_feed():
    emojis = get_emojis()
    while True:
        img = vcam.read()[1]
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_faces = detector(gray_img)
        face = gray_faces[0]
        shape_68 = shape_predictor(img, face)
        shape = face_utils.shape_to_np(shape_68)
        (x, y, w, h) = face_utils.rect_to_bb(face)
        faceAligned = fa.align(img, gray_img, face)
        faceAligned = cv2.resize(faceAligned, (256, 256))
        cv2.imshow('aligned', faceAligned)
        cv2.imshow('face', img[y:y + h, x:x + w])
        pred_probab, pred_class = keras_predict(model, faceAligned)
        print(pred_probab, pred_class)
        img = blend(img, emojis[pred_class], (x, y, w, h))
        cv2.imshow('img', img)
        keypress = cv2.waitKey(1)
        if keypress % 256 == 27:  # ESC
            print("Escape is pressed, quitting...")
            vcam.release()
            cv2.destroyAllWindows()
            break
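The blend helper called above with the image, the predicted emoji, and the face bounding box is not part of this excerpt. A minimal sketch, assuming it simply overlays the emoji on the detected face region; the 50/50 mix and the 3-channel emoji are assumptions, not confirmed by the source:

import cv2

def blend(img, emoji, rect):
    # Hypothetical overlay: resize the predicted emoji to the face bounding
    # box and mix it 50/50 with that region. Assumes a 3-channel emoji image;
    # a real implementation would likely use the emoji's alpha channel instead.
    x, y, w, h = rect
    emoji = cv2.resize(emoji, (w, h))
    roi = img[y:y + h, x:x + w]
    img[y:y + h, x:x + w] = cv2.addWeighted(roi, 0.5, emoji, 0.5, 0)
    return img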
def noised():
    noise = self.generator(inputs, self.relative)
    if self.relative:
        noise = tf.multiply(inputs, noise)
        return inputs + tf.multiply(noise, amount)
    else:
        # Blend the two
        return blend(inputs, noise, amount)
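The blend helper used in the non-relative branch is not shown here. A minimal sketch, assuming it is a plain convex combination of the two tensors; only the name and call signature come from the snippet above:

import tensorflow as tf

def blend(inputs, noise, amount):
    # Assumed behaviour: linear interpolation between the clean inputs and the
    # generated noise; amount = 0.0 keeps the inputs, 1.0 returns the noise.
    return inputs * (1.0 - amount) + noise * amount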
def training_step(self, train_batch, batch_idx):
    x, _, context, w = train_batch
    blended_x, blended_c, _ = blend(x, context, w)
    logits = self.forward(blended_x)
    pred = torch.sigmoid(logits)
    loss = utility_score(blended_c, pred, mode='loss')
    self.log('train_loss', loss)
    return loss
def process_video(settings: dict):
    input_name = settings['input_name']
    if settings['cv_colourfix']:
        utils.colour_fix(input_name)

    input_video = cv2.VideoCapture(input_name)
    if not input_video or not input_video.isOpened():
        raise Exception('Failed to read video.')

    output_name = settings['output_name']
    blend_mode = settings['blend_mode']
    blend_range = float(settings['blend_range'])

    video_res = [
        int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    ]
    output_res = utils.parse_resolution(video_res, settings['resolution'])

    video_fps = int(input_video.get(cv2.CAP_PROP_FPS))
    output_fps = int(settings['output_fps'])
    fps_ratio = int(video_fps / output_fps)

    video_frames = input_video.get(cv2.CAP_PROP_FRAME_COUNT)
    output_frames = int(video_frames / fps_ratio)
    codec = settings['fourcc']

    print(f'Input Res: {video_res} \n', f'Output Res: {output_res} \n')

    blended_frames = int(blend_range * fps_ratio)
    weight = weights.get_weight(blend_mode, blended_frames)

    output_video = cv2.VideoWriter(
        filename=f'no-audio_{output_name}',
        fourcc=cv2.VideoWriter_fourcc(*codec),
        fps=output_fps,
        frameSize=(output_res[0], output_res[1])
    )

    need_resize = video_res != output_res
    time_list = [0] * 15
    imgs = []
    input_video.set(cv2.CAP_PROP_POS_FRAMES, 0)

    # Load all frames of the first blending window
    for _ in range(0, blended_frames):
        _, frame = input_video.read()
        if need_resize:
            frame = cv2.resize(frame, (output_res[0], output_res[1]))
        imgs += [frame]
    output_video.write(utils.blend(np.asarray(imgs), weight))
    del imgs[:fps_ratio]

    # Load remaining unloaded frames
    for i in range(1, output_frames):
        time_start = time.process_time()
        for _ in range(0, fps_ratio):
            _, frame = input_video.read()
            if need_resize:
                frame = cv2.resize(frame, (output_res[0], output_res[1]))
            imgs.append(frame)
        output_video.write(utils.blend(np.asarray(imgs), weight))
        del imgs[:fps_ratio]

        elapsed_time = time.process_time() - time_start
        time_list.pop(0)
        time_list.append(elapsed_time)
        average_time = sum(time_list) / len(time_list)

        print('Performance:', '%.3f' % average_time, 'seconds/frame -',
              '%.3f' % (1 / average_time), 'FPS')
        print('Estimation:', time.strftime('%H:%M:%S',
              time.gmtime(math.ceil(average_time * int(output_frames - i)))))
        print(f'Progress: {i}/{output_frames} -', '%.3f' % (100 * i / output_frames), '%')

    output_video.release()
    input_video.release()
    utils.add_audio(input_name, output_name)

    if settings['cv_colourfix']:
        Path(input_name).unlink()
        Path(f'to-fix_{input_name}').rename(input_name)
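utils.blend and weights.get_weight come from the project's own modules and are not part of this excerpt. A minimal sketch of what the blend step is assumed to do, given that it receives a stack of frames and a per-frame weight vector (blend_frames is a hypothetical stand-in name):

import numpy as np

def blend_frames(frames: np.ndarray, weight: np.ndarray) -> np.ndarray:
    # Hypothetical stand-in for utils.blend: weighted average over a stack of
    # frames shaped (n, height, width, channels), using n per-frame weights.
    weight = np.asarray(weight, dtype=np.float64)
    weight = weight / weight.sum()
    mixed = np.tensordot(weight, frames.astype(np.float64), axes=1)
    return np.clip(mixed, 0, 255).astype(np.uint8)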
idx_to_label = {int(key): value[1] for key, value in idx_to_label.items()}

# set device
device = torch.device('cuda:%d' % args.gpu_no if args.gpu_no >= 0 else 'cpu')

network = CAM(args.network).to(device)
network.eval()

image = imload(args.image, args.imsize, args.cropsize).to(device)

# make class activation map
with torch.no_grad():
    prob, cls, cam = network(image, topk=args.topk)

# tensor to pil image
img_pil = imshow(image)
img_pil.save(args.save_path + "input.jpg")

for k in range(args.topk):
    print("Predict '%s' with %2.4f probability" % (idx_to_label[cls[k]], prob[k]))

    cam_ = cam[k].squeeze().cpu().data.numpy()
    cam_pil = array_to_cam(cam_)
    cam_pil.save(args.save_path + "cam_class__%s_prob__%2.4f.jpg" % (idx_to_label[cls[k]], prob[k]))

    # overlay image and class activation map
    blended_cam = blend(img_pil, cam_pil, args.blend_alpha)
    blended_cam.save(args.save_path + "blended_class__%s_prob__%2.4f.jpg" % (idx_to_label[cls[k]], prob[k]))
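The blend call overlays the CAM heatmap on the input PIL image with weight args.blend_alpha; the helper itself is not part of this excerpt. A minimal sketch, assuming it wraps PIL's built-in Image.blend:

from PIL import Image

def blend(img_pil, cam_pil, alpha):
    # Assumed overlay: match mode and size, then let PIL do the alpha blend.
    # alpha = 0.0 shows only the input image, 1.0 shows only the CAM heatmap.
    cam_pil = cam_pil.convert(img_pil.mode).resize(img_pil.size)
    return Image.blend(img_pil, cam_pil, alpha)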
def transition(from_img, to_img, duration=10, fps=30):
    num_frames = duration * fps  # Number of frames needed for a video of length duration at fps
    for alpha in np.linspace(0.0, 1.0, num_frames):
        blended = utils.blend(from_img, to_img, alpha)
        yield cv2.cvtColor(blended.astype(np.uint8), cv2.COLOR_GRAY2RGB)
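One possible way to consume the generator, assuming utils.blend is an alpha blend of two same-sized grayscale arrays; the file names and codec below are illustrative only:

import cv2
import numpy as np

from_img = cv2.imread('start.png', cv2.IMREAD_GRAYSCALE).astype(np.float64)
to_img = cv2.imread('end.png', cv2.IMREAD_GRAYSCALE).astype(np.float64)

height, width = from_img.shape
writer = cv2.VideoWriter('fade.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height))
for frame in transition(from_img, to_img, duration=10, fps=30):
    writer.write(frame)
writer.release()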