def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Videos root directory', required=True)
    parser.add_argument('--videodf', type=Path, help='Path to read the videos DataFrame')
    parser.add_argument('--facesfolder', type=Path, help='Faces output root directory', required=True)
    parser.add_argument('--facesdf', type=Path, help='Path to save the output DataFrame of faces', required=True)
    parser.add_argument('--checkpoint', type=Path, help='Path to save the temporary per-video outputs', required=True)
    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument('--device', type=torch.device,
                        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                        help='Device to use for face extraction')
    parser.add_argument('--collateonly', help='Only perform collation of pre-existing results', action='store_true')
    parser.add_argument('--noindex', help='Do not rebuild the index', action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads', type=int, help='Number of threads', default=8)
    parser.add_argument('--offset', type=int, help='Offset to start extraction', default=0)
    parser.add_argument('--num', type=int, help='Number of videos to process', default=0)
    parser.add_argument('--lazycheck', action='store_true', help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck', action='store_true', help='Try to open every image')

    args = parser.parse_args()

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:
        ## BlazeFace loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            for batch_idx0 in tqdm(np.arange(start=0, stop=len(df_videos_process), step=batch_size),
                                   desc='Extracting faces'):
                tosave_list = list(p.map(
                    partial(
                        process_video,
                        source_dir=source_dir,
                        facedestination_dir=facedestination_dir,
                        checkpoint_folder=checkpoint_folder,
                        face_size=face_size,
                        face_extractor=face_extractor,
                        lazycheck=lazycheck,
                        deepcheck=deepcheck,
                    ),
                    df_videos_process.iloc[batch_idx0:batch_idx0 + batch_size].iterrows()))
                for tosave in tosave_list:
                    if tosave is not None:
                        if len(tosave[2]):
                            list(p.map(save_jpg, tosave[2]))
                        tosave[1].parent.mkdir(parents=True, exist_ok=True)
                        tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(), total=len(df_videos), desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(str(video_face_checkpoint_path))
                    # Rename to avoid a clash with the per-video 'subject' attribute
                    df_video_faces = df_video_faces.rename(columns={'subject': 'videosubject'}, errors='ignore')
                    nfaces = len(np.unique(df_video_faces.index.map(
                        lambda x: int(x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    print('Error while reading: {}'.format(video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        if offset > 0:
            if num > 0:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] +
                    '_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
            else:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}.pkl'.format(offset))
        elif num > 0:
            facesdataset_path = facesdataset_path.parent.joinpath(
                str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}_to_video_{}.pkl'.format(0, num))

        # Create the output directory if it does not exist
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))

        df_faces = pd.concat(faces_dataset, axis=0)
        df_faces['video'] = df_faces['video'].astype('category')
        for key in ['kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x', 'kp3y',
                    'kp4x', 'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y',
                    'left', 'top', 'right', 'bottom']:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Remove duplicates, if any
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        fields_to_preserve_from_video = [i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces']
                                         if i in df_videos]
        df_faces = pd.merge(df_faces, df_videos[fields_to_preserve_from_video],
                            left_on='video', right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
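# `save_jpg` and `process_video` are referenced above but defined elsewhere in
# the repo. Below is a minimal, hypothetical sketch of `save_jpg`, assuming each
# element of tosave[2] is an (image, path) pair with the image as an RGB NumPy
# array; the actual helper may differ (e.g. in JPEG quality settings).
# Example invocation (placeholder paths; assumes this script is the repo's
# face-extraction entry point):
#   python extract_faces.py --source /path/to/videos --videodf videos.pkl \
#          --facesfolder /path/to/faces --facesdf faces.pkl --checkpoint /path/to/checkpoints
from PIL import Image


def save_jpg(args):
    # Unpack the (image, path) pair and write the face crop as a JPEG
    image, path = args
    Image.fromarray(image).save(str(path), quality=95)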
def main(argv):
    args = parse_args(argv)

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:
        ## BlazeFace loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            for batch_idx0 in tqdm(np.arange(start=0, stop=len(df_videos_process), step=batch_size),
                                   desc='Extracting faces'):
                tosave_list = list(p.map(
                    partial(
                        process_video,
                        source_dir=source_dir,
                        facedestination_dir=facedestination_dir,
                        checkpoint_folder=checkpoint_folder,
                        face_size=face_size,
                        face_extractor=face_extractor,
                        lazycheck=lazycheck,
                        deepcheck=deepcheck,
                    ),
                    df_videos_process.iloc[batch_idx0:batch_idx0 + batch_size].iterrows()))
                for tosave in tosave_list:
                    if tosave is not None:
                        if len(tosave[2]):
                            list(p.map(save_jpg, tosave[2]))
                        tosave[1].parent.mkdir(parents=True, exist_ok=True)
                        tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(), total=len(df_videos), desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(str(video_face_checkpoint_path))
                    # Rename to avoid a clash with the per-video 'subject' attribute
                    df_video_faces = df_video_faces.rename(columns={'subject': 'videosubject'}, errors='ignore')
                    nfaces = len(np.unique(df_video_faces.index.map(
                        lambda x: int(x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    print('Error while reading: {}'.format(video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        if len(faces_dataset) == 0:
            raise ValueError(f'No checkpoint found from face extraction. '
                             f'Is the source path {source_dir} correct for the videos in your DataFrame?')

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        if offset > 0:
            if num > 0:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath(
                        'faces_df_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] +
                        '_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
            else:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath('faces_df_from_video_{}.pkl'.format(offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}.pkl'.format(offset))
        elif num > 0:
            if facesdataset_path.is_dir():
                facesdataset_path = facesdataset_path.joinpath(
                    'faces_df_from_video_{}_to_video_{}.pkl'.format(0, num))
            else:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] +
                    '_from_video_{}_to_video_{}.pkl'.format(0, num))
        else:
            if facesdataset_path.is_dir():
                # If a directory was given, fall back to a default file name
                facesdataset_path = facesdataset_path.joinpath('faces_df.pkl')

        # Create the output directory if it does not exist
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))

        df_faces = pd.concat(faces_dataset, axis=0)
        df_faces['video'] = df_faces['video'].astype('category')
        for key in ['kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x', 'kp3y',
                    'kp4x', 'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y',
                    'left', 'top', 'right', 'bottom']:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Remove duplicates, if any
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        fields_to_preserve_from_video = [i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces']
                                         if i in df_videos]
        df_faces = pd.merge(df_faces, df_videos[fields_to_preserve_from_video],
                            left_on='video', right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
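# `parse_args` is not defined in this file. A minimal sketch, under the
# assumption that it mirrors the argparse block of the first main() above
# (same flags, types, and defaults); the actual helper may differ:
def parse_args(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Videos root directory', required=True)
    parser.add_argument('--videodf', type=Path, help='Path to read the videos DataFrame')
    parser.add_argument('--facesfolder', type=Path, help='Faces output root directory', required=True)
    parser.add_argument('--facesdf', type=Path, help='Path to save the output DataFrame of faces', required=True)
    parser.add_argument('--checkpoint', type=Path, help='Path to save the temporary per-video outputs', required=True)
    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument('--device', type=torch.device,
                        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                        help='Device to use for face extraction')
    parser.add_argument('--collateonly', help='Only perform collation of pre-existing results', action='store_true')
    parser.add_argument('--noindex', help='Do not rebuild the index', action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads', type=int, help='Number of threads', default=8)
    parser.add_argument('--offset', type=int, help='Offset to start extraction', default=0)
    parser.add_argument('--num', type=int, help='Number of videos to process', default=0)
    parser.add_argument('--lazycheck', action='store_true', help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck', action='store_true', help='Try to open every image')
    return parser.parse_args(argv)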
def run_nb(modelname):
    # ## Parameters

    # In[2]:
    """
    Choose an architecture between
    - EfficientNetB4
    - EfficientNetB4ST
    - EfficientNetAutoAttB4
    - EfficientNetAutoAttB4ST
    - Xception
    """
    net_model = modelname

    """
    Choose a training dataset between
    - DFDC
    - FFPP
    """
    train_db = 'DFDC'

    # In[3]:
    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    face_policy = 'scale'
    face_size = 224
    frames_per_video = 32

    # ## Initialization

    # In[4]:
    print('=' * 20)
    model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
    print('=' * 20)
    net = getattr(fornet, net_model)().eval().to(device)
    print('=' * 20)
    net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))

    # In[5]:
    transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)

    # In[6]:
    facedet = BlazeFace().to(device)
    facedet.load_weights("../blazeface/blazeface.pth")
    facedet.load_anchors("../blazeface/anchors.npy")
    videoreader = VideoReader(verbose=False)
    video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

    # ## Detect faces

    # In[7]:
    torch.cuda.is_available()

    # In[8]:
    torch.cuda.current_device()

    # In[9]:
    torch.cuda.device(0)

    # In[10]:
    torch.cuda.device_count()

    # In[11]:
    torch.cuda.get_device_name(0)

    # In[12]:
    vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
    vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

    # In[13]:
    im_real_face = vid_real_faces[0]['faces'][0]
    im_fake_face = vid_fake_faces[0]['faces'][0]

    # In[14]:
    fig, ax = plt.subplots(1, 2, figsize=(8, 4))
    ax[0].imshow(im_real_face)
    ax[0].set_title('REAL')
    ax[1].imshow(im_fake_face)
    ax[1].set_title('FAKE')

    # ## Predict scores for each frame

    # In[15]:
    # For each frame, consider the face with the highest confidence score found by BlazeFace (= frame['faces'][0])
    faces_real_t = torch.stack([transf(image=frame['faces'][0])['image']
                                for frame in vid_real_faces if len(frame['faces'])])
    faces_fake_t = torch.stack([transf(image=frame['faces'][0])['image']
                                for frame in vid_fake_faces if len(frame['faces'])])

    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
        faces_fake_pred = net(faces_fake_t.to(device)).cpu().numpy().flatten()

    # In[16]:
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax[0].stem([f['frame_idx'] for f in vid_real_faces if len(f['faces'])],
               expit(faces_real_pred), use_line_collection=True)
    ax[0].set_title('REAL')
    ax[0].set_xlabel('Frame')
    ax[0].set_ylabel('Score')
    ax[0].set_ylim([0, 1])
    ax[0].grid(True)
    ax[1].stem([f['frame_idx'] for f in vid_fake_faces if len(f['faces'])],
               expit(faces_fake_pred), use_line_collection=True)
    ax[1].set_title('FAKE')
    ax[1].set_xlabel('Frame')
    ax[1].set_ylabel('Score')
    ax[1].set_ylim([0, 1])
    ax[1].set_yticks([0, 1], ['REAL', 'FAKE'])

    # In[17]:
    """
    Print average scores.
    An average score close to 0 predicts REAL; an average score close to 1 predicts FAKE.
    """
    print('Average score for REAL video: {:.4f}'.format(expit(faces_real_pred.mean())))
    print('Average score for FAKE video: {:.4f}'.format(expit(faces_fake_pred.mean())))
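# Example usage: run the notebook pipeline with one of the architectures
# listed in the docstring of run_nb. Assumes the sample videos and pretrained
# weights referenced above are available at the expected paths:
if __name__ == '__main__':
    run_nb('EfficientNetAutoAttB4')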