def __init__(self, device):
    self.net = BlazeFace().to(device)
    self.net.load_weights("blazeface.pth")
    self.net.load_anchors("anchors.npy")
    self.mouth_region_size = (64, 64)
    self.img_dims = (128, 128)
def __init__(self, blaze_weight, anchors, scale: float = 1.0):
    super().__init__()
    # `device` is expected to be defined in the enclosing module.
    face_detector = BlazeFace().to(device)
    face_detector.load_weights(blaze_weight)
    face_detector.load_anchors(anchors)
    _ = face_detector.train(False)  # evaluation mode
    self.extractor = FaceExtractor(face_detector, margin=scale - 1)
def test_export_onnx(tmpdir):
    p = str(tmpdir.join('blazeface.onnx'))
    x = torch.randn(1, 3, 128, 128)
    model = BlazeFace()
    torch.onnx.export(model, x, p, verbose=True,
                      input_names=['input'], output_names=['output'])
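# Hedged companion test, not in the original suite: smoke-test the exported
# file with onnxruntime. It assumes onnxruntime is installed and that this
# BlazeFace variant returns a single (1, 96, 8, 8) feature map, as asserted in
# test_forward below.
def test_onnx_inference(tmpdir):
    import numpy as np
    import onnxruntime as ort
    p = str(tmpdir.join('blazeface.onnx'))
    torch.onnx.export(BlazeFace(), torch.randn(1, 3, 128, 128), p,
                      input_names=['input'], output_names=['output'])
    sess = ort.InferenceSession(p)
    x = np.random.randn(1, 3, 128, 128).astype(np.float32)
    (out,) = sess.run(None, {'input': x})
    assert out.shape == (1, 96, 8, 8)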
def detect(self):
    model = BlazeFace()
    model.load_weights(self.model_weights)
    model.load_anchors(self.model_anchors)
    # img = cv2.cvtColor(cv2.imread(self.img_path), cv2.COLOR_BGR2RGB)
    # img_res = cv2.resize(img, (self.img_size, self.img_size))
    img_res = cv2.resize(self.img_arr, (self.img_size, self.img_size))
    results = model.predict_on_image(img_res)
    self.detections = results
    return results
def main():
    # Here we check the train data file extensions.
    train_list = list(os.listdir(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER)))
    ext_dict = []
    for file in train_list:
        file_ext = file.split('.')[1]
        if file_ext not in ext_dict:
            ext_dict.append(file_ext)
    print(f"Extensions: {ext_dict}")

    # Let's count how many files there are with each extension.
    for file_ext in ext_dict:
        print(f"Files with extension `{file_ext}`: "
              f"{len([file for file in train_list if file.endswith(file_ext)])}")

    # Same check for the test data.
    test_list = list(os.listdir(os.path.join(DATA_FOLDER, TEST_FOLDER)))
    ext_dict = []
    for file in test_list:
        file_ext = file.split('.')[1]
        if file_ext not in ext_dict:
            ext_dict.append(file_ext)
    print(f"Extensions: {ext_dict}")
    for file_ext in ext_dict:
        # Count test files here (the original counted train_list again).
        print(f"Files with extension `{file_ext}`: "
              f"{len([file for file in test_list if file.endswith(file_ext)])}")

    json_file = [file for file in train_list if file.endswith('json')][0]
    print(f"JSON file: {json_file}")
    meta_train_df = get_meta_from_json(TRAIN_SAMPLE_FOLDER, json_file)
    meta_train_df.head()

    fake_train_sample_video = list(meta_train_df.loc[meta_train_df.label == 'FAKE'].sample(3).index)
    real_train_sample_video = list(meta_train_df.loc[meta_train_df.label == 'REAL'].sample(3).index)

    print("PyTorch version:", torch.__version__)
    print("CUDA version:", torch.version.cuda)
    print("cuDNN version:", torch.backends.cudnn.version())
    gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(gpu)

    net = BlazeFace().to(gpu)
    net.load_weights("../input/blazeface.pth")
    net.load_anchors("../input/anchors.npy")

    for video_file in fake_train_sample_video:
        get_frame_faces(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER, video_file), net)
def process_video(video_path, filename, image_path, original):
    gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    facedet = BlazeFace().to(gpu)
    facedet.load_weights("blazeface.pth")
    facedet.load_anchors("anchors.npy")
    _ = facedet.train(False)

    from helpers_read_video_1 import VideoReader
    from helpers_face_extract_1 import FaceExtractor

    frames_per_video = 10
    video_reader = VideoReader()
    video_read_fn = lambda x: video_reader.read_random_frames(x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn, facedet)
    faces = face_extractor.process_video(video_path)

    # Only look at one face per frame.
    face_extractor.keep_only_best_face(faces)

    n = 0
    for frame_data in faces:
        for face in frame_data["faces"]:
            face_locations = face_recognition.face_locations(face)
            for face_location in face_locations:
                top, right, bottom, left = face_location
                face_image = face[top:bottom, left:right]
                resized_face = cv2.resize(face_image, (224, 224), interpolation=cv2.INTER_AREA)
                resized_face = cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR)
                cv2.imwrite(image_path + "/" + filename[:-4] + original + "_" + str(n) + ".jpg",
                            resized_face,
                            [int(cv2.IMWRITE_JPEG_QUALITY), 85])
                n += 1
def _load_face_extractor(self):
    """
    Init and return the face extractor object (implemented in
    deepfakes-inference-demo/helpers/face_extract_1) that consists of a
    video reader function and a face detector.
    """
    import sys
    sys.path.insert(0, os.path.join(self.root_path, "blazeface-pytorch"))
    sys.path.insert(0, os.path.join(self.root_path, "deepfakes-inference-demo"))

    # Load the face detection model BlazeFace, based on https://github.com/tkat0/PyTorch_BlazeFace/
    from blazeface import BlazeFace
    facedet = BlazeFace().to(self.gpu)
    # Load the pretrained weights
    facedet.load_weights(os.path.join(self.root_path, "blazeface-pytorch/blazeface.pth"))
    facedet.load_anchors(os.path.join(self.root_path, "blazeface-pytorch/anchors.npy"))
    # Set the module in evaluation mode
    _ = facedet.train(False)

    from helpers.read_video_1 import VideoReader
    from helpers.face_extract_1 import FaceExtractor

    # Number of frames to read from the video, taken regularly from the
    # beginning to the end of the video.
    self.frames_per_video = 17
    # Init the video reader.
    video_reader = VideoReader()
    # Lambda to read the frames, where x is the video path.
    video_read_fn = lambda x: video_reader.read_frames(x, num_frames=self.frames_per_video)
    # Init the face extractor with the video reader function and the face detector.
    face_extractor = FaceExtractor(video_read_fn, facedet)
    return face_extractor
        for k in range(6):
            kp_x = detections[i, 4 + k * 2] * img.shape[1]
            kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0]
            circle = patches.Circle((kp_x, kp_y), radius=0.5, linewidth=1,
                                    edgecolor="lightskyblue", facecolor="none",
                                    alpha=detections[i, 16])
            ax.add_patch(circle)
    plt.show()


from blazeface import BlazeFace

net = BlazeFace().to(gpu)
net.load_weights("blazeface.pth")
net.load_anchors("anchors.npy")

# Optionally change the thresholds:
net.min_score_thresh = 0.75
net.min_suppression_threshold = 0.3

img = cv2.imread("test.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

detections = net.predict_on_image(img)
detections.shape

plot_detections(img, detections)
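# Hedged helper, not in the original snippet: the layout used above is
# [ymin, xmin, ymax, xmax, 6 x (kp_x, kp_y), score] with normalized
# coordinates, so a single detection row can be mapped to pixels like this.
def detection_to_pixels(det, img_shape):
    h, w = img_shape[:2]
    box = (det[1] * w, det[0] * h, det[3] * w, det[2] * h)   # xmin, ymin, xmax, ymax
    keypoints = [(det[4 + 2 * k] * w, det[4 + 2 * k + 1] * h) for k in range(6)]
    return box, keypoints, float(det[16])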
def main(argv):
    args = parse_args(argv)

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:

        ## BlazeFace loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            for batch_idx0 in tqdm(np.arange(start=0, stop=len(df_videos_process), step=batch_size),
                                   desc='Extracting faces'):
                tosave_list = list(p.map(partial(process_video,
                                                 source_dir=source_dir,
                                                 facedestination_dir=facedestination_dir,
                                                 checkpoint_folder=checkpoint_folder,
                                                 face_size=face_size,
                                                 face_extractor=face_extractor,
                                                 lazycheck=lazycheck,
                                                 deepcheck=deepcheck,
                                                 ),
                                         df_videos_process.iloc[batch_idx0:batch_idx0 + batch_size].iterrows()))
                for tosave in tosave_list:
                    if tosave is not None:
                        if len(tosave[2]):
                            list(p.map(save_jpg, tosave[2]))
                        tosave[1].parent.mkdir(parents=True, exist_ok=True)
                        tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(), total=len(df_videos), desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(str(video_face_checkpoint_path))
                    # Fix same attribute issue
                    df_video_faces = df_video_faces.rename(columns={'subject': 'videosubject'}, errors='ignore')
                    nfaces = len(
                        np.unique(df_video_faces.index.map(lambda x: int(x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    print('Error while reading: {}'.format(video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        if len(faces_dataset) == 0:
            raise ValueError(f'No checkpoint found from face extraction. '
                             f'Is the source path {source_dir} correct for the videos in your dataframe?')

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        if offset > 0:
            if num > 0:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath(
                        'faces_df_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] +
                        '_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
            else:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath('faces_df_from_video_{}.pkl'.format(offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}.pkl'.format(offset))
        elif num > 0:
            if facesdataset_path.is_dir():
                facesdataset_path = facesdataset_path.joinpath(
                    'faces_df_from_video_{}_to_video_{}.pkl'.format(0, num))
            else:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] +
                    '_from_video_{}_to_video_{}.pkl'.format(0, num))
        else:
            if facesdataset_path.is_dir():
                facesdataset_path = facesdataset_path.joinpath('faces_df.pkl')  # just a check if the path is a dir

        # Creates directory (if it doesn't exist)
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))

        df_faces = pd.concat(faces_dataset, axis=0, )
        df_faces['video'] = df_faces['video'].astype('category')
        for key in ['kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x', 'kp3y',
                    'kp4x', 'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y',
                    'left', 'top', 'right', 'bottom', ]:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Eventually remove duplicates
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        fields_to_preserve_from_video = [i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces']
                                         if i in df_videos]
        df_faces = pd.merge(df_faces, df_videos[fields_to_preserve_from_video],
                            left_on='video', right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
def test_forward():
    x = torch.randn(1, 3, 128, 128)
    model = BlazeFace()
    h = model(x)
    assert h.detach().numpy().shape == (1, 96, 8, 8)
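# Hedged companion test, not in the original suite: the network should scale
# along the batch dimension, so a batch of N inputs is expected to yield an
# (N, 96, 8, 8) output. This is an assumption about this variant, not a
# documented guarantee.
def test_forward_batch():
    x = torch.randn(4, 3, 128, 128)
    model = BlazeFace()
    h = model(x)
    assert h.detach().numpy().shape == (4, 96, 8, 8)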
import sys

import torch

from blazebase import resize_pad, denormalize_detections
from blazeface import BlazeFace
from blazepalm import BlazePalm
from blazeface_landmark import BlazeFaceLandmark
from blazehand_landmark import BlazeHandLandmark

from visualization import draw_detections, draw_landmarks, draw_roi, HAND_CONNECTIONS, FACE_CONNECTIONS

gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.set_grad_enabled(False)

back_detector = True

face_detector = BlazeFace(back_model=back_detector).to(gpu)
if back_detector:
    face_detector.load_weights("blazefaceback.pth")
    face_detector.load_anchors("anchors_face_back.npy")
else:
    face_detector.load_weights("blazeface.pth")
    face_detector.load_anchors("anchors_face.npy")

palm_detector = BlazePalm().to(gpu)
palm_detector.load_weights("blazepalm.pth")
palm_detector.load_anchors("anchors_palm.npy")
palm_detector.min_score_thresh = .75

hand_regressor = BlazeHandLandmark().to(gpu)
hand_regressor.load_weights("blazehand_landmark.pth")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Videos root directory', required=True)
    parser.add_argument('--videodf', type=Path, help='Path to read the videos DataFrame')
    parser.add_argument('--facesfolder', type=Path, help='Faces output root directory', required=True)
    parser.add_argument('--facesdf', type=Path, help='Path to save the output DataFrame of faces', required=True)
    parser.add_argument('--checkpoint', type=Path, help='Path to save the temporary per-video outputs', required=True)
    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument('--device', type=torch.device,
                        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                        help='Device to use for face extraction')
    parser.add_argument('--collateonly', help='Only perform collation of pre-existing results', action='store_true')
    parser.add_argument('--noindex', help='Do not rebuild the index', action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads', type=int, help='Number of threads', default=8)
    parser.add_argument('--offset', type=int, help='Offset to start extraction', default=0)
    parser.add_argument('--num', type=int, help='Number of videos to process', default=0)
    parser.add_argument('--lazycheck', action='store_true', help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck', action='store_true', help='Try to open every image')
    args = parser.parse_args()

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:

        ## BlazeFace loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            for batch_idx0 in tqdm(np.arange(start=0, stop=len(df_videos_process), step=batch_size),
                                   desc='Extracting faces'):
                tosave_list = list(
                    p.map(
                        partial(
                            process_video,
                            source_dir=source_dir,
                            facedestination_dir=facedestination_dir,
                            checkpoint_folder=checkpoint_folder,
                            face_size=face_size,
                            face_extractor=face_extractor,
                            lazycheck=lazycheck,
                            deepcheck=deepcheck,
                        ),
                        df_videos_process.iloc[batch_idx0:batch_idx0 + batch_size].iterrows()))
                for tosave in tosave_list:
                    if tosave is not None:
                        if len(tosave[2]):
                            list(p.map(save_jpg, tosave[2]))
                        tosave[1].parent.mkdir(parents=True, exist_ok=True)
                        tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(), total=len(df_videos), desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(str(video_face_checkpoint_path))
                    # Fix same attribute issue
                    df_video_faces = df_video_faces.rename(columns={'subject': 'videosubject'}, errors='ignore')
                    nfaces = len(
                        np.unique(
                            df_video_faces.index.map(lambda x: int(x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    print('Error while reading: {}'.format(video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        if offset is not None:
            if num is not None:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] +
                    '_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
            else:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}.pkl'.format(offset))
        elif num is not None:
            facesdataset_path = facesdataset_path.parent.joinpath(
                str(facesdataset_path.parts[-1]).split('.')[0] +
                '_from_video_{}_to_video_{}.pkl'.format(0, num))

        # Creates directory (if it doesn't exist)
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))

        df_faces = pd.concat(faces_dataset, axis=0, )
        df_faces['video'] = df_faces['video'].astype('category')
        for key in ['kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x', 'kp3y',
                    'kp4x', 'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y',
                    'left', 'top', 'right', 'bottom', ]:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Eventually remove duplicates
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        fields_to_preserve_from_video = [i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces']
                                         if i in df_videos]
        df_faces = pd.merge(df_faces, df_videos[fields_to_preserve_from_video],
                            left_on='video', right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
    with ThreadPoolExecutor(max_workers=num_workers) as ex:
        meta = tqdm(ex.map(process_file, range(len(df))), total=len(df))
    return pd.DataFrame(meta)


if __name__ == '__main__':
    args = parser.parse_args()
    assert args.df is not None, 'Need to specify metadata file'
    with open(args.config) as f:
        config = yaml.load(f)
    device = torch.device('cuda:{}'.format(args.gpu))
    df = pd.read_csv(args.df)
    path = config['data_path']
    # Facedet
    facedet = BlazeFace().to(device)
    facedet.load_weights("./dfdet/BlazeFace/blazeface.pth")
    facedet.load_anchors("./dfdet/BlazeFace/anchors.npy")
    _ = facedet.train(False)
    #
    video_reader = VideoReader()

    def video_read_fn(x):
        return video_reader.read_frames(x, num_frames=config['n_frames'])

    face_extractor = FaceExtractor(video_read_fn, facedet)

    faces_dataframe = preprocess_on_video_set(df, 4)
    faces_dataframe.to_csv('{}/faces_metadata.csv'.format(config['out_path']))
class MouthDetector():
    def __init__(self, device):
        self.net = BlazeFace().to(device)
        self.net.load_weights("blazeface.pth")
        self.net.load_anchors("anchors.npy")
        self.mouth_region_size = (64, 64)
        self.img_dims = (128, 128)

    def plot_detections(self, img, detections, with_keypoints=True):
        fig, ax = plt.subplots(1, figsize=(10, 10))
        ax.grid(False)
        ax.imshow(img / 255.)
        if isinstance(detections, torch.Tensor):
            detections = detections.cpu().numpy()
        if detections.ndim == 1:
            detections = np.expand_dims(detections, axis=0)
        print("Found %d faces" % detections.shape[0])
        for i in range(detections.shape[0]):
            ymin = detections[i, 0] * img.shape[0]
            xmin = detections[i, 1] * img.shape[1]
            ymax = detections[i, 2] * img.shape[0]
            xmax = detections[i, 3] * img.shape[1]
            rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                     linewidth=1, edgecolor="r", facecolor="none",
                                     alpha=detections[i, 16])
            ax.add_patch(rect)
            print(ymin, ymax, xmin, xmax)
            if with_keypoints:
                for k in range(2, 3):
                    kp_x = detections[i, 4 + k * 2] * img.shape[1]
                    kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0]
                    circle = patches.Circle((kp_x, kp_y), radius=0.5, linewidth=1,
                                            edgecolor="lightskyblue", facecolor="none",
                                            alpha=detections[i, 16])
                    ax.add_patch(circle)
        plt.show()

    def mouth_detection(self, img, detections, with_keypoints=True, img_dims=(128, 128)):
        # fig, ax = plt.subplots(1, figsize=(10, 10))
        # ax.grid(False)
        # ax.imshow(img/255.)
        if isinstance(detections, torch.Tensor):
            detections = detections.cpu().numpy()
        if detections.ndim == 1:
            detections = np.expand_dims(detections, axis=0)
        print("Found %d faces" % detections.shape[0])
        i = 0  # first face detection
        k = 2  # nose keypoint
        # for i in range(detections.shape[0]):  # for all faces
        ymin = detections[i, 0] * img_dims[0]
        xmin = detections[i, 1] * img_dims[1]
        ymax = detections[i, 2] * img_dims[0]
        xmax = detections[i, 3] * img_dims[1]
        # print(xmin, xmax, ymin, ymax)
        # for k in range(2, 3):  # for all keypoints
        kp_x = detections[i, 4 + k * 2] * img_dims[1]
        kp_y = detections[i, 4 + k * 2 + 1] * img_dims[0]
        print('########')
        print(kp_y, kp_x)
        # Crop from the nose keypoint down to the bottom of the face box.
        mouth_region = img[int(kp_y):int(ymax), int(xmin):int(xmax)]
        return mouth_region

    def batch_mouth_detection(self, frames, detections, with_keypoints=True, img_dims=(128, 128)):
        """
        Return mouth regions for a batch of frames, along with a status flag
        telling whether any frame was skipped during keypoint finding.
        mouth_regions: mouth ROIs
        flag: True if a frame was skipped, else False
        """
        # NOTE: resize_frames is built here but the loop below still iterates
        # the original frames, as in the source.
        resize_frames = []
        for frame in frames:
            if frame.shape[0] != self.img_dims[0] or frame.shape[1] != self.img_dims[1]:
                frame = resize(frame, self.img_dims)
                resize_frames.append(frame)
            else:
                resize_frames.append(frame)
        # print(len(resize_frames))
        # frames = torch.from_numpy(np.array(resize_frames))
        # if isinstance(detections, torch.Tensor):
        #     detections = detections.cpu().numpy()
        # if len(detections) == 2:
        #     detections = np.expand_dims(detections, axis=1)
        # print("Found %d faces" % detections.shape[0])
        i = 0  # first face detection
        k = 2  # nose keypoint
        # for i in range(detections.shape[0]):  # for all faces
        # print(len(detections))
        # print('########')
        # print(kp_y, kp_x)
        mouth_regions = []
        for index, img in enumerate(frames):
            if len(detections[index]) > 0:
                try:
                    ymin = detections[index][i, 0] * img_dims[0]
                    xmin = detections[index][i, 1] * img_dims[1]
                    ymax = detections[index][i, 2] * img_dims[0]
                    xmax = detections[index][i, 3] * img_dims[1]
                    # print(xmin, xmax, ymin, ymax)
                    # for k in range(2, 3):  # for all keypoints
                    kp_x = detections[index][i, 4 + k * 2] * img_dims[1]
                    kp_y = detections[index][i, 4 + k * 2 + 1] * img_dims[0]
                    mouth_region = img[int(kp_y):int(ymax), int(xmin):int(xmax)]
                    mouth_regions.append(resize(mouth_region.cpu().numpy(), self.mouth_region_size))
                except IndexError:
                    flag = True
                    break
            else:
                flag = True
        if len(frames) == len(mouth_regions):
            flag = False
        else:
            flag = True
        # print(len(mouth_region_size))
        return np.array(mouth_regions), flag


# ########################## old code
# class VideoHandler(object):
#     def __init__(self, filepaths):
#         self.paths = filepaths
#         self.mp4_filenames = filepaths
#         self.blaze_detector = MouthDetector()
#         # self.mouth_extractor = FaceROIExtractor()
#
#     def read_video_audio_dyn(self, video_path):
#         # print(video_path, audio_path)
#         clip = VideoFileClip(video_path, verbose=False)
#         video_frames = torch.FloatTensor(list(clip.iter_frames()))
#         # video_frames = torch.FloatTensor(list(imageio.get_reader(video_path, 'ffmpeg')))
#         # waveform, sample_rate = torchaudio.load(audio_path)
#         waveform = torch.from_numpy(clip.audio.to_soundarray()).float().permute(1, 0)
#         specgram = torchaudio.transforms.MelSpectrogram()(waveform)
#         return specgram, video_frames
#
#     def read_video_audio_blaze_roi(self, video_path, frame_len, subdir='train', audio_path=None):
#         frames = []
#         mouth_frames = []
#         mouth_indices = []
#         video_frames = []
#         cap = cv2.VideoCapture(video_path)
#         frame_counter = 0
#         while cap.isOpened():
#             ret, frame = cap.read()
#             if ret:
#                 frame = resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (256, 256)) * 255
#                 frames.append(frame.astype(np.uint8))
#                 frame_counter += 1
#             else:
#                 break
#             if frame_counter == 31:  # frame length
#                 break
#         cap.release()
#         frames = np.array(frames)
#         frames = (frames - frames.min()) / (frames.max() - frames.min())
#         img = torch.from_numpy(frames).permute(0, 3, 1, 2) * 255
#         detections = self.blaze_detector.net.predict_on_batch(img)
#         mouth_regions, flag = self.blaze_detector.batch_mouth_detection(img.permute(0, 2, 3, 1), detections)
#         if not flag:
#             mouth_regions /= 255.
#         return frames, mouth_regions, flag
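# Hedged usage sketch, not in the original file, modeled on the commented-out
# VideoHandler above: run BlazeFace on a batch of 128x128 RGB frames and pull
# out the mouth crops. The random `frames` array is a stand-in for real video
# frames, so it will normally yield no detections.
if __name__ == "__main__":
    import numpy as np
    import torch

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    detector = MouthDetector(device)
    frames = np.random.rand(4, 128, 128, 3).astype(np.float32)   # stand-in frames in [0, 1]
    img = torch.from_numpy(frames).permute(0, 3, 1, 2) * 255
    detections = detector.net.predict_on_batch(img)
    mouth_regions, skipped = detector.batch_mouth_detection(img.permute(0, 2, 3, 1), detections)
    print(mouth_regions.shape, "skipped:", skipped)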
            weighted_detection = detection.clone()
            if len(overlapping) > 1:
                coordinates = output_detections[i][overlapping, :16]
                scores = output_detections[i][overlapping, 16:17]
                total_score = scores.sum()
                weighted = (coordinates * scores).sum(dim=0) / total_score
                weighted_detection[:16] = weighted
                weighted_detection[16] = total_score / len(overlapping)
            faces.append(weighted_detection)

        faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, 17))
        filtered_detections.append(faces)

        return filtered_detections[0]


net = BlazeFace().to("cpu")
net.load_state_dict(torch.load("blazeface.pth"))
net.eval()

myNet = MyBlazeFace(net, "anchors.npy").to("cpu")
torch.save(myNet, "myBlazeface.pth")

# from torch.autograd import Variable
# myNet = torch.load("myBlazeface.pth")
# dummy_input = Variable(torch.randn(1, 3, 128, 128))  # nchw
# onnx_filename = "blazeface.onnx"
# torch.onnx.export(myNet, dummy_input,
#                   onnx_filename,
#                   verbose=True)
# import onnx
# from onnx_tf.backend import prepare
# from PIL import Image
import numpy as np
import torch
import cv2

from blazeface import BlazeFace

# some useful info
print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())

gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(gpu)

net = BlazeFace().to(gpu)
net.load_weights("blazeface.pth")
net.load_anchors("anchors.npy")

# let's start the capture now
print("starting camera now....")
# adjust based on your device; for most cases, normally 0
cap = cv2.VideoCapture(0)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Our operations on the frame come here
    # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (128, 128))

    detections = net.predict_on_image(img)
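    # --- Hedged continuation, not part of the original snippet ---
    # Draw the normalized detections back onto the full-size frame and quit on
    # 'q'. Assumes the [ymin, xmin, ymax, xmax, ..., score] layout used in the
    # other snippets in this file.
    if isinstance(detections, torch.Tensor):
        detections = detections.cpu().numpy()
    h, w = frame.shape[:2]
    for det in detections:
        x1, y1 = int(det[1] * w), int(det[0] * h)
        x2, y2 = int(det[3] * w), int(det[2] * h)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imshow("BlazeFace", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()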
model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
print('=' * 20)
net = getattr(fornet, net_model)().eval().to(device)
print('=' * 20)
net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))


# In[5]:

transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)


# In[6]:

facedet = BlazeFace().to(device)
facedet.load_weights("../blazeface/blazeface.pth")
facedet.load_anchors("../blazeface/anchors.npy")
videoreader = VideoReader(verbose=False)
video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)


# ## Detect faces

# In[7]:

torch.cuda.is_available()


# In[8]:
        for k in range(6):
            offset = 4 + k * 2
            # raw_boxes[:, offset    ] = (raw_boxes[:, offset    ] / 128.0) * self.anchors[:, 2] + self.anchors[:, 0]  # x
            # raw_boxes[:, offset + 1] = (raw_boxes[:, offset + 1] / 128.0) * self.anchors[:, 3] + self.anchors[:, 1]  # y
            concat_stuff.append((raw_boxes[:, offset] / 128.0) * self.anchors[:, 2] + self.anchors[:, 0])
            concat_stuff.append((raw_boxes[:, offset + 1] / 128.0) * self.anchors[:, 3] + self.anchors[:, 1])

        return torch.stack(concat_stuff, dim=-1)


import coremltools as ct
from coremltools.converters.onnx import convert

bfModel = BlazeFace()
bfModel.load_weights("./blazeface.pth")
bfModel.load_anchors("./anchors.npy")

bfs = BlazeFaceScaled(bfModel)
bfs.eval()

traced_model = torch.jit.trace(bfs, torch.rand(1, 3, 128, 128), check_trace=True)
# print(traced_model)

mlmodel = ct.convert(traced_model,
                     inputs=[ct.ImageType(name="image",
                                          shape=ct.Shape(shape=(1,
import cv2

from facemesh import FaceMesh
from blazeface import BlazeFace

# load FaceMesh model
mesh_net = FaceMesh()
mesh_net.load_weights("facemesh.pth")

# load BlazeFace model
blaze_net = BlazeFace()
blaze_net.load_weights("blazeface.pth")
blaze_net.load_anchors("anchors.npy")


# postprocessing for face detector
def get_crop_face(detections, image):
    w, h = image.shape[0], image.shape[1]

    ymin = int(detections[0, 0] * w)
    xmin = int(detections[0, 1] * h)
    ymax = int(detections[0, 2] * w)
    xmax = int(detections[0, 3] * h)

    margin_x = int(0.25 * (xmax - xmin))
    margin_y = int(0.25 * (ymax - ymin))

    ymin -= margin_y
    ymax += margin_y
    xmin -= margin_x
    xmax += margin_x
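    # --- Hedged completion, not part of the original snippet (which is cut
    # off here): clamp the expanded box to the image bounds and return the
    # crop. Note that in this snippet `w` holds image.shape[0] (rows) and `h`
    # holds image.shape[1] (columns).
    ymin, xmin = max(ymin, 0), max(xmin, 0)
    ymax, xmax = min(ymax, w), min(xmax, h)
    return image[ymin:ymax, xmin:xmax]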
import os

import torch
import torch.nn as nn

from network.models import model_selection

test_dir = "/home/dchen/DFDC/test_videos/"
test_videos = sorted([x for x in os.listdir(test_dir) if x[-4:] == ".mp4"])

gpu = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

import sys
sys.path.insert(0, "/home/dchen/DFDC/blazeface-pytorch")
sys.path.insert(0, "/home/dchen/DFDC/deepfakes-inference-demo")

from blazeface import BlazeFace
facedet = BlazeFace().to(gpu)
facedet.load_weights("/home/dchen/DFDC/blazeface-pytorch/blazeface.pth")
facedet.load_anchors("/home/dchen/DFDC/blazeface-pytorch/anchors.npy")
_ = facedet.train(False)

from helpers.read_video_1 import VideoReader
from helpers.face_extract_1 import FaceExtractor

frames_per_video = 16
video_reader = VideoReader()
video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn, facedet)

input_size = 299
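# Hedged smoke test, not in the original script: run the extractor on the
# first test video and report how many of the sampled frames contain a face.
video_path = os.path.join(test_dir, test_videos[0])
faces = face_extractor.process_video(video_path)
n_with_faces = sum(1 for frame_data in faces if len(frame_data["faces"]))
print("{}/{} sampled frames contain a face".format(n_with_faces, len(faces)))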
import face_recognition
import random
from concurrent.futures import ThreadPoolExecutor

# Missing in the original fragment but needed below:
import sys
import torch
from torchvision import transforms

sys.path.insert(1, 'helpers')
sys.path.insert(1, 'model')
sys.path.insert(1, 'weight')

from cvit import CViT
from helpers_read_video_1 import VideoReader
from helpers_face_extract_1 import FaceExtractor

device = 'cuda' if torch.cuda.is_available() else 'cpu'

from blazeface import BlazeFace
facedet = BlazeFace().to(device)
facedet.load_weights("helpers/blazeface.pth")
facedet.load_anchors("helpers/anchors.npy")
_ = facedet.train(False)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = transforms.Compose([transforms.Normalize(mean, std)])

tresh = 50
sample = 'sample__prediction_data/'
ran = random.randint(0, 400)
ran_min = abs(ran - 1)
import os
import random
import pickle

import cv2
import numpy as np
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch.utils.data import Dataset, DataLoader

from blazeface import BlazeFace

DATA_FOLDER = '../input/deepfake-detection-challenge'
TRAIN_SAMPLE_FOLDER = 'train_sample_videos'
TEST_FOLDER = 'test_videos'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

NET = BlazeFace().to(device)
NET.load_weights("../input/blazeface.pth")
NET.load_anchors("../input/anchors.npy")


class MyLSTM(nn.Module):
    def __init__(self, num_layers=2, num_hidden_nodes=512):
        super(MyLSTM, self).__init__()
        self.num_layers = num_layers
        self.num_hidden_nodes = num_hidden_nodes
        # Input dim is 167; hidden size defaults to 512 nodes per layer.
        self.lstm = nn.LSTM(167, num_hidden_nodes, batch_first=True, num_layers=num_layers)
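# Hedged shape check, not in the original file: with batch_first=True the LSTM
# above expects (batch, seq_len, 167) inputs and returns per-step outputs of
# size num_hidden_nodes.
model = MyLSTM()
x = torch.randn(2, 10, 167)        # 2 sequences of 10 timesteps, 167 features each
out, (h_n, c_n) = model.lstm(x)
print(out.shape)                   # torch.Size([2, 10, 512])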
def run_nb(modelname):
    # ## Parameters

    # In[2]:

    """
    Choose an architecture between
    - EfficientNetB4
    - EfficientNetB4ST
    - EfficientNetAutoAttB4
    - EfficientNetAutoAttB4ST
    - Xception
    """
    net_model = modelname

    """
    Choose a training dataset between
    - DFDC
    - FFPP
    """
    train_db = 'DFDC'

    # In[3]:

    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    face_policy = 'scale'
    face_size = 224
    frames_per_video = 32

    # ## Initialization

    # In[4]:

    print('=' * 20)
    model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
    print('=' * 20)
    net = getattr(fornet, net_model)().eval().to(device)
    print('=' * 20)
    net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))

    # In[5]:

    transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)

    # In[6]:

    facedet = BlazeFace().to(device)
    facedet.load_weights("../blazeface/blazeface.pth")
    facedet.load_anchors("../blazeface/anchors.npy")
    videoreader = VideoReader(verbose=False)
    video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

    # ## Detect faces

    # In[7]:

    torch.cuda.is_available()

    # In[8]:

    torch.cuda.current_device()

    # In[9]:

    torch.cuda.device(0)

    # In[10]:

    torch.cuda.device_count()

    # In[11]:

    torch.cuda.get_device_name(0)

    # In[12]:

    vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
    vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

    # In[13]:

    im_real_face = vid_real_faces[0]['faces'][0]
    im_fake_face = vid_fake_faces[0]['faces'][0]

    # In[14]:

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))
    ax[0].imshow(im_real_face)
    ax[0].set_title('REAL')
    ax[1].imshow(im_fake_face)
    ax[1].set_title('FAKE')

    # ## Predict scores for each frame

    # In[15]:

    # For each frame, we consider the face with the highest confidence score
    # found by BlazeFace (= frame['faces'][0]).
    faces_real_t = torch.stack([transf(image=frame['faces'][0])['image']
                                for frame in vid_real_faces if len(frame['faces'])])
    faces_fake_t = torch.stack([transf(image=frame['faces'][0])['image']
                                for frame in vid_fake_faces if len(frame['faces'])])

    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
        faces_fake_pred = net(faces_fake_t.to(device)).cpu().numpy().flatten()

    # In[16]:

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    ax[0].stem([f['frame_idx'] for f in vid_real_faces if len(f['faces'])],
               expit(faces_real_pred), use_line_collection=True)
    ax[0].set_title('REAL')
    ax[0].set_xlabel('Frame')
    ax[0].set_ylabel('Score')
    ax[0].set_ylim([0, 1])
    ax[0].grid(True)

    ax[1].stem([f['frame_idx'] for f in vid_fake_faces if len(f['faces'])],
               expit(faces_fake_pred), use_line_collection=True)
    ax[1].set_title('FAKE')
    ax[1].set_xlabel('Frame')
    ax[1].set_ylabel('Score')
    ax[1].set_ylim([0, 1])
    ax[1].set_yticks([0, 1], ['REAL', 'FAKE'])

    # In[17]:

    """
    Print average scores.
    An average score close to 0 predicts REAL. An average score close to 1 predicts FAKE.
    """
    print('Average score for REAL video: {:.4f}'.format(expit(faces_real_pred.mean())))
    print('Average score for FAKE video: {:.4f}'.format(expit(faces_fake_pred.mean())))
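    # Hedged follow-up, not in the original notebook: turn the average scores
    # into hard REAL/FAKE labels with a 0.5 threshold on the sigmoid output,
    # as the note above suggests.
    label_real = 'FAKE' if expit(faces_real_pred.mean()) > 0.5 else 'REAL'
    label_fake = 'FAKE' if expit(faces_fake_pred.mean()) > 0.5 else 'REAL'
    print('Predicted labels: REAL video -> {}, FAKE video -> {}'.format(label_real, label_fake))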