def get_cluster_metadata(self, iCluster):
    # Method of the same helper class as __init__ below. Returns, for one cluster,
    # a list of (original_path, [fake_paths]) tuples for videos that exist on disk.
    m = (self._df.cluster == iCluster) & (self._df.exists)
    df = self._df[m].reset_index(drop=True).copy()

    # Filter out originals not existing on file.
    azOriginal = np.unique(df.orig.values)
    l_files = df.file.values
    azOriginal = [x for x in azOriginal if x in l_files]

    m = df.orig.isin(azOriginal)
    df = df[m].reset_index(drop=True)

    l_out = []

    for x in azOriginal:
        original = df[df.file == x]
        fakes = df[(df.orig == x) & (df.file != x)]

        # Build full paths; the lambda argument is named r so it does not shadow the loop variable.
        full_file_orig = original.apply(lambda r: get_part_dir(r['part']) / r['file'], axis=1).iloc[0]
        full_file_fakes = list(fakes.apply(lambda r: get_part_dir(r['part']) / r['file'], axis=1))

        l_out.append((full_file_orig, full_file_fakes))

    return l_out
def __init__(self):
    df_c = pd.read_feather(get_meta_dir() / "face_clusters.feather")

    l_parts = []

    for x in range(50):
        path = get_part_dir(x, False)
        isDir = path.is_dir()
        if isDir:
            l_parts.append(x)

    l_orig = []
    l_file = []
    l_part = []

    for iPart in l_parts:
        df_meta = read_metadata(iPart)

        for x in df_meta:
            num_fakes = len(x[1])

            l_orig.extend([x[0]] * (num_fakes + 1))

            l_file.append(x[0])
            l_file.extend(x[1])

            l_part.extend([iPart] * (num_fakes + 1))

    df = pd.DataFrame({'orig': l_orig, 'file': l_file, 'part': l_part})

    df = df.merge(df_c, left_on='orig', right_on='video')
    df = df.drop(['video', 'chunk'], axis=1)

    l_file_tuple = list(zip(df.file, df.part))

    l_exists = []

    for x in l_file_tuple:
        filepath = get_part_dir(x[1]) / x[0]
        l_exists.append(filepath.is_file())

    df = df.assign(exists=l_exists)

    num_files = df.shape[0]
    num_originals = np.unique(df.orig).shape[0]
    num_clusters = np.unique(df.cluster).shape[0]

    # print(f"num_files = {num_files}, num_originals = {num_originals}, num_clusters = {num_clusters}")

    self._df = df
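# A minimal usage sketch for the two methods above. It assumes they are bound to
# a single helper class, called ClusterMetadata here purely for illustration
# (the actual class statement is not shown in this section).
def _demo_cluster_metadata(iCluster=0):
    cm = ClusterMetadata()  # hypothetical name for the owning class

    # Each entry is (path_to_original, [paths_to_fakes]) for videos that exist on disk.
    for full_file_orig, full_file_fakes in cm.get_cluster_metadata(iCluster):
        print(f"{full_file_orig.name}: {len(full_file_fakes)} fake(s)")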
def prepare_process(iPart):
    # Todo prep all (original, fake) for all parts. Issue tasks for all pairs and mp on those, not the iPart.
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()  # Note: not used in this preparation step.

    num_originals = len(l_d)

    l_part_task = []

    for idx_key in range(num_originals):
        current = l_d[idx_key]
        original = part_dir / current[0]

        # Pick first fake. Todo: Can pick other fakes for more data. (one set per epoch)
        num_fakes = len(current[1])

        if num_fakes == 0:
            print(f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping.")
            continue

        fake = part_dir / current[1][0]

        isPairFound = original.is_file() and fake.is_file()

        if not isPairFound:
            print(f"p_{iPart}: Original and/or fake not found. Skipping.")
            continue

        file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{str(original.stem)}_{str(fake.stem)}.npy"
        file_real_out = output_dir / f"Line_Test_p_{iPart}_{str(original.stem)}_real.npy"
        file_fake_out = output_dir / f"Line_Test_p_{iPart}_{str(fake.stem)}_fake.npy"

        isExisting = file_pair_out.is_file() and file_real_out.is_file() and file_fake_out.is_file()

        if isExisting:
            continue

        l_part_task.append((iPart, original, fake))

    return l_part_task
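# A hedged sketch of the Todo above: build the (iPart, original, fake) task list
# across parts and fan the pairs out with multiprocessing instead of iterating
# per part. process_task is a hypothetical worker passed in by the caller; it is
# not defined in this section.
def _run_prepared_tasks(l_parts, process_task, num_workers=4):
    import multiprocessing as mp

    # Gather tasks for every requested part first, so the pool balances over pairs.
    l_tasks = []
    for iPart in l_parts:
        l_tasks.extend(prepare_process(iPart))

    with mp.Pool(num_workers) as pool:
        pool.starmap(process_task, l_tasks)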
def run_one():
    input_dir = get_part_dir(0)

    mtcnn_detector = MTCNNDetector()

    l_files = list(sorted(input_dir.iterdir()))
    l_files = [x for x in l_files if x.suffix == '.mp4']

    video_path = input_dir / "nrdnytturz.mp4"
    assert video_path.is_file()

    # video_path = l_files[126]

    video_size = 32
    W = 256
    H = 1

    video = read_video(video_path, video_size)

    x_max = video.shape[2]
    y_max = video.shape[1]
    z_max = video.shape[0]

    faces = find_two_consistent_faces(mtcnn_detector, video)

    featureset = ['l_mouth', 'r_mouth']

    anSample = sample_feature(video, faces, featureset, W, H, True)

    l_feature0 = np.array((*_get_integer_coords_single_feature(x_max, y_max, faces[0], featureset[0]), 0))
    r_feature0 = np.array((*_get_integer_coords_single_feature(x_max, y_max, faces[0], featureset[1]), 0))

    vector = r_feature0 - l_feature0
    length_vector = np.sqrt(vector.dot(vector))

    anSampleOut = straighten_sample(anSample, length_vector)

    anSample = anSample.reshape(-1)
def process_part(iPart):
    l_d = read_metadata(iPart)
    input_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()

    for o_set in l_d:
        l_samples = []

        original_path = input_dir / o_set[0]
        # print(f"{iPart}: {original_path.stem}...")

        r_data = sample_video_safe(mtcnn_detector, original_path, False)

        if r_data is None:
            print(f"{original_path.stem}: Bad original. Skipping set.")
            continue

        l_samples.append(r_data)

        for fake_path in o_set[1]:
            f_data = sample_video_safe(mtcnn_detector, input_dir / fake_path, False)

            if f_data is None:
                continue

            l_samples.append(f_data)

        if len(l_samples) >= 2:
            data = np.concatenate(l_samples)

            filename = f"p_{iPart}_{original_path.stem}.npy"
            output_path = output_dir / filename

            np.save(output_path, data)
        else:
            print(f"{original_path.stem}: No good fakes. Skipping set.")
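# A small driver sketch, assuming process_part() is meant to be run over all
# part directories that exist on disk; the 0-49 range and the
# get_part_dir(x, False) call mirror the directory scan in __init__ above.
def _process_all_parts():
    for iPart in range(50):
        if get_part_dir(iPart, False).is_dir():
            process_part(iPart)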
def process(iPart):
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):
        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        original = part_dir / current[0]
        fake = part_dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            data_train = sample_pair(original, fake)

            if data_train is None:
                print(f"p_{iPart}_{str(original.stem)}_{str(fake.stem)}: No data.")
            else:
                file_out = output_dir / f"p_{iPart}_Train_{str(original.stem)}_{str(fake.stem)}.npy"
                np.save(file_out, data_train)

            data_test_real = sample_single(original)
            data_test_fake = sample_single(fake)

            isValid = (data_test_real is not None) and (data_test_fake is not None)

            if isValid:
                file_real_out = output_dir / f"p_{iPart}_Test_{str(original.stem)}_real.npy"
                np.save(file_real_out, data_test_real)

                file_fake_out = output_dir / f"p_{iPart}_Test_{str(fake.stem)}_fake.npy"
                np.save(file_fake_out, data_test_fake)
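# Hedged sketch of reading back the arrays written by process() above. The glob
# patterns mirror that function's output file names; this loader is illustrative
# and not part of the original pipeline.
def _load_process_output(output_dir):
    l_train = [np.load(p) for p in sorted(output_dir.glob("p_*_Train_*.npy"))]
    l_test_real = [np.load(p) for p in sorted(output_dir.glob("p_*_Test_*_real.npy"))]
    l_test_fake = [np.load(p) for p in sorted(output_dir.glob("p_*_Test_*_fake.npy"))]

    return l_train, l_test_real, l_test_fake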
def process(iPart):
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):
        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        original = part_dir / current[0]
        fake = part_dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            sample_image_pair(iPart, original, fake)
            sample_image_single(iPart, original, False)
            sample_image_single(iPart, fake, True)
original = video.orig
iCluster = video.cluster
part = video.part
file = video.file

l_video_set = list(df[df.orig == original].file)

num_frames = 32

print(f"Video: {file} Cluster: {iCluster} Original: {original} Part: {part}")

input_dir = get_part_dir(part)

assert (input_dir / file).is_file()
assert (input_dir / original).is_file()

video_real = read_video(input_dir / original, num_frames)
video_fake = read_video(input_dir / file, num_frames)

x_max = video_fake.shape[2]
y_max = video_fake.shape[1]

mtcnn_detector = MTCNNDetector()

l_faces_fake = _get_face_boxes(mtcnn_detector, video_fake, [num_frames // 2])
from face_detector import MTCNNDetector
from line_sampler import get_line
from sklearn.metrics import mean_squared_error

a = [7, 3, 4, 11, 24, 123, 3, 7, 3, 4, 10, 21, 123, 3]
b = [7, 3, 4, 11, 24, 129, 6, 11, 4, 10, 21, 123, 3, 3]

# eye - eye line.
# Start .5 outside, continue to l_eye location.

l_d = read_metadata(7)
entry = l_d[3]

input_dir = get_part_dir(7)

mtcnn_detector = MTCNNDetector()

real_path = input_dir / entry[0]
fake_path = input_dir / entry[1][1]

assert real_path.is_file()
assert fake_path.is_file()

video_size = 32
W = 256
H = 1

real_video = read_video(real_path, video_size)
fake_video = read_video(fake_path, video_size)
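# Hedged continuation sketch: score the same sampled line on the real and fake
# clip with mean_squared_error, which is imported above but not used in this
# fragment. The sample_feature() / find_two_consistent_faces() signatures follow
# run_one() earlier in this section; the 'l_eye' / 'r_eye' feature names are an
# assumption based on the eye-to-eye note above.
def _compare_eye_line(real_video, fake_video, mtcnn_detector, W=256, H=1):
    featureset = ['l_eye', 'r_eye']

    faces_real = find_two_consistent_faces(mtcnn_detector, real_video)
    faces_fake = find_two_consistent_faces(mtcnn_detector, fake_video)

    anReal = sample_feature(real_video, faces_real, featureset, W, H, True)
    anFake = sample_feature(fake_video, faces_fake, featureset, W, H, True)

    # Lower error suggests the sampled region of the fake is closer to the original.
    return mean_squared_error(anReal.reshape(-1), anFake.reshape(-1))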
def get_sampling_cubes_for_part(iPart, output_dir):
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)

    num_videos = len(l_d)

    print(f"p_{iPart}: Fake detection on part {iPart}. {len(l_d)} original video(s).")

    for idx_key in range(num_videos):
        current = l_d[idx_key]

        x_real = current[0]

        isCompleted = dataframe_exists(iPart, x_real)

        if isCompleted:
            # print(f"p_{iPart}_{x_real}: Already done.")
            continue
        else:
            print(f"p_{iPart}_{x_real}: Starting. {idx_key + 1} of {len(l_d)}")

        x_real = part_dir / x_real

        assert x_real.is_file(), "Error: Original not found"

        vidcap = cv2.VideoCapture(str(x_real))
        video_real = read_video(vidcap)
        vidcap.release()

        num_frames = video_real.shape[0]

        l_fakes = current[1]

        l_df_video = []

        for x_fake in l_fakes:
            x_fake = part_dir / x_fake

            if not x_fake.is_file():
                print(f"   WARNING: p_{iPart}_{x_real.stem}: Not a file: {x_fake}. Situation handled.")
                continue

            print(f"   p_{iPart}_{x_real.stem}: Processing {str(x_fake.stem)}")

            vidcap = cv2.VideoCapture(str(x_fake))
            video_fake = read_video(vidcap)
            vidcap.release()

            df_video = get_sampling_cubes(video_real, video_fake)

            df_video = df_video.assign(fake=str(x_fake.stem))

            l_df_video.append(df_video)

        if len(l_df_video) > 0:
            df_video = pd.concat(l_df_video, axis=0)
            df_video = df_video.assign(original=str(x_real.stem))
            df_video = df_video.assign(part=iPart)

            df_video.to_pickle(output_dir / f"p_{iPart}_{x_real.stem}_.pkl")

            print(f"p_{iPart}_{x_real.stem}: Complete.")
        else:
            print(f"p_{iPart}_{x_real.stem}: WARNING: No fakes found. No sampling cubes produced for video.")

    return []
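# Hedged helper sketch: collect the per-original pickles written by
# get_sampling_cubes_for_part() into one dataframe. The file pattern mirrors the
# to_pickle() call above; the helper itself is illustrative only.
def _load_sampling_cubes(output_dir, iPart):
    l_df = [pd.read_pickle(p) for p in sorted(output_dir.glob(f"p_{iPart}_*_.pkl"))]

    return pd.concat(l_df, axis=0) if l_df else pd.DataFrame()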