    def get_cluster_metadata(self, iCluster):

        # Keep only files in this cluster that exist on disk.
        m = (self._df.cluster == iCluster) & (self._df.exists)
        df = self._df[m].reset_index(drop=True).copy()

        # Filter out originals not existing on file.
        azOriginal = np.unique(df.orig.values)

        l_files = df.file.values

        azOriginal = [x for x in azOriginal if x in l_files]

        m = df.orig.isin(azOriginal)

        df = df[m].reset_index(drop=True)

        l_out = []

        for x in azOriginal:

            original = df[df.file == x]
            fakes = df[(df.orig == x) & (df.file != x)]

            full_file_orig = original.apply(lambda r: get_part_dir(r['part']) / r['file'], axis=1).iloc[0]

            full_file_fakes = list(fakes.apply(lambda r: get_part_dir(r['part']) / r['file'], axis=1))

            l_out.append((full_file_orig, full_file_fakes))

        return l_out
    def __init__(self):
        df_c = pd.read_feather(get_meta_dir() / "face_clusters.feather")

        l_parts = []

        # Collect the dataset parts that are present on disk.
        for x in range(50):
            path = get_part_dir(x, False)
            if path.is_dir():
                l_parts.append(x)

        l_orig = []
        l_file = []
        l_part = []

        for iPart in l_parts:
            df_meta = read_metadata(iPart)

            # Each metadata entry is (original, [fakes]): record one row per file.
            for x in df_meta:
                num_fakes = len(x[1])
                l_orig.extend([x[0]] * (num_fakes + 1))
                l_file.append(x[0])
                l_file.extend(x[1])
                l_part.extend([iPart] * (num_fakes + 1))


        df = pd.DataFrame({'orig': l_orig, 'file': l_file, 'part': l_part})

        df = df.merge(df_c, left_on='orig', right_on='video')

        df = df.drop(['video', 'chunk'], axis = 1)


        l_file_tuple = list(zip(df.file, df.part))

        l_exists = []

        for x in l_file_tuple:
            filepath = get_part_dir(x[1]) / x[0]
            l_exists.append(filepath.is_file())


        df = df.assign(exists=l_exists)

        num_files = df.shape[0]
        num_originals = np.unique(df.orig).shape[0]
        num_clusters = np.unique(df.cluster).shape[0]

        # print(f"num_files = {num_files}, num_originals = {num_originals}, num_clusters = {num_clusters}")

        self._df = df
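
A minimal usage sketch of the class above (here called ClusterMetadata, a hypothetical name; the class declaration itself is not part of this excerpt):

# ClusterMetadata is a hypothetical name for the class defined above.
meta = ClusterMetadata()

# Each entry pairs an original's full path with the paths of its fakes.
for full_file_orig, full_file_fakes in meta.get_cluster_metadata(0):
    print(full_file_orig, len(full_file_fakes))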
Example #3
def prepare_process(iPart):

    # TODO: prepare all (original, fake) pairs across all parts, then issue
    # multiprocessing tasks per pair rather than per part (see the sketch
    # after this function).

    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    # Note: the detector is constructed here but unused while only building the task list.
    mtcnn_detector = MTCNNDetector()

    num_originals = len(l_d)

    l_part_task = []

    for idx_key in range(num_originals):

        current = l_d[idx_key]

        original = dir / current[0]

        # Pick first fake. Todo: Can pick other fakes for more data. (one set per epoch)
        num_fakes = len(current[1])

        if num_fakes == 0:
            print(
                f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping."
            )
            continue

        fake = dir / current[1][0]

        if not (original.is_file() and fake.is_file()):
            print(f"p_{iPart}: Original and/or fake not found. Skipping.")
            continue

        file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{str(original.stem)}_{str(fake.stem)}.npy"
        file_real_out = output_dir / f"Line_Test_p_{iPart}_{str(original.stem)}_real.npy"
        file_fake_out = output_dir / f"Line_Test_p_{iPart}_{str(fake.stem)}_fake.npy"

        isExisting = (file_pair_out.is_file() and file_real_out.is_file()
                      and file_fake_out.is_file())

        if isExisting:
            continue

        l_part_task.append((iPart, original, fake))

    return l_part_task
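
Per the TODO above, the returned tasks are meant to be fanned out per (original, fake) pair; a minimal sketch, assuming a hypothetical worker run_task that consumes one (iPart, original, fake) tuple:

from multiprocessing import Pool

def run_all(l_parts):
    # Gather tasks across all parts, then multiprocess over the pairs.
    l_tasks = []
    for iPart in l_parts:
        l_tasks.extend(prepare_process(iPart))

    with Pool() as pool:
        pool.map(run_task, l_tasks)  # run_task is a hypothetical worker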
Example #4
def run_one():
    input_dir = get_part_dir(0)
    mtcnn_detector = MTCNNDetector()

    l_files = list(sorted(input_dir.iterdir()))

    l_files = [x for x in l_files if x.suffix == '.mp4']

    video_path = input_dir / "nrdnytturz.mp4"

    assert video_path.is_file()

    #video_path = l_files[126]

    video_size = 32

    W = 256
    H = 1

    video = read_video(video_path, video_size)

    x_max = video.shape[2]
    y_max = video.shape[1]
    z_max = video.shape[0]

    faces = find_two_consistent_faces(mtcnn_detector, video)

    featureset = ['l_mouth', 'r_mouth']

    anSample = sample_feature(video, faces, featureset, W, H, True)

    l_feature0 = np.array((*_get_integer_coords_single_feature(
        x_max, y_max, faces[0], featureset[0]), 0))
    r_feature0 = np.array((*_get_integer_coords_single_feature(
        x_max, y_max, faces[0], featureset[1]), 0))

    vector = r_feature0 - l_feature0

    # Euclidean length of the mouth-to-mouth vector.
    length_vector = np.sqrt(vector.dot(vector))

    anSampleOut = straighten_sample(anSample, length_vector)

    anSample = anSample.reshape(-1)
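
For reference, the hand-rolled dot-product length above is equivalent to NumPy's built-in norm; a minimal self-contained check:

import numpy as np

v = np.array([3.0, 4.0, 0.0])
assert np.sqrt(v.dot(v)) == np.linalg.norm(v)  # both give 5.0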
Example #5
def process_part(iPart):

    l_d = read_metadata(iPart)

    input_dir = get_part_dir(iPart)

    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()

    for o_set in l_d:

        l_samples = []

        original_path = input_dir / o_set[0]

        #print(f"{iPart}: {original_path.stem}...")

        r_data = sample_video_safe(mtcnn_detector, original_path, False)

        if r_data is None:
            print(f"{original_path.stem}: Bad original. Skipping set.")
            continue

        l_samples.append(r_data)

        for fake_path in o_set[1]:
            f_data = sample_video_safe(mtcnn_detector, input_dir / fake_path, False)

            if f_data is None:
                continue

            l_samples.append(f_data)

        if len(l_samples) >= 2:
            data = np.concatenate(l_samples)
            filename = f"p_{iPart}_{original_path.stem}.npy"
            output_path = output_dir / filename
            np.save(output_path, data)
        else:
            print(f"{original_path.stem}: No good fakes. Skipping set.")
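
The saved arrays follow the p_{iPart}_{original stem}.npy naming used above; a minimal sketch of reloading them (the first concatenated block holds the original's samples, the rest one block per usable fake; exact shapes depend on sample_video_safe):

import numpy as np

for npy_path in sorted(get_output_dir().glob("p_0_*.npy")):
    arr = np.load(npy_path)
    print(npy_path.stem, arr.shape)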
Example #6
def process(iPart):

    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):

        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        original = dir / current[0]
        fake = dir / random.choice(current[1])
        
        if original.is_file() and fake.is_file():
            data_train = sample_pair(original, fake)

            if data_train is None:
                print(f"p_{iPart}_{str(original.stem)}_{str(fake.stem)}: No data.")
            else:
                file_out = output_dir / f"p_{iPart}_Train_{str(original.stem)}_{str(fake.stem)}.npy"
                np.save(file_out, data_train)


            data_test_real = sample_single(original)
            data_test_fake = sample_single(fake)

            isValid = (data_test_real is not None) and (data_test_fake is not None)

            if isValid:
                file_real_out = output_dir / f"p_{iPart}_Test_{str(original.stem)}_real.npy"
                np.save(file_real_out, data_test_real)

                file_fake_out = output_dir / f"p_{iPart}_Test_{str(fake.stem)}_fake.npy"
                np.save(file_fake_out, data_test_fake)
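
To cover the whole dataset, process can be fanned out over the parts present on disk; a minimal sketch using the same probe as the constructor above (up to 50 parts):

from multiprocessing import Pool

if __name__ == "__main__":
    parts = [i for i in range(50) if get_part_dir(i, False).is_dir()]
    with Pool() as pool:
        pool.map(process, parts)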
Example #7
def process(iPart):

    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):

        print(
            f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        original = dir / current[0]
        fake = dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            sample_image_pair(iPart, original, fake)

            sample_image_single(iPart, original, False)
            sample_image_single(iPart, fake, True)
Example #8
# Assumes video is one row of the cluster dataframe df built earlier.
original = video.orig
iCluster = video.cluster
part = video.part
file = video.file

l_video_set = list(df[df.orig == original].file)


num_frames = 32


print(f"Video: {file} Cluster: {iCluster} Original: {original} Part: {part}")


input_dir = get_part_dir(part)

assert (input_dir / file).is_file()
assert (input_dir / original).is_file()

video_real = read_video(input_dir / original, num_frames)
video_fake = read_video(input_dir / file, num_frames)

x_max = video_fake.shape[2]
y_max = video_fake.shape[1]

mtcnn_detector = MTCNNDetector()


l_faces_fake = _get_face_boxes(mtcnn_detector, video_fake, [num_frames//2])
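
A minimal sketch of visualizing the detection, assuming _get_face_boxes returns, per requested frame, a list of (x1, y1, x2, y2) pixel boxes (an assumption; check the helper's actual return format):

import cv2

# ASSUMPTION: l_faces_fake[0] holds (x1, y1, x2, y2) boxes for the middle frame.
frame = video_fake[num_frames // 2].copy()
for (x1, y1, x2, y2) in l_faces_fake[0]:
    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite("fake_mid_frame_boxes.png", frame)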
Example #9
from face_detector import MTCNNDetector
from line_sampler import get_line

from sklearn.metrics import mean_squared_error

a = [7, 3, 4, 11, 24, 123, 3, 7, 3, 4, 10, 21, 123, 3]
b = [7, 3, 4, 11, 24, 129, 6, 11, 4, 10, 21, 123, 3, 3]

# Eye-to-eye line: start 0.5 outside, continue to the l_eye location.

l_d = read_metadata(7)

entry = l_d[3]

input_dir = get_part_dir(7)
mtcnn_detector = MTCNNDetector()

real_path = input_dir / entry[0]
fake_path = input_dir / entry[1][1]

assert real_path.is_file()
assert fake_path.is_file()

video_size = 32

W = 256
H = 1

real_video = read_video(real_path, video_size)
fake_video = read_video(fake_path, video_size)
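
The a/b lists and the mean_squared_error import above go unused in this excerpt; presumably they compare a real line sample against a fake one. A minimal sketch of that comparison:

# MSE between the two hand-written sample rows defined above.
mse = mean_squared_error(a, b)
print(f"MSE(a, b) = {mse:.2f}")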
Example #10
def get_sampling_cubes_for_part(iPart, output_dir):

    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)

    num_videos = len(l_d)

    print(
        f"p_{iPart}: Fake detection on part {iPart}. {len(l_d)} original video(s)."
    )

    for idx_key in range(num_videos):

        current = l_d[idx_key]

        x_real = current[0]

        isCompleted = dataframe_exists(iPart, x_real)

        if isCompleted:
            # print(f"p_{iPart}_{x_real}: Already done.")
            continue
        else:
            print(f"p_{iPart}_{x_real}: Starting. {idx_key + 1} of {len(l_d)}")

        x_real = dir / x_real
        assert x_real.is_file(), "Error: Original not found"

        vidcap = cv2.VideoCapture(str(x_real))

        video_real = read_video(vidcap)

        vidcap.release()

        num_frames = video_real.shape[0]

        l_fakes = current[1]

        l_df_video = []

        for x_fake in l_fakes:
            x_fake = dir / x_fake

            if not x_fake.is_file():
                print(
                    f"   WARNING: p_{iPart}_{x_real.stem}: Not a file: {x_fake}. Skipping."
                )
                continue

            print(f"   p_{iPart}_{x_real.stem}: Processing {str(x_fake.stem)}")

            vidcap = cv2.VideoCapture(str(x_fake))

            video_fake = read_video(vidcap)

            vidcap.release()

            df_video = get_sampling_cubes(video_real, video_fake)

            df_video = df_video.assign(fake=str(x_fake.stem))

            l_df_video.append(df_video)

        if len(l_df_video) > 0:
            df_video = pd.concat(l_df_video, axis=0)
            df_video = df_video.assign(original=str(x_real.stem))
            df_video = df_video.assign(part=iPart)

            df_video.to_pickle(output_dir / f"p_{iPart}_{x_real.stem}_.pkl")
            print(f"p_{iPart}_{x_real.stem}: Complete.")

        else:
            print(
                f"p_{iPart}_{x_real.stem}: WARNING: No fakes found. No sampling cubes produced for video."
            )

    # Results are written to disk; nothing to return.
    return []
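
The per-original pickles written above can be gathered back into a single frame; a minimal sketch (output_dir and iPart as in the function above):

import pandas as pd

# Collect every per-original result for one part into one dataframe.
l_df = [pd.read_pickle(p) for p in sorted(output_dir.glob(f"p_{iPart}_*_.pkl"))]
df_part = pd.concat(l_df, axis=0, ignore_index=True)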