Example #1
def prepare_process(iPart):

    # TODO: prepare all (original, fake) pairs for all parts. Issue tasks per pair and multiprocess over those, not over iPart.

    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    l_part_task = []

    for idx_key in range(num_originals):

        current = l_d[idx_key]

        original = part_dir / current[0]

        # Pick the first fake. TODO: other fakes can be picked for more data (one set per epoch).
        num_fakes = len(current[1])

        if num_fakes == 0:
            print(f"p_{iPart}_{original.stem}: No associated fakes. Skipping.")
            continue

        fake = part_dir / current[1][0]

        isPairFound = original.is_file() and fake.is_file()

        if not isPairFound:
            print(f"p_{iPart}: Original and/or fake not found. Skipping.")
            continue

        file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{original.stem}_{fake.stem}.npy"
        file_real_out = output_dir / f"Line_Test_p_{iPart}_{original.stem}_real.npy"
        file_fake_out = output_dir / f"Line_Test_p_{iPart}_{fake.stem}_fake.npy"

        isExisting = (file_pair_out.is_file() and file_real_out.is_file()
                      and file_fake_out.is_file())

        if isExisting:
            continue

        l_part_task.append((iPart, original, fake))

    return l_part_task
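The returned task list is meant to be fanned out to workers, per the TODO above. A minimal driver sketch, assuming a hypothetical process_pair worker that consumes one (iPart, original, fake) tuple:

from multiprocessing import Pool

def run_all_parts(num_parts=50, num_workers=4):
    # Collect (iPart, original, fake) tasks across every part, then
    # fan them out to a worker pool.
    l_tasks = []
    for iPart in range(num_parts):
        l_tasks.extend(prepare_process(iPart))

    with Pool(num_workers) as pool:
        # process_pair is assumed here; it is not shown in the snippet.
        pool.map(process_pair, l_tasks)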
Example #2
    def __init__(self):
        df_c = pd.read_feather(get_meta_dir() / "face_clusters.feather")

        l_parts = []

        for x in range(50):
            path = get_part_dir(x, False)
            if path.is_dir():
                l_parts.append(x)

        l_orig = []
        l_file = []
        l_part = []

        for iPart in l_parts:
            l_d = read_metadata(iPart)

            for x in l_d:
                num_fakes = len(x[1])
                l_orig.extend([x[0]] * (num_fakes + 1))
                l_file.append(x[0])
                l_file.extend(x[1])
                l_part.extend([iPart] * (num_fakes + 1))


        df = pd.DataFrame({'orig': l_orig, 'file': l_file, 'part': l_part})

        df = df.merge(df_c, left_on='orig', right_on='video')
        df = df.drop(['video', 'chunk'], axis=1)

        l_file_tuple = list(zip(df.file, df.part))

        l_exists = []

        for x in l_file_tuple:
            filepath = get_part_dir(x[1]) / x[0]
            l_exists.append(filepath.is_file())

        df = df.assign(exists=l_exists)

        num_files = df.shape[0]
        num_originals = np.unique(df.orig).shape[0]
        num_clusters = np.unique(df.cluster).shape[0]

        print(f"num_files = {num_files}, num_originals = {num_originals}, num_clusters = {num_clusters}")

        self._df = df
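Only __init__ is shown; the snippet omits the class name. A hedged usage sketch, with DatasetIndex as a stand-in name:

# DatasetIndex is a placeholder name for the class this __init__ belongs to.
index = DatasetIndex()
df = index._df

# e.g. all files from face cluster 0 that are present on disk:
print(df[(df.cluster == 0) & df.exists].file.tolist()[:5])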
Example #3
def process_part(iPart):

    l_d = read_metadata(iPart)
    input_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()

    for o_set in l_d:

        l_samples = []

        original_path = input_dir / o_set[0]

        #print(f"{iPart}: {original_path.stem}...")

        r_data = sample_video_safe(mtcnn_detector, original_path, False)

        if r_data is None:
            print(f"{original_path.stem}: Bad original. Skipping set.")
            continue

        l_samples.append(r_data)

        for fake_path in o_set[1]:
            f_data = sample_video_safe(mtcnn_detector, input_dir / fake_path, False)

            if f_data is None:
                continue

            l_samples.append(f_data)

        if len(l_samples) >= 2:
            data = np.concatenate(l_samples)
            filename = f"p_{iPart}_{original_path.stem}.npy"
            output_path = output_dir / filename
            np.save(output_path, data)
        else:
            print(f"{original_path.stem}: No good fakes. Skipping set.")
Example #4
def process(iPart):

    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):

        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        # Guard: random.choice raises IndexError on an empty fake list.
        if len(current[1]) == 0:
            print(f"p_{iPart}_{current[0]}: No associated fakes. Skipping.")
            continue

        original = part_dir / current[0]
        fake = part_dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            data_train = sample_pair(original, fake)

            if data_train is None:
                print(f"p_{iPart}_{original.stem}_{fake.stem}: No data.")
            else:
                file_out = output_dir / f"p_{iPart}_Train_{original.stem}_{fake.stem}.npy"
                np.save(file_out, data_train)


            data_test_real = sample_single(original)
            data_test_fake = sample_single(fake)

            isValid = (data_test_real is not None) and (data_test_fake is not None)

            if isValid:
                file_real_out = output_dir / f"p_{iPart}_Test_{original.stem}_real.npy"
                np.save(file_real_out, data_test_real)

                file_fake_out = output_dir / f"p_{iPart}_Test_{fake.stem}_fake.npy"
                np.save(file_fake_out, data_test_fake)
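A sketch of reading the emitted arrays back; the glob pattern mirrors the filenames process() writes (shown here for part 3):

import numpy as np

output_dir = get_output_dir()
for f in sorted(output_dir.glob("p_3_Train_*.npy")):
    data = np.load(f)
    print(f.name, data.shape)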
Example #5
def process(iPart):

    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):

        print(
            f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]

        # Guard: random.choice raises IndexError on an empty fake list.
        if len(current[1]) == 0:
            continue

        original = part_dir / current[0]
        fake = part_dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            sample_image_pair(iPart, original, fake)

            sample_image_single(iPart, original, False)
            sample_image_single(iPart, fake, True)
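Judging by the call sites, the boolean passed to sample_image_single is an is-fake flag. A minimal sequential driver, assuming parts 0..49 as in the other examples:

# Sequential sweep; swap in multiprocessing.Pool as in the earlier
# sketch if per-part isolation is wanted.
for iPart in range(50):
    process(iPart)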
Example #6
def list_all_files():
    parts = list(range(50))

    l_part = []
    l_original = []
    l_file = []

    for iPart in parts:
        l_d = read_metadata(iPart)

        for x in l_d:
            original = x[0][:-4]  # strip the 4-character extension (".mp4")
            l_part.append(iPart)
            l_original.append(original)
            l_file.append(original)
            for fake in x[1]:
                l_part.append(iPart)
                l_original.append(original)
                l_file.append(fake[:-4])

    df = pd.DataFrame({'p': l_part, 'original': l_original, 'file': l_file})

    df.to_pickle(get_output_dir() / "all_files.pkl")
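Reading the listing back; the column names match the DataFrame built above:

import pandas as pd

df = pd.read_pickle(get_output_dir() / "all_files.pkl")
print(df.groupby('p').size())          # file count per part
print((df.original == df.file).sum())  # number of originals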
Example #7
from mp4_frames import read_video
from mp4_frames import read_metadata
from mp4_frames import get_part_dir  # assumed module: get_part_dir is used below but was not imported in the original snippet

from featureline import find_middle_face_box
from face_detector import MTCNNDetector
from line_sampler import get_line

from sklearn.metrics import mean_squared_error

a = [7, 3, 4, 11, 24, 123, 3, 7, 3, 4, 10, 21, 123, 3]
b = [7, 3, 4, 11, 24, 129, 6, 11, 4, 10, 21, 123, 3, 3]

# Eye-to-eye line.
# Start 0.5 outside, continue to the l_eye location.

l_d = read_metadata(7)

entry = l_d[3]

input_dir = get_part_dir(7)
mtcnn_detector = MTCNNDetector()

real_path = input_dir / entry[0]
fake_path = input_dir / entry[1][1]

assert real_path.is_file()
assert fake_path.is_file()

video_size = 32

W = 256
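The snippet stops before using the import. A plausible continuation, treating a and b as pixel samples taken along the same eye-to-eye line in the real and fake clip:

# MSE between the two line samples; presumably large where the fake
# diverges from the original along the sampled line.
mse = mean_squared_error(a, b)
print(f"MSE(real, fake) = {mse:.2f}")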
Example #8
def get_sampling_cubes_for_part(iPart, output_dir):

    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)

    num_videos = len(l_d)

    print(f"p_{iPart}: Fake detection on part {iPart}. {num_videos} original video(s).")

    for idx_key in range(num_videos):

        current = l_d[idx_key]

        x_real = current[0]

        isCompleted = dataframe_exists(iPart, x_real)

        if isCompleted:
            # print(f"p_{iPart}_{x_real}: Already done.")
            continue

        print(f"p_{iPart}_{x_real}: Starting. {idx_key + 1} of {num_videos}")

        x_real = part_dir / x_real
        assert x_real.is_file(), "Error: Original not found"

        vidcap = cv2.VideoCapture(str(x_real))
        video_real = read_video(vidcap)
        vidcap.release()

        l_fakes = current[1]

        l_df_video = []

        for x_fake in l_fakes:
            x_fake = part_dir / x_fake

            if not x_fake.is_file():
                print(f"   WARNING: p_{iPart}_{x_real.stem}: Not a file: {x_fake}. Skipping.")
                continue

            print(f"   p_{iPart}_{x_real.stem}: Processing {str(x_fake.stem)}")

            vidcap = cv2.VideoCapture(str(x_fake))
            video_fake = read_video(vidcap)
            vidcap.release()

            df_video = get_sampling_cubes(video_real, video_fake)

            df_video = df_video.assign(fake=str(x_fake.stem))

            l_df_video.append(df_video)

        if len(l_df_video) > 0:
            df_video = pd.concat(l_df_video, axis=0)
            df_video = df_video.assign(original=str(x_real.stem))
            df_video = df_video.assign(part=iPart)

            df_video.to_pickle(output_dir / f"p_{iPart}_{x_real.stem}_.pkl")
            print(f"p_{iPart}_{x_real.stem}: Complete.")

        else:
            print(
                f"p_{iPart}_{x_real.stem}: WARNING: No fakes found. No sampling cubes produced for video."
            )

    return []
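A hedged driver sketch; get_sampling_cubes_for_part writes one pickle per original, so the output directory from the other examples is passed straight through:

output_dir = get_output_dir()

for iPart in range(50):
    get_sampling_cubes_for_part(iPart, output_dir)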