Example #1
def labeler_disagreement():
	import baseline 

	movies = commons.load_movies()
	labelof = [ [], [] ]
	label_files = ['_LABEL1', '_LABEL2']
	# note: GOOD, BAD and NA all collapse to class 0; only 'N' maps to class 1
	label_to_digit = {'GOOD': 0, 'BAD': 0, 'NA': 0, 'N': 1}

	for imdb_id in movies:
		face_dir = commons.get_faces_dir(imdb_id)

		# only look at movies with more than one label
		if sum([ os.path.exists(os.path.join(face_dir, label_file)) for label_file in label_files ]) > 1:
			for i in range(len(label_files)):
				with open(os.path.join(face_dir, label_files[i]), 'r') as f:
					lines = f.readlines()
					for line in lines:
						if len(line) > 2:
							_, label = line.strip().split(':')

							# corner case
							if len(label) > 6 and label[:6] == "SAMEAS":
								label = lines[int(label[6:])].strip().split(':')[1]
							labelof[i].append(label_to_digit[label])

	assert(len(labelof[0]) == len(labelof[1]))

	baseline.f1_score(labelof[1], labelof[0])
	print("number of characters ", len(labelof[1]))
Example #2
def extract_audio_from_video():
    movies = commons.load_movies()
    print("Extract audio from video.")
    print("Number of cores: {}".format(multiprocessing.cpu_count()))

    if not os.path.exists(commons.AUDIO_DIR):
        os.mkdir(commons.AUDIO_DIR)

    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        pool.map(extract_audio_from_video_worker, movies.keys())
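The per-movie worker extract_audio_from_video_worker is not shown. A hypothetical sketch, assuming movies are stored as "<imdb_id>.mp4" under a commons.MOVIE_DIR (an assumed constant) and that ffmpeg is on PATH; generate_audio_features in Example #8 expects the output at AUDIO_DIR/<imdb_id>.wav:

import os
import subprocess

def extract_audio_from_video_worker(imdb_id):
    # hypothetical: commons.MOVIE_DIR and the .mp4 extension are assumptions
    video_path = os.path.join(commons.MOVIE_DIR, "{}.mp4".format(imdb_id))
    audio_path = os.path.join(commons.AUDIO_DIR, "{}.wav".format(imdb_id))
    # -vn drops the video stream so only a wav file is written
    subprocess.run(["ffmpeg", "-y", "-i", video_path, "-vn", audio_path],
                   check=True)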
Example #3
def count_overlap_labels():
	movies = commons.load_movies()
	count = 0
	for imdb_id in movies:
		face_dir = commons.get_faces_dir(imdb_id)

		labels = ['_LABEL1', '_LABEL2']
		exist = [os.path.exists(os.path.join(face_dir, label)) for label in labels]

		if sum(exist) > 1:
			count += 1

	print("2 Labeled movies: {}/{}".format(count, len(movies)))	
Example #4
def count_labels():
	movies = commons.load_movies()
	count = 0
	for imdb_id in movies:
		face_dir = commons.get_faces_dir(imdb_id)

		labels = ["_LABEL{}".format(i) for i in range(1, 4)]
		for label in labels:
			if os.path.exists(os.path.join(face_dir, label)):
				count += 1
				break

		if os.path.exists(os.path.join(face_dir, "_LABEL")):
			print("need to fix {}".format(imdb_id))

	print("Labeled movies: {}/{}".format(count, len(movies)))
Example #5
def generate_train_and_test():
    import random

    movies = commons.load_movies()
    TEST_PERCENT = 0.2

    # random.sample needs a sequence, so materialize the dict keys first
    test = random.sample(list(movies), int(TEST_PERCENT * len(movies)))
    train = list(set(movies.keys()) - set(test))

    assert (len(set(test).intersection(train)) == 0)

    with open(commons.TRAIN_FILE, 'w') as f:
        for imdb_id in train:
            f.write("{}\n".format(imdb_id))

    with open(commons.TEST_FILE, 'w') as f:
        for imdb_id in test:
            f.write("{}\n".format(imdb_id))
Example #6
def generate_fusion_data():
    if not os.path.exists(FUSION_DIR):
        os.mkdir(FUSION_DIR)

    if (os.path.exists(FUSION_TRAIN_X) and os.path.exists(FUSION_TRAIN_Y)
            and os.path.exists(FUSION_TRAIN_WHO)
            and os.path.exists(FUSION_TEST_X) and os.path.exists(FUSION_TEST_Y)
            and os.path.exists(FUSION_TEST_WHO)):
        with open(FUSION_TRAIN_X, 'rb') as f:
            train_x = pickle.load(f)
        with open(FUSION_TRAIN_Y, 'rb') as f:
            train_y = pickle.load(f)
        with open(FUSION_TRAIN_WHO, 'r') as f:
            train_who = []
            for line in f.readlines():
                train_who.append(line.strip())
        with open(FUSION_TEST_X, 'rb') as f:
            test_x = pickle.load(f)
        with open(FUSION_TEST_Y, 'rb') as f:
            test_y = pickle.load(f)
        with open(FUSION_TEST_WHO, 'r') as f:
            test_who = []
            for line in f.readlines():
                test_who.append(line.strip())
        return (train_x, train_y, train_who), (test_x, test_y, test_who)

    audio_train_data = audio_model.load_training_data()
    audio_test_data = audio_model.load_test_data()

    train_ids, test_ids = commons.get_train_and_test_imbd_ids()
    movies, labelof = commons.load_movies(), commons.get_label()

    train_x, train_y, test_x, test_y = [], [], [], []
    train_who, test_who = audio_train_data[2], audio_test_data[2]

    IMAGE_SIZE = 224
    cnn_model = VGGFace(include_top=False,
                        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                        pooling=None)
    vgg19_model = scene_model.VGG_19()

    print("Generating & merging features...", datetime.now())

    for (xs, ys, whos) in [audio_train_data, audio_test_data]:
        assert (len(xs) == len(ys) and len(ys) == len(whos))

        for i in range(len(whos)):
            print("\r{}/{}".format(i, len(whos)), end="")

            imdb_id, c_id, scene = whos[i].strip().split('-')
            start_time, end_time = scene.split('~')
            c_id = int(c_id)
            start_time, end_time = float(start_time), float(end_time)

            audio_x, y = numpy.asarray(xs[i]).flatten(), ys[i]

            # find NUM_IMAGES_USED random images belonging to the (imdb_id, c_id) scene
            images = [
                x for x in os.listdir(commons.TRAIN_IMAGES_DIR)
                if x.startswith("{}-{}".format(imdb_id, c_id))
            ]
            temp_images = []

            for image in images:
                assert (image[-4:] == '.jpg')
                _, _, timestamp = image[:-4].split('-')
                timestamp = float(timestamp)
                if start_time <= timestamp <= end_time:
                    temp_images.append(image)

            # a scene is guaranteed to have >= 5 images
            assert len(temp_images) >= 5
            images = random.sample(temp_images, NUM_IMAGES_USED)

            image_x = []
            for image in images:
                image_filename = os.path.join(commons.TRAIN_IMAGES_DIR, image)
                image = keras.preprocessing.image.load_img(
                    image_filename, target_size=(IMAGE_SIZE, IMAGE_SIZE))

                # face features
                x = keras.preprocessing.image.img_to_array(image)
                x = numpy.expand_dims(x, axis=0)
                x = keras_vggface.utils.preprocess_input(x, version=1)
                x = cnn_model.predict(x)
                x = x.flatten()
                image_x.append(x)

                # scene features
                x = scene_model.extract_scene_feature(vgg19_model, image)
                x = x.flatten()
                image_x.append(x)

            x = numpy.concatenate(tuple(image_x + [audio_x]))

            if imdb_id in train_ids:
                train_x.append(x)
                train_y.append(y)
            elif imdb_id in test_ids:
                test_x.append(x)
                test_y.append(y)

    print("\rDone. ", datetime.now())

    with open(FUSION_TRAIN_X, 'wb') as f:
        pickle.dump(train_x, f)
    with open(FUSION_TRAIN_Y, 'wb') as f:
        pickle.dump(train_y, f)
    with open(FUSION_TRAIN_WHO, 'w') as f:
        for who in train_who:
            f.write("{}\n".format(who))
    with open(FUSION_TEST_X, 'wb') as f:
        pickle.dump(test_x, f)
    with open(FUSION_TEST_Y, 'wb') as f:
        pickle.dump(test_y, f)
    with open(FUSION_TEST_WHO, 'w') as f:
        for who in test_who:
            f.write("{}\n".format(who))

    return (train_x, train_y, train_who), (test_x, test_y, test_who)
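Each fused sample is the concatenation, per sampled frame, of flattened VGGFace features and VGG-19 scene features, with the flattened audio features appended last. Illustrative only: a quick way to exercise the cached features with a stand-in scikit-learn classifier (the project's actual fusion model is not shown here):

from sklearn.linear_model import LogisticRegression

(train_x, train_y, _), (test_x, test_y, _) = generate_fusion_data()

clf = LogisticRegression(max_iter=1000)  # stand-in model, an assumption
clf.fit(train_x, train_y)
print("test accuracy:", clf.score(test_x, test_y))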
Example #7
def prepare_images():
    # 0. safety check
    if not os.path.exists(commons.IMAGE_DIR):
        print("ERROR: IMAGE_DIR {} does not exist.".format(commons.IMAGE_DIR))
        return

    if not os.path.exists(commons.TRAIN_IMAGES_DIR):
        os.mkdir(commons.TRAIN_IMAGES_DIR)

    if not os.path.exists(commons.TRAIN_IMAGES_DONE_DIR):
        os.mkdir(commons.TRAIN_IMAGES_DONE_DIR)

    movies = commons.load_movies()
    images = [
        x for x in sorted(os.listdir(commons.IMAGE_DIR))
        if len(x) > 4 and x[-4:] == ".jpg"
    ]
    errors = []

    # 1. categorize images into imdb_id -> character_id -> list of images belonging to that id
    print("Categorizing images...")
    imagesof = defaultdict(lambda: defaultdict(list))
    for x in images:
        # already prepared
        if os.path.exists(os.path.join(commons.TRAIN_IMAGES_DIR, x)):
            continue

        imdb_id, character_id, timestamp = x[:-4].split('-')
        character_id = int(character_id)
        imagesof[imdb_id][character_id].append(x)

    # 2. locate face for each image with black edge cropped
    for imdb_id in imagesof:
        print("Prepare images for <{}> {}...".format(imdb_id,
                                                     movies[imdb_id].name))
        if os.path.exists(os.path.join(commons.TRAIN_IMAGES_DONE_DIR,
                                       imdb_id)):
            continue

        character_encodings = commons.get_characters(imdb_id)

        for character_id in imagesof[imdb_id]:
            for x in imagesof[imdb_id][character_id]:
                try:
                    image = cv2.imread(os.path.join(commons.IMAGE_DIR, x))
                    image = remove_black_edge(image)

                    rgb_image = image[:, :, ::-1]
                    face_locations = face_recognition.face_locations(rgb_image)
                    encodings = face_recognition.face_encodings(
                        rgb_image, known_face_locations=face_locations)
                except Exception as e:
                    print("ERROR {}".format(e))
                    errors.append(e)
                    continue  # face_locations/encodings are undefined here

                for i in range(len(face_locations)):
                    try:
                        result = face_recognition.compare_faces(
                            character_encodings[character_id], encodings[i])
                        if face.is_same_person(result):
                            height, _, _ = image.shape
                            _, right, _, left = face_locations[i]
                            mid = int((left + right) / 2)

                            height = height if height % 2 == 0 else height - 1

                            x0 = max(0, mid - int(height / 2))
                            x1 = x0 + height

                            rect_image = image[0:height, x0:x1]
                            cv2.imwrite(
                                os.path.join(commons.TRAIN_IMAGES_DIR, x),
                                rect_image)

                    except Exception as e:
                        print("ERROR {}".format(e))
                        errors.append(e)

        open(os.path.join(commons.TRAIN_IMAGES_DONE_DIR, imdb_id), "a").close()

    for e in errors:
        print(e)
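remove_black_edge is not shown above. A hypothetical sketch that crops letterbox-style black borders before face detection; the brightness threshold of 10 is an assumption:

import numpy

def remove_black_edge(image, threshold=10):
    # hypothetical: keep only rows/columns that are not almost entirely black
    gray = image.max(axis=2)  # brightest channel per pixel
    rows = numpy.where(gray.max(axis=1) > threshold)[0]
    cols = numpy.where(gray.max(axis=0) > threshold)[0]
    if len(rows) == 0 or len(cols) == 0:
        return image  # all-black frame, nothing to crop
    return image[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]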
Example #8
def generate_audio_features():
    movies = commons.load_movies()
    print("Extract audio pieces...")

    # if not os.path.exists(commons.AUDIO_UNIT_DIR):
    #       os.mkdir(commons.AUDIO_UNIT_DIR)
    # if not os.path.exists(commons.AUDIO_UNIT_DONE_DIR):
    #       os.mkdir(commons.AUDIO_UNIT_DONE_DIR)

    labelof = commons.get_label()
    train_ids, test_ids = commons.get_train_and_test_imbd_ids()
    train_x, train_y, train_who, test_x, test_y, test_who = [], [], [], [], [], []

    if not os.path.exists(commons.AUDIO_BASELINE_DIR):
        os.mkdir(commons.AUDIO_BASELINE_DIR)
    all_scenes = {}  # imdb_id -> scenes

    for imdb_id in movies:
        if imdb_id not in labelof:
            continue
        temp_audio_unit_path = os.path.join(commons.AUDIO_UNIT_DIR, imdb_id)

        # 0. clean up work if exists
        if os.path.exists(os.path.join(commons.AUDIO_UNIT_DONE_DIR, imdb_id)):
            print("<{}> already completed.".format(imdb_id))
            continue  # a return here would skip all remaining movies
        if os.path.exists(os.path.join(commons.AUDIO_UNIT_DIR, imdb_id)):
            shutil.rmtree(temp_audio_unit_path)

        # 1. create temp path and start working!
        os.mkdir(temp_audio_unit_path)
        frames = [
            x for x in os.listdir(commons.TRAIN_IMAGES_DIR)
            if x.startswith(imdb_id)
        ]

        timestampsof = defaultdict(list)  # character_id -> timestamps
        for frame in frames:
            _, character_id, timestamp = frame[:-4].split('-')
            character_id = int(character_id)
            timestamp = float(timestamp)
            timestampsof[character_id].append(timestamp)
        for character_id in timestampsof:
            timestampsof[character_id].sort()

        # character_id -> [ scene0 = [ timestamp0, ... ], ... ]
        # each timestamp is the float from the train_image filename, in milliseconds
        scenes = defaultdict(list)
        curr_scene = []
        for character_id in timestampsof:
            for timestamp in timestampsof[character_id]:
                if not is_the_same_scene(curr_scene, timestamp):
                    scenes[character_id].append(curr_scene)
                    curr_scene = []
                curr_scene.append(timestamp)

            scenes[character_id].append(curr_scene)
            curr_scene = []

        for character_id in scenes:
            scenes[character_id] = filter_scene(scenes[character_id])
        all_scenes[imdb_id] = scenes

        # 2. from character_id -> list of scenes ([timestamps]),
        #    extract the audio for each scene and add it to train/test
        #    NOTE: clips are written to disk because we don't know how to convert
        #    scipy wavfile data to a format pyAudioAnalysis understands
        partial_xs = []
        rate, data = wavfile.read(
            os.path.join(commons.AUDIO_DIR, "{}.wav".format(imdb_id)))
        for c_id in scenes:
            for scene in scenes[c_id]:
                # clip audio of this scene
                start, end = scene[0], scene[-1]
                start_sec, end_sec = start / 1000, end / 1000  # convert to seconds
                start_frame = int(start_sec * rate)
                end_frame = int(end_sec * rate)
                x = data[start_frame:end_frame + 1]

                # get audio features of this scene
                from pyAudioAnalysis import audioFeatureExtraction

                if len(x.shape) > 1 and x.shape[1] == 2:  # stereo to mono
                    x = x.sum(axis=1) / 2
                features, _ = audioFeatureExtraction.stFeatureExtraction(
                    x,
                    rate,
                    0.05 * rate,  #frame size
                    0.025 * (end_frame + 1 - start_frame))  #frame step

                # catch edge case: nothing in audio
                if not numpy.isfinite(features).all():
                    features = numpy.nan_to_num(features)
                    print("Catched error: <{}> has nothing in audio.".format(
                        imdb_id))
                features = features[:, :38]  # drop extra frame data ... sign

                # add (x, y)
                label = labelof[imdb_id][c_id]
                if imdb_id in train_ids:
                    train_x.append(features)
                    train_y.append(label)
                    train_who.append("{}-{}-{}~{}".format(
                        imdb_id, c_id, start, end))
                else:
                    test_x.append(features)
                    test_y.append(label)
                    test_who.append("{}-{}-{}~{}".format(
                        imdb_id, c_id, start, end))

        print("<{}> Finished".format(imdb_id))

    with open(commons.AUDIO_BASELINE_TRAIN_X, 'wb') as f:
        pickle.dump(train_x, f)
    with open(commons.AUDIO_BASELINE_TRAIN_Y, 'wb') as f:
        pickle.dump(train_y, f)
    with open(commons.AUDIO_BASELINE_TRAIN_WHO, 'w') as f:
        for who in train_who:
            f.write(who + '\n')

    with open(commons.AUDIO_BASELINE_TEST_X, 'wb') as f:
        pickle.dump(test_x, f)
    with open(commons.AUDIO_BASELINE_TEST_Y, 'wb') as f:
        pickle.dump(test_y, f)
    with open(commons.AUDIO_BASELINE_TEST_WHO, 'w') as f:
        for who in test_who:
            f.write(who + '\n')

    with open(commons.SCENES, 'wb') as f:
        pickle.dump(all_scenes, f)
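is_the_same_scene and filter_scene are not shown. Hypothetical sketches follow: the 5-second gap is an assumption, while the five-frame minimum matches the assert in generate_fusion_data ("a scene is guaranteed to have >= 5 images"):

MAX_GAP_MS = 5000            # assumed scene-break threshold
MIN_FRAMES_PER_SCENE = 5     # matches the assert in generate_fusion_data

def is_the_same_scene(curr_scene, timestamp):
    # an empty scene accepts any timestamp; otherwise the new frame must
    # fall within MAX_GAP_MS of the scene's last frame
    return not curr_scene or timestamp - curr_scene[-1] <= MAX_GAP_MS

def filter_scene(scenes):
    # keep only scenes with enough frames to sample from later
    return [s for s in scenes if len(s) >= MIN_FRAMES_PER_SCENE]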
Example #9
                    # valid image! save it under the standard filename format
                    image_filename = temp_image_dir + format_image_filename(
                        imdb_id, char_index, input_movie.get(cv2.CAP_PROP_POS_MSEC))
                    cv2.imwrite(image_filename, bgr_frame)

        print(" Done.")

        # move all images out of temp_image_dir to IMAGE_DIR, delete temp dir, mark success
        for file in os.listdir(temp_image_dir):
            filename, extension = os.path.splitext(file)
            if extension == ".jpg":
                shutil.move(os.path.join(temp_image_dir, file),
                            os.path.join(commons.IMAGE_DIR, file))
        shutil.rmtree(temp_image_dir)
        open(os.path.join(commons.IMAGE_DONE_DIR, imdb_id), "a").close()

        # cleanup
        input_movie.release()
        cv2.destroyAllWindows()

    for error_msg in errors:
        print(error_msg)
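format_image_filename is not shown either. A sketch inferred from how the other examples parse frame filenames ("<imdb_id>-<character_id>-<timestamp>.jpg"):

def format_image_filename(imdb_id, char_index, msec):
    # inferred format; the other examples split these names on '-'
    return "{}-{}-{}.jpg".format(imdb_id, char_index, msec)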


if __name__ == '__main__':
    movies = commons.load_movies()
    #save_faces(movies)
    #face_clustering(movies)

    save_images(movies)