def labeler_disagreement():
    import baseline

    movies = commons.load_movies()
    labelof = [[], []]
    label_files = ['_LABEL1', '_LABEL2']
    label_to_digit = {'GOOD': 0, 'BAD': 0, 'NA': 0, 'N': 1}
    for imdb_id in movies:
        face_dir = commons.get_faces_dir(imdb_id)
        # only look at movies with more than one label
        if sum([os.path.exists(os.path.join(face_dir, label_file))
                for label_file in label_files]) > 1:
            for i in range(len(label_files)):
                with open(os.path.join(face_dir, label_files[i]), 'r') as f:
                    lines = f.readlines()
                    for line in lines:
                        if len(line) > 2:
                            _, label = line.strip().split(':')
                            # corner case
                            if len(label) > 6 and label[:6] == "SAMEAS":
                                label = lines[int(label[6:])].strip().split(':')[1]
                            labelof[i].append(label_to_digit[label])
    assert len(labelof[0]) == len(labelof[1])
    baseline.f1_score(labelof[1], labelof[0])
    print("number of characters ", len(labelof[1]))
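# For reference: judging from the parsing above, each _LABEL file holds one
# "character_id:LABEL" entry per line, where LABEL is either a key of
# label_to_digit or a back-reference of the form SAMEAS<line_number> to another
# line's label. An illustrative (hypothetical) file:
#   0:GOOD
#   1:BAD
#   2:SAMEAS0
#   3:NA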
def extract_audio_from_video():
    movies = commons.load_movies()
    print("Extract audio from video.")
    print("Number of cores: {}".format(multiprocessing.cpu_count()))

    if not os.path.exists(commons.AUDIO_DIR):
        os.mkdir(commons.AUDIO_DIR)

    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    pool.map(extract_audio_from_video_worker, movies.keys())
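# The worker passed to pool.map above is not shown in this section. A minimal
# sketch, assuming ffmpeg is on PATH and that a helper for locating the source
# video exists (commons.get_video_path is an assumed name); the real worker may
# differ.
def extract_audio_from_video_worker_sketch(imdb_id):
    import subprocess
    video_path = commons.get_video_path(imdb_id)  # assumed helper
    audio_path = os.path.join(commons.AUDIO_DIR, "{}.wav".format(imdb_id))
    if os.path.exists(audio_path):
        return
    # -vn drops the video stream; write the audio track as 16-bit PCM wav
    subprocess.check_call([
        "ffmpeg", "-y", "-i", video_path, "-vn",
        "-acodec", "pcm_s16le", audio_path,
    ])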
def count_overlap_labels():
    movies = commons.load_movies()
    count = 0
    for imdb_id in movies:
        face_dir = commons.get_faces_dir(imdb_id)
        labels = ['_LABEL1', '_LABEL2']
        exist = [os.path.exists(os.path.join(face_dir, label)) for label in labels]
        if sum(exist) > 1:
            count += 1
    print("2 Labeled movies: {}/{}".format(count, len(movies)))
def count_labels():
    movies = commons.load_movies()
    count = 0
    for imdb_id in movies:
        face_dir = commons.get_faces_dir(imdb_id)
        labels = ["_LABEL{}".format(i) for i in range(1, 4)]
        for label in labels:
            if os.path.exists(os.path.join(face_dir, label)):
                count += 1
                break
        if os.path.exists(os.path.join(face_dir, "_LABEL")):
            print("need to fix {}".format(imdb_id))
    print("Labeled movies: {}/{}".format(count, len(movies)))
def generate_train_and_test():
    import random

    movies = commons.load_movies()
    TEST_PERCENT = 0.2
    # random.sample needs a sequence, so materialize the key view first
    test = random.sample(list(movies.keys()), int(TEST_PERCENT * len(movies)))
    train = list(set(movies.keys()) - set(test))
    assert len(set(test).intersection(train)) == 0

    with open(commons.TRAIN_FILE, 'w') as f:
        for imdb_id in train:
            f.write("{}\n".format(imdb_id))
    with open(commons.TEST_FILE, 'w') as f:
        for imdb_id in test:
            f.write("{}\n".format(imdb_id))
def generate_fusion_data():
    if not os.path.exists(FUSION_DIR):
        os.mkdir(FUSION_DIR)

    if (os.path.exists(FUSION_TRAIN_X) and os.path.exists(FUSION_TRAIN_Y)
            and os.path.exists(FUSION_TRAIN_WHO) and os.path.exists(FUSION_TEST_X)
            and os.path.exists(FUSION_TEST_Y) and os.path.exists(FUSION_TEST_WHO)):
        with open(FUSION_TRAIN_X, 'rb') as f:
            train_x = pickle.load(f)
        with open(FUSION_TRAIN_Y, 'rb') as f:
            train_y = pickle.load(f)
        with open(FUSION_TRAIN_WHO, 'r') as f:
            train_who = []
            for line in f.readlines():
                train_who.append(line.strip())
        with open(FUSION_TEST_X, 'rb') as f:
            test_x = pickle.load(f)
        with open(FUSION_TEST_Y, 'rb') as f:
            test_y = pickle.load(f)
        with open(FUSION_TEST_WHO, 'r') as f:
            test_who = []
            for line in f.readlines():
                test_who.append(line.strip())
        return (train_x, train_y, train_who), (test_x, test_y, test_who)

    audio_train_data = audio_model.load_training_data()
    audio_test_data = audio_model.load_test_data()
    train_ids, test_ids = commons.get_train_and_test_imbd_ids()
    movies, labelof = commons.load_movies(), commons.get_label()
    train_x, train_y, test_x, test_y = [], [], [], []
    train_who, test_who = audio_train_data[2], audio_test_data[2]

    IMAGE_SIZE = 224
    cnn_model = VGGFace(include_top=False,
                        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                        pooling='None')
    vgg19_model = scene_model.VGG_19()

    print("Generating & merging features...", datetime.now())
    for (xs, ys, whos) in [audio_train_data, audio_test_data]:
        assert len(xs) == len(ys) and len(ys) == len(whos)
        for i in range(len(whos)):
            print("\r{}/{}".format(i, len(whos)), end="")
            imdb_id, c_id, scene = whos[i].strip().split('-')
            start_time, end_time = scene.split('~')
            c_id, start_time, end_time = int(c_id), float(start_time), float(end_time)
            audio_x, y = numpy.asarray(xs[i]).flatten(), ys[i]

            # find 2 random images belonging to the (imdb_id, c_id) scene
            images = [
                x for x in os.listdir(commons.TRAIN_IMAGES_DIR)
                if x.startswith("{}-{}".format(imdb_id, c_id))
            ]
            temp_images = []
            for image in images:
                assert image[-4:] == '.jpg'
                _, _, timestamp = image[:-4].split('-')
                timestamp = float(timestamp)
                if start_time <= timestamp and timestamp <= end_time:
                    temp_images.append(image)
            assert len(temp_images) >= 5  # a scene is guaranteed to have >= 5 images
            images = random.sample(temp_images, NUM_IMAGES_USED)

            image_x = []
            for image in images:
                image_filename = os.path.join(commons.TRAIN_IMAGES_DIR, image)
                image = keras.preprocessing.image.load_img(
                    image_filename, target_size=(IMAGE_SIZE, IMAGE_SIZE))

                # face features
                x = keras.preprocessing.image.img_to_array(image)
                x = numpy.expand_dims(x, axis=0)
                x = keras_vggface.utils.preprocess_input(x, version=1)
                x = cnn_model.predict(x)
                x = x.flatten()
                image_x.append(x)

                # scene features
                x = scene_model.extract_scene_feature(vgg19_model, image)
                x = x.flatten()
                image_x.append(x)

            x = numpy.concatenate(tuple(image_x + [audio_x]))
            if imdb_id in train_ids:
                train_x.append(x)
                train_y.append(y)
            elif imdb_id in test_ids:
                test_x.append(x)
                test_y.append(y)
    print("\rDone. ", datetime.now())

    with open(FUSION_TRAIN_X, 'wb') as f:
        pickle.dump(train_x, f)
    with open(FUSION_TRAIN_Y, 'wb') as f:
        pickle.dump(train_y, f)
    with open(FUSION_TRAIN_WHO, 'w') as f:
        for who in train_who:
            f.write("{}\n".format(who))
    with open(FUSION_TEST_X, 'wb') as f:
        pickle.dump(test_x, f)
    with open(FUSION_TEST_Y, 'wb') as f:
        pickle.dump(test_y, f)
    with open(FUSION_TEST_WHO, 'w') as f:
        for who in test_who:
            f.write("{}\n".format(who))
    return (train_x, train_y, train_who), (test_x, test_y, test_who)
def prepare_images():
    # 0. safety check
    if not os.path.exists(commons.IMAGE_DIR):
        print("ERROR: IMAGE_DIR {} does not exist.".format(commons.IMAGE_DIR))
    if not os.path.exists(commons.TRAIN_IMAGES_DIR):
        os.mkdir(commons.TRAIN_IMAGES_DIR)
    if not os.path.exists(commons.TRAIN_IMAGES_DONE_DIR):
        os.mkdir(commons.TRAIN_IMAGES_DONE_DIR)

    movies = commons.load_movies()
    images = [
        x for x in sorted(os.listdir(commons.IMAGE_DIR))
        if len(x) > 4 and x[-4:] == ".jpg"
    ]
    errors = []

    # 1. categorize images into imdb_id -> character_id -> list of images belonging to that id
    print("Categorizing images...")
    imagesof = defaultdict(lambda: defaultdict(list))
    for x in images:
        # already prepared
        if os.path.exists(os.path.join(commons.TRAIN_IMAGES_DIR, x)):
            continue
        imdb_id, character_id, timestamp = x[:-4].split('-')
        character_id = int(character_id)
        imagesof[imdb_id][character_id].append(x)

    # 2. locate the face in each image, with black edges cropped
    for imdb_id in imagesof:
        print("Prepare images for <{}> {}...".format(imdb_id, movies[imdb_id].name))
        if os.path.exists(os.path.join(commons.TRAIN_IMAGES_DONE_DIR, imdb_id)):
            continue
        character_encodings = commons.get_characters(imdb_id)
        for character_id in imagesof[imdb_id]:
            for x in imagesof[imdb_id][character_id]:
                try:
                    image = cv2.imread(os.path.join(commons.IMAGE_DIR, x))
                    image = remove_black_edge(image)
                    rgb_image = image[:, :, ::-1]
                    face_locations = face_recognition.face_locations(rgb_image)
                    encodings = face_recognition.face_encodings(
                        rgb_image, known_face_locations=face_locations)
                except Exception as e:
                    print("ERROR {}".format(e))
                    errors.append(e)
                    continue
                for i in range(len(face_locations)):
                    try:
                        result = face_recognition.compare_faces(
                            character_encodings[character_id], encodings[i])
                        if face.is_same_person(result):
                            # crop a square patch of full frame height centered on the face
                            height, _, _ = image.shape
                            _, right, _, left = face_locations[i]
                            mid = int((left + right) / 2)
                            height = height if height % 2 == 0 else height - 1
                            x0 = max(0, mid - int(height / 2))
                            x1 = x0 + height
                            rect_image = image[0:height, x0:x1]
                            cv2.imwrite(
                                os.path.join(commons.TRAIN_IMAGES_DIR, x),
                                rect_image)
                    except Exception as e:
                        print("ERROR {}".format(e))
                        errors.append(e)
        open(os.path.join(commons.TRAIN_IMAGES_DONE_DIR, imdb_id), "a").close()

    for e in errors:
        print(e)
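# remove_black_edge() is called above but defined elsewhere. A minimal sketch,
# assuming it crops letterbox borders by dropping rows and columns that are
# (almost) entirely black; the real implementation and threshold may differ.
def remove_black_edge_sketch(image, threshold=10):
    import numpy as np
    gray = image.max(axis=2)  # brightest channel per pixel
    rows = np.where(gray.max(axis=1) > threshold)[0]
    cols = np.where(gray.max(axis=0) > threshold)[0]
    if len(rows) == 0 or len(cols) == 0:
        return image  # fully black frame, nothing to crop
    return image[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]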
def generate_audio_features():
    movies = commons.load_movies()
    print("Extract audio pieces...")
    # if not os.path.exists(commons.AUDIO_UNIT_DIR):
    #     os.mkdir(commons.AUDIO_UNIT_DIR)
    # if not os.path.exists(commons.AUDIO_UNIT_DONE_DIR):
    #     os.mkdir(commons.AUDIO_UNIT_DONE_DIR)
    labelof = commons.get_label()
    train_ids, test_ids = commons.get_train_and_test_imbd_ids()
    train_x, train_y, train_who, test_x, test_y, test_who = [], [], [], [], [], []
    if not os.path.exists(commons.AUDIO_BASELINE_DIR):
        os.mkdir(commons.AUDIO_BASELINE_DIR)

    all_scenes = {}  # imdb_id -> scenes
    for imdb_id in movies:
        if imdb_id not in labelof:
            continue
        temp_audio_unit_path = os.path.join(commons.AUDIO_UNIT_DIR, imdb_id)

        # 0. clean up previous work if it exists
        if os.path.exists(os.path.join(commons.AUDIO_UNIT_DONE_DIR, imdb_id)):
            print("<{}> already completed.".format(imdb_id))
            continue
        if os.path.exists(os.path.join(commons.AUDIO_UNIT_DIR, imdb_id)):
            shutil.rmtree(temp_audio_unit_path)

        # 1. create temp path and start working!
        os.mkdir(temp_audio_unit_path)
        frames = [
            x for x in os.listdir(commons.TRAIN_IMAGES_DIR)
            if x.startswith(imdb_id)
        ]
        timestampsof = defaultdict(list)  # character_id -> timestamps
        for frame in frames:
            _, character_id, timestamp = frame[:-4].split('-')
            character_id = int(character_id)
            timestamp = float(timestamp)
            timestampsof[character_id].append(timestamp)
        for character_id in timestampsof:
            timestampsof[character_id].sort()

        # character_id -> [ scene0 = [ timestamp0 ... ] ]
        # timestamp is a float, exactly as in the train_image filename, in milliseconds
        scenes = defaultdict(list)
        curr_scene = []
        for character_id in timestampsof:
            for timestamp in timestampsof[character_id]:
                if not is_the_same_scene(curr_scene, timestamp):
                    scenes[character_id].append(curr_scene)
                    curr_scene = []
                curr_scene.append(timestamp)
            scenes[character_id].append(curr_scene)
            curr_scene = []
        for character_id in scenes:
            scenes[character_id] = filter_scene(scenes[character_id])
        all_scenes[imdb_id] = scenes

        # 2. from character_id -> a list of scene [timestamps],
        #    extract audio features and save to train/test
        # NOTE: we need to write because we don't know how to convert scipy wavfile to
        #       a format known to pyAudioAnalysis
        partial_xs = []
        rate, data = wavfile.read(
            os.path.join(commons.AUDIO_DIR, "{}.wav".format(imdb_id)))
        for c_id in scenes:
            for scene in scenes[c_id]:
                # clip the audio of this scene
                start, end = scene[0], scene[-1]
                start_sec, end_sec = start / 1000, end / 1000  # convert to seconds
                start_frame, end_frame = int(start_sec * rate), int(end_sec * rate)
                x = data[start_frame:end_frame + 1]

                # get audio features of this scene
                from pyAudioAnalysis import audioFeatureExtraction
                if len(x.shape) > 1 and x.shape[1] == 2:  # stereo to mono
                    x = x.sum(axis=1) / 2
                features, _ = audioFeatureExtraction.stFeatureExtraction(
                    x,
                    rate,
                    0.05 * rate,  # frame size
                    0.025 * (end_frame + 1 - start_frame))  # frame step

                # catch edge case: nothing in audio
                if not numpy.isfinite(features).all():
                    features = numpy.nan_to_num(features)
                    print("Caught error: <{}> has nothing in audio.".format(imdb_id))
                features = features[:, :38]  # drop extra frame data
                # add (x, y)
                label = labelof[imdb_id][c_id]
                if imdb_id in train_ids:
                    train_x.append(features)
                    train_y.append(label)
                    train_who.append("{}-{}-{}~{}".format(imdb_id, c_id, start, end))
                else:
                    test_x.append(features)
                    test_y.append(label)
                    test_who.append("{}-{}-{}~{}".format(imdb_id, c_id, start, end))
        print("<{}> Finished".format(imdb_id))

    with open(commons.AUDIO_BASELINE_TRAIN_X, 'wb') as f:
        pickle.dump(train_x, f)
    with open(commons.AUDIO_BASELINE_TRAIN_Y, 'wb') as f:
        pickle.dump(train_y, f)
    with open(commons.AUDIO_BASELINE_TRAIN_WHO, 'w') as f:
        for who in train_who:
            f.write(who + '\n')
    with open(commons.AUDIO_BASELINE_TEST_X, 'wb') as f:
        pickle.dump(test_x, f)
    with open(commons.AUDIO_BASELINE_TEST_Y, 'wb') as f:
        pickle.dump(test_y, f)
    with open(commons.AUDIO_BASELINE_TEST_WHO, 'w') as f:
        for who in test_who:
            f.write(who + '\n')
    with open(commons.SCENES, 'wb') as f:
        pickle.dump(all_scenes, f)
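# is_the_same_scene() and filter_scene() are used above but defined elsewhere.
# Minimal sketches of plausible implementations, assuming scenes are grouped by
# a maximum gap between consecutive timestamps and that short scenes are
# discarded; the thresholds below are assumptions, not the project's values.
def is_the_same_scene_sketch(curr_scene, timestamp, max_gap_ms=5000):
    # an empty scene accepts any timestamp; otherwise the new frame must be
    # close enough to the last frame of the current scene
    return len(curr_scene) == 0 or timestamp - curr_scene[-1] <= max_gap_ms


def filter_scene_sketch(scenes, min_frames=5):
    # keep only scenes long enough to be useful (cf. the ">= 5 images" assert
    # in generate_fusion_data)
    return [scene for scene in scenes if len(scene) >= min_frames]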
                # valid image! save image with desired format
                image_filename = temp_image_dir + \
                    format_image_filename(imdb_id, char_index,
                                          input_movie.get(cv2.CAP_PROP_POS_MSEC))
                cv2.imwrite(image_filename, bgr_frame)

        print(" Done.")

        # move all images out of temp_image_dir to IMAGE_DIR, delete temp dir, mark success
        for file in os.listdir(temp_image_dir):
            filename, extension = os.path.splitext(file)
            if extension == ".jpg":
                shutil.move(os.path.join(temp_image_dir, file),
                            os.path.join(commons.IMAGE_DIR, file))
        shutil.rmtree(temp_image_dir)
        open(os.path.join(commons.IMAGE_DONE_DIR, imdb_id), "a").close()

        # cleanup
        input_movie.release()
        cv2.destroyAllWindows()

    for error_msg in errors:
        print(error_msg)


if __name__ == '__main__':
    movies = commons.load_movies()
    #save_faces(movies)
    #face_clustering(movies)
    save_images(movies)