def eval_process_image_dir(cluster_dict, images_path, max_num_proc_imgs=None, metric=2, threshold=0.73):
    Models.altered_mtcnn.keep_all = False
    try:
        eval_process_faces(images_path, max_num_proc_imgs=max_num_proc_imgs)
    except IncompleteDatabaseOperation:
        return

    cluster_dict_copy = cluster_dict.copy()

    def eval_process_image_dir_worker(con):
        embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
        eval_core_algorithm = EvalCoreAlgorithm(metric=metric, classification_threshold=threshold)
        # passing result cluster dict already overwrites it
        clustering_result = eval_core_algorithm.cluster_embeddings_no_split(
            embeddings_with_ids, existing_clusters_dict=cluster_dict,
            should_reset_cluster_ids=True, final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters_simplified(modified_clusters_dict, removed_clusters_dict,
                                                con=con, close_connections=False)

    try:
        DBManager.connection_wrapper(eval_process_image_dir_worker)
    except IncompleteDatabaseOperation:
        overwrite_dict(cluster_dict, cluster_dict_copy)
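
# Hedged usage sketch for eval_process_image_dir (the directory path and the
# pre-loaded cluster_dict below are illustrative assumptions, not part of this module):
#
#     cluster_dict = DBManager.load_cluster_dict()
#     eval_process_image_dir(cluster_dict, '/path/to/eval_images',
#                            max_num_proc_imgs=100, metric=2, threshold=0.73)
#
# On IncompleteDatabaseOperation the in-memory cluster_dict is restored from the
# copy taken before clustering, keeping memory and DB consistent.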
def set_picture_label(embedding_id, new_label, cluster, cluster_dict):
    # TODO: Refactor! Extract parts to DBManager?
    # TODO: Don't accept label if it's the same as the old one!
    new_cluster_id = DBManager.get_max_cluster_id() + 1
    embedding = cluster.get_embedding(embedding_id)
    cluster.remove_embedding_by_id(embedding_id)
    new_cluster = Cluster(new_cluster_id, [embedding], [embedding_id], new_label)
    cluster_dict.add_cluster(new_cluster)
    if cluster.get_size() == 0:
        cluster_dict.remove_cluster(cluster)
        modified_clusters = ClusterDict([new_cluster])
    else:
        modified_clusters = ClusterDict([new_cluster, cluster])

    def set_pic_label_worker(con):
        if cluster.get_size() == 0:
            # TODO: Remove cluster like that???
            embeddings_row_dicts = DBManager.remove_cluster(cluster, con=con,
                                                            close_connections=False)
            emb_id_to_face_dict = make_emb_id_to_face_dict_from_row_dicts(embeddings_row_dicts)
            emb_id_to_img_id_dict = make_emb_id_to_img_id_dict_from_row_dicts(embeddings_row_dicts)
        else:
            emb_id_to_face_dict = None
            emb_id_to_img_id_dict = None
        DBManager.store_clusters(modified_clusters, emb_id_to_face_dict=emb_id_to_face_dict,
                                 emb_id_to_img_id_dict=emb_id_to_img_id_dict, con=con,
                                 close_connections=False)
        DBManager.store_certain_labels(cluster=new_cluster, con=con, close_connections=False)

    try:
        DBManager.connection_wrapper(set_pic_label_worker)
    except IncompleteDatabaseOperation:
        # roll back the in-memory changes
        cluster.add_embedding(embedding, embedding_id)
        if cluster.get_size() == 1:
            # cluster had been emptied and removed above; re-add it
            # (checking for size 0 here would be dead code, since the embedding was just re-added)
            cluster_dict.add_cluster(cluster)
        cluster_dict.remove_cluster(new_cluster)
        raise
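
# Hedged usage sketch for set_picture_label (the id lookup and values are
# illustrative assumptions; a cluster accessor like get_cluster_by_id may differ):
#
#     cluster = cluster_dict.get_cluster_by_id(7)
#     set_picture_label(embedding_id=42, new_label='Alice',
#                       cluster=cluster, cluster_dict=cluster_dict)
#
# The embedding moves into a fresh single-embedding cluster carrying the new label;
# if the source cluster becomes empty, it is removed both in memory and in the DB.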
def reclassify(cluster_dict, embeddings_with_ids=None, con=None, close_connections=True, **kwargs):
    def reclassify_worker(con):
        # all operations in worker, so if any DB operation raises error, it is caught
        if embeddings_with_ids is not None:
            local_embeddings_with_ids = embeddings_with_ids
        else:
            local_embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
        if not local_embeddings_with_ids:
            log_error('no embeddings found, nothing to edit')
            return
        new_cluster_dict = DBManager.get_certain_clusters()
        core_algorithm = CoreAlgorithm()
        clustering_result = core_algorithm.cluster_embeddings(
            embeddings=local_embeddings_with_ids, existing_clusters_dict=new_cluster_dict,
            should_reset_cluster_ids=True, final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters(new_cluster_dict, removed_clusters_dict, no_new_embs=True,
                                     clear_clusters=True, con=con, close_connections=False)
        overwrite_dict(cluster_dict, new_cluster_dict)

    try:
        DBManager.connection_wrapper(reclassify_worker, con=con,
                                     close_connections=close_connections)
    except IncompleteDatabaseOperation:
        pass
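
# Hedged usage sketch for reclassify: re-clusters all stored embeddings around the
# user-confirmed 'certain' clusters. With no pre-fetched embeddings it queries the
# DB itself:
#
#     reclassify(cluster_dict)
#
# Passing embeddings_with_ids=[(emb_id, emb), ...] skips the DB fetch when the
# caller already holds the pairs.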
def process_faces(images_path, central_con=None, local_con=None, close_connections=True):
    if local_con is None:
        path_to_local_db = DBManager.get_local_db_file_path(images_path)
    else:
        path_to_local_db = None

    def process_faces_worker(central_con, local_con):
        DBManager.create_local_tables(drop_existing_tables=False,
                                      path_to_local_db=path_to_local_db, con=local_con,
                                      close_connections=False)
        extract_faces(images_path, central_con=central_con, local_con=local_con,
                      close_connections=False)

    DBManager.connection_wrapper(process_faces_worker, path_to_local_db=path_to_local_db,
                                 central_con=central_con, local_con=local_con, with_central=True,
                                 with_local=True, close_connections=close_connections)
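
# Hedged usage sketch for process_faces (the path is an illustrative assumption;
# with no connections supplied, connection_wrapper opens and closes the central and
# local connections itself):
#
#     process_faces('/path/to/images')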
def clear_data(cluster_dict, **kwargs):
    # TODO: Include deletion cascade!
    data_kinds = {
        'l': '[l]ocal tables',
        'g': '[g]lobal tables',
        'b': '[b]oth local and global tables',
        'c': '[c]lusters',
        'n': '[n]either'
    }
    warning = "----- WARNING: DESTRUCTIVE ACTION -----\n"
    should_clear_data_func = partial(
        get_user_decision,
        warning + "Would you like to clear the local/global data?"
                  " Don't worry, you will have to re-confirm a 'yes'.")
    data_kind_to_clear_func = partial(get_user_decision, choices_strs=tuple(data_kinds.values()),
                                      valid_choices=tuple(data_kinds.keys()))

    should_clear_data = should_clear_data_func()
    while should_clear_data == 'y':
        data_kind_to_clear = data_kind_to_clear_func(
            prompt=(warning + "Which kind(s) of data would you like to clear?"
                              " Don't worry, you will have to re-confirm your choice."))
        if data_kind_to_clear == 'n':
            should_clear_data = should_clear_data_func()
            continue

        chosen_data_to_clear_str = data_kinds[data_kind_to_clear].replace('[', '').replace(']', '')
        confirm_data_to_clear = data_kind_to_clear_func(prompt=(
            warning + f"Are you sure that you want to clear {chosen_data_to_clear_str}?"
                      f" This action cannot be undone. To confirm your choice, simply re-enter it."))
        if confirm_data_to_clear != data_kind_to_clear:
            should_clear_data = should_clear_data_func()
            continue

        def clear_data_worker(con):
            if data_kind_to_clear in ('l', 'b'):
                # TODO: How to use local connections here? Rollback on multiple?
                # clear_local_tables()
                drop_local_tables()
            if data_kind_to_clear in ('g', 'b'):
                # clear_central_tables(con=con, close_connections=False)
                drop_central_tables(con=con, close_connections=False)
                overwrite_dict(cluster_dict, dict())
            if data_kind_to_clear == 'c':
                clear_clustering(con=con, close_connections=False)
                overwrite_dict(cluster_dict, dict())

        try:
            DBManager.connection_wrapper(clear_data_worker)
        except IncompleteDatabaseOperation:
            continue
        should_clear_data = 'n'
def set_cluster_label(cluster, new_label):
    # TODO: Use certain_labels here too? (Probably not)
    # TODO: Outsource as function to DBManager?
    def set_cluster_label_worker(con):
        # set the label before persisting, so the new label actually reaches the DB
        cluster.set_label(new_label)
        DBManager.store_clusters([cluster], con=con, close_connections=False)

    DBManager.connection_wrapper(set_cluster_label_worker)
def clear_local_tables(local_db_dir_path=None, con=None, close_connections=True):
    if local_db_dir_path is None:
        # none given, allow the user to set path
        local_db_dir_path = user_choose_local_db_dir_path()
    if local_db_dir_path is None:
        # user didn't set path, exit
        return
    path_to_local_db = DBManager.get_local_db_file_path(local_db_dir_path)
    DBManager.clear_local_tables(path_to_local_db, con=con, close_connections=close_connections)
def eval_extract_faces(path, check_if_known=True, max_num_proc_imgs=None, central_con=None,
                       local_con=None, close_connections=True):
    path_to_local_db = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # path not yet known
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=path_to_local_db, con=local_con,
                                close_connections=False)
    imgs_names_and_date = set(DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

    # Note: 'MAX' returns None / (None, ) as a default value
    max_img_id = DBManager.get_max_image_id(path_to_local_db=path_to_local_db)
    start_img_id = max_img_id + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def get_counted_img_loader():
        img_loader = load_imgs_from_path(path, recursive=True, output_file_names=True,
                                         output_file_paths=True)
        if max_num_proc_imgs is not None:
            # cap the number of images processed in this run at max_num_proc_imgs
            return zip(range(start_img_id, start_img_id + max_num_proc_imgs), img_loader)
        return enumerate(img_loader, start=start_img_id)

    def store_embedding_row_dicts(con):
        print('----- store_embedding_row_dicts -----')
        # TODO: Also auto-increment emb_id etc.
        embedding_id = initial_max_embedding_id + 1
        for img_id, (img_path, img_name, img) in get_counted_img_loader():
            print_progress(img_id, 'image')
            last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_path).st_mtime))
            if check_if_known and (img_name, last_modified) in imgs_names_and_date:
                continue
            DBManager.store_image(img_id=img_id, rel_file_path=img_name,
                                  last_modified=last_modified, path_to_local_db=path_to_local_db,
                                  con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con,
                                       close_connections=False)
            face = Models.altered_mtcnn.forward_return_results(img)
            if face is None:
                log_error(f"no faces found in image '{img_path}'")
                continue
            embedding_row_dict = {Columns.cluster_id.col_name: 'NULL',
                                  Columns.embedding.col_name: face_to_embedding(face),
                                  Columns.thumbnail.col_name: face,
                                  Columns.image_id.col_name: img_id,
                                  Columns.embedding_id.col_name: embedding_id}
            DBManager.store_embedding(embedding_row_dict, con=con, close_connections=False)
            embedding_id += 1

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con,
                                 close_connections=close_connections)
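
# Hedged usage sketch for eval_extract_faces (the path is an illustrative
# assumption; max_num_proc_imgs caps how many images this run enumerates):
#
#     eval_extract_faces('/path/to/eval_images', check_if_known=True,
#                        max_num_proc_imgs=500)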
def run_program_with_user_stats():
    write = False
    command_stats_path = r'C:\Users\Mischa\Desktop\Uni\20-21 WS\Bachelor\BA Papers\Datasets\faces 1999 caltech\commands_stats.txt'
    t0 = time.time()
    # Models.altered_mtcnn.keep_all = False
    init_program()
    cluster_dict = DBManager.load_cluster_dict()
    commands = []
    cmd_name = get_user_command()
    while cmd_name != str(Commands.exit):
        t1 = time.time()
        cmd = Command.get_command(cmd_name)
        call_handler(cmd.handler, cluster_dict=cluster_dict)
        t2 = time.time()
        commands.append([cmd_name, t2 - t1])
        cmd_name = get_user_command()
    tn = time.time()
    commands_str = '\n'.join(map(str, commands)) + '\n\n' + f'total runtime: {tn - t0}'
    if write:
        with open(command_stats_path, 'w') as file:
            file.write(commands_str)
def process_image_dir(cluster_dict, threshold=0.73, metric=2, **kwargs):
    """
    Extract faces from user-chosen images and cluster them.

    :param cluster_dict: Current in-memory clusters; overwritten with the new clustering on success
    :param threshold: Classification threshold; embeddings within this distance join an existing
    cluster
    :param metric: Distance metric used by the clustering algorithm
    :param kwargs: Ignored; accepted for a uniform handler signature
    :return: None
    """
    # TODO: Store entered paths(?) --> Makes it easier if user wants to revisit them, but probs rarely?
    images_path = user_choose_images_path()
    try:
        process_faces(images_path)
    except IncompleteDatabaseOperation:
        return

    cluster_dict_copy = cluster_dict.copy()

    def cluster_processed_faces(con):
        embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
        # TODO: Call reclassify handler here?
        # TODO: Clear existing clusters? Issues with ids etc.????
        core_algorithm = CoreAlgorithm(metric=metric, classification_threshold=threshold)
        # passing result cluster dict already overwrites it
        clustering_result = core_algorithm.cluster_embeddings(
            embeddings_with_ids, existing_clusters_dict=cluster_dict,
            should_reset_cluster_ids=True, final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters_simplified(modified_clusters_dict, removed_clusters_dict,
                                                con=con, close_connections=False)
        reset_cluster_ids(con=con, close_connections=False)
        new_cluster_dict = DBManager.load_cluster_dict(con=con, close_connections=False)
        overwrite_dict(cluster_dict, new_cluster_dict)

    try:
        DBManager.connection_wrapper(cluster_processed_faces)
    except IncompleteDatabaseOperation:
        overwrite_dict(cluster_dict, cluster_dict_copy)
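
# Hedged usage sketch for process_image_dir (normally invoked via the command
# handler, which supplies cluster_dict; the direct call is for illustration only):
#
#     process_image_dir(cluster_dict, threshold=0.73, metric=2)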
def run_program():
    init_program()
    cluster_dict = DBManager.load_cluster_dict()
    cmd_name = get_user_command()
    while cmd_name != str(Commands.exit):
        cmd = Command.get_command(cmd_name)
        call_handler(cmd.handler, cluster_dict=cluster_dict)
        cmd_name = get_user_command()
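
# Hedged entry-point sketch (an assumption about how the module is launched; the
# actual entry point is not shown in this section):
#
# if __name__ == '__main__':
#     run_program()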
def clear_data_measure(cluster_dict):
    local_db_dir_path = DATASET_PATH
    path_to_local_db = DBManager.get_local_db_file_path(local_db_dir_path)

    def clear_data_worker(central_con, local_con):
        DBManager.clear_local_tables(path_to_local_db, con=local_con, close_connections=False)
        clear_central_tables(con=central_con, close_connections=False)
        overwrite_dict(cluster_dict, dict())

    try:
        DBManager.connection_wrapper(clear_data_worker, path_to_local_db=path_to_local_db,
                                     with_central=True, with_local=True)
    except IncompleteDatabaseOperation as e:
        print('clear_data_measure error')
        log_error(e)
def process_images_dir_measure(cluster_dict, n):
    images_path = DATASET_PATH
    try:
        print('------ PROCESSING FACES')
        process_faces_measure(images_path, n)
        print('------ DONE PROCESSING')
    except IncompleteDatabaseOperation as e:
        print('process_images_dir_measure error')
        log_error(e)
        return

    cluster_dict_copy = cluster_dict.copy()

    def cluster_processed_faces(con):
        embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
        # TODO: Call reclassify handler here?
        # TODO: Clear existing clusters? Issues with ids etc.????
        core_algorithm = CoreAlgorithm()
        # passing result cluster dict already overwrites it
        clustering_result = core_algorithm.cluster_embeddings(
            embeddings_with_ids, existing_clusters_dict=cluster_dict,
            should_reset_cluster_ids=True, final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters_simplified(modified_clusters_dict, removed_clusters_dict,
                                                con=con, close_connections=False)

    try:
        DBManager.connection_wrapper(cluster_processed_faces)
    except IncompleteDatabaseOperation:
        overwrite_dict(cluster_dict, cluster_dict_copy)
def user_choose_local_db_dir_path():
    # TODO: Refactor, use user_choose function!
    local_db_dir_path = input(
        'Please enter a path containing a local table you would like to clear.\n')
    # local_db_dir_path = (r'C:\Users\Mischa\Desktop\Uni\20-21 WS'
    #                      r'\Bachelor\Programming\BA\Logic\my_test\facenet_Test\group_imgs')
    while True:
        if not local_db_dir_path:
            local_db_dir_path = None
            break
        elif not os.path.exists(local_db_dir_path):
            log_error(f"unable to find path '{local_db_dir_path}'")
        elif not DBManager.is_local_db_in_dir(local_db_dir_path):
            log_error(f"unable to find local database file '{...}' in path '{local_db_dir_path}'")
        else:
            break
        print("\nPlease try again.")
        local_db_dir_path = input(
            'Please enter a path containing a local table you would like to clear.\n')
    return local_db_dir_path
def cluster_embeddings_no_split(self, embeddings, embeddings_ids=None, existing_clusters_dict=None,
                                should_reset_cluster_ids=False, final_clusters_only=True):
    """
    Build clusters from face embeddings stored in the given path using the specified classification
    threshold. (Currently handled as: All embeddings closer than the distance given by the
    classification threshold are placed in the same cluster.) If cluster_save_path is set, store
    the resulting clusters as directories in the given path.

    :param should_reset_cluster_ids:
    :param embeddings: Iterable containing the embeddings. If embeddings_ids is None, must consist
    of (id, embedding)-pairs
    :param embeddings_ids: Ordered iterable with the embedding ids. Must be at least as long as
    embeddings.
    :param existing_clusters_dict:
    :param final_clusters_only: If true, only the final iterable of clusters is returned.
    Otherwise, return that final iterable, as well as a list of modified/newly created and deleted
    clusters
    :return:
    """
    # TODO: Allow embeddings_ids to be none? Get next id via DB query?
    # TODO: Allow embeddings_ids to be shorter than embeddings and 'fill up' remaining ids?
    # embeddings = list(embeddings)
    if not embeddings:
        if final_clusters_only:
            return ClusterDict()
        return ClusterDict(), ClusterDict(), ClusterDict()

    if embeddings_ids is None:
        embeddings_with_ids = embeddings
    else:
        # if len(embeddings) > len(embeddings_ids):
        #     raise ValueError(f'Too few ids for embeddings ({len(embeddings_ids)} passed, but'
        #                      f' {len(embeddings)} needed)')
        embeddings_with_ids = zip(embeddings_ids, embeddings)

    if existing_clusters_dict is None:
        existing_clusters_dict = ClusterDict()
    else:
        # # Don't iterate over embeddings in existing clusters
        # embeddings_with_ids = dict(embeddings_with_ids)
        # existing_embeddings = existing_clusters_dict.get_embeddings()
        # remove_multiple(embeddings_with_ids, existing_embeddings)
        # embeddings_with_ids = embeddings_with_ids.items()

        # Don't iterate over embeddings in existing clusters
        def exists_in_any_cluster(emb_id, _):
            return existing_clusters_dict.any_cluster_with_emb(emb_id)

        embeddings_with_ids = starfilterfalse(exists_in_any_cluster, embeddings_with_ids)

    cluster_dict = existing_clusters_dict
    if should_reset_cluster_ids:
        cluster_dict.reset_ids()
        next_cluster_id = cluster_dict.get_max_id() + 1
    else:
        max_existing_id = cluster_dict.get_max_id()
        max_db_id = DBManager.get_max_cluster_id()
        next_cluster_id = max(max_existing_id, max_db_id) + 1

    embeddings_with_ids = list(embeddings_with_ids)
    random.seed(0)
    random.shuffle(embeddings_with_ids)

    modified_clusters_ids, removed_clusters_ids = set(), set()
    for counter, (embedding_id, new_embedding) in enumerate(embeddings_with_ids, start=1):
        print_progress(counter, "embedding_id iteration")
        closest_clusters = self.get_closest_clusters(cluster_dict, new_embedding)

        # find cluster containing the closest embedding to new_embedding
        shortest_emb_dist, closest_cluster = self.find_closest_cluster_to_embedding(
            closest_clusters, new_embedding)
        if shortest_emb_dist <= self.classification_threshold:
            closest_cluster.add_embedding(new_embedding, embedding_id)
            modified_clusters_ids.add(closest_cluster.cluster_id)
        else:
            new_cluster = Cluster(next_cluster_id, [new_embedding], [embedding_id])
            next_cluster_id += 1
            cluster_dict.add_cluster(new_cluster)
            modified_clusters_ids.add(new_cluster.cluster_id)

    if final_clusters_only:
        return cluster_dict
    modified_clusters = cluster_dict.get_clusters_by_ids(modified_clusters_ids)
    removed_clusters = cluster_dict.get_clusters_by_ids(removed_clusters_ids)
    return cluster_dict, ClusterDict(modified_clusters), ClusterDict(removed_clusters)
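
# Hedged usage sketch for cluster_embeddings_no_split (values are illustrative;
# the algorithm object is constructed as in eval_process_image_dir above):
#
#     eval_core_algorithm = EvalCoreAlgorithm(metric=2, classification_threshold=0.73)
#     all_clusters, modified, removed = eval_core_algorithm.cluster_embeddings_no_split(
#         embeddings_with_ids, existing_clusters_dict=cluster_dict,
#         should_reset_cluster_ids=True, final_clusters_only=False)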
def drop_central_tables(con=None, close_connections=True):
    DBManager.create_central_tables(drop_existing_tables=True, con=con,
                                    close_connections=close_connections)


def clear_clustering(con=None, close_connections=True):
    DBManager.clear_clusters(con=con, close_connections=close_connections)


def clear_central_tables(con=None, close_connections=True):
    DBManager.clear_central_tables(con=con, close_connections=close_connections)
def extract_faces(path, check_if_known=True, central_con=None, local_con=None,
                  close_connections=True):
    # TODO: Refactor (extract functions)? + rename
    # TODO: Generate Thumbnails differently? (E.g. via Image.thumbnail or sth. like that)
    # TODO: Store + update max_img_id and max_embedding_id somewhere rather than (always) get them
    #       via DB query?
    path_to_local_db = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # path not yet known
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=path_to_local_db, con=local_con,
                                close_connections=False)
    imgs_rel_paths_and_dates = set(
        DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

    # Note: 'MAX' returns None / (None, ) as a default value
    max_img_id = DBManager.get_max_image_id(path_to_local_db=path_to_local_db)
    start_img_id = max_img_id + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def get_counted_img_loader():
        img_loader = load_imgs_from_path(path, recursive=True, output_file_names=True,
                                         output_file_paths=True)
        return enumerate(img_loader, start=start_img_id)

    def store_embedding_row_dicts(con):
        # TODO: Also auto-increment emb_id etc.
        max_embedding_id = initial_max_embedding_id
        for img_id, (img_abs_path, img_rel_path, img) in get_counted_img_loader():
            # TODO: Implement automatic deletion cascade!
            #       (Using among other things on_conflict clause and FKs) ---> Done?
            # Check if image already stored --> don't process again
            # known = (rel. path, last modified) as a pair known for this directory
            last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_abs_path).st_mtime))
            if check_if_known and (img_rel_path, last_modified) in imgs_rel_paths_and_dates:
                continue
            DBManager.store_image(img_id=img_id, rel_file_path=img_rel_path,
                                  last_modified=last_modified, path_to_local_db=path_to_local_db,
                                  con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con,
                                       close_connections=False)
            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{img_abs_path}'")
                continue
            # TODO: Better way to create these row_dicts?
            embeddings_row_dicts = [{Columns.cluster_id.col_name: 'NULL',
                                     Columns.embedding.col_name: face_to_embedding(face),
                                     Columns.thumbnail.col_name: face,
                                     Columns.image_id.col_name: img_id,
                                     Columns.embedding_id.col_name: embedding_id}
                                    for embedding_id, face in enumerate(
                                        faces, start=max_embedding_id + 1)]
            DBManager.store_embeddings(embeddings_row_dicts, con=con, close_connections=False)
            max_embedding_id += len(faces)

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con,
                                 close_connections=close_connections)
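
# Hedged usage sketch for extract_faces (the path is an illustrative assumption;
# like process_faces, connection handling is deferred to DBManager.connection_wrapper):
#
#     extract_faces('/path/to/images', check_if_known=True)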
def user_choose_embedding_id(cluster):
    # TODO: Refactor
    faces_dict = dict(DBManager.get_thumbnails_from_cluster(cluster.cluster_id,
                                                            with_embeddings_ids=True))
    chosen_embedding_id = user_choose_embedding_id_worker(faces_dict, cluster.label)
    return chosen_embedding_id
@classmethod
def split_cluster(cls, cluster_to_split, next_cluster_id=None, ret_new_next_id=False):
    """
    Split cluster into two new clusters as follows:
        1. Find two embeddings e1, e2 in the cluster with the greatest distance between them.
        2. Create a new cluster C1, C2 for each of the two.
        3. For each embedding e of the remaining embeddings:
           Add e to the cluster (C1 or C2) whose center is closer to it.
    The given cluster must contain at least 2 embeddings.

    :param ret_new_next_id:
    :param next_cluster_id:
    :param cluster_to_split: Cluster to be split
    :return: Two new clusters containing embeddings of old one
    """
    # TODO: Does this fail due to bad analogy to low-dim. space?!
    embeddings_with_ids = cluster_to_split.get_embeddings(with_embeddings_ids=True, as_dict=True)
    (emb1_id, cluster_start_emb1), (emb2_id, cluster_start_emb2) = \
        cls.find_most_distant_embeddings(embeddings_with_ids)
    remove_multiple(embeddings_with_ids, [emb1_id, emb2_id])
    label = cluster_to_split.label
    if next_cluster_id is None:
        next_cluster_id = DBManager.get_max_cluster_id() + 1
    new_cluster1_id, new_cluster2_id = next_cluster_id, next_cluster_id + 1
    new_cluster1 = Cluster(new_cluster1_id, [cluster_start_emb1], [emb1_id], label=label)
    new_cluster2 = Cluster(new_cluster2_id, [cluster_start_emb2], [emb2_id], label=label)

    @spread_args_decorator
    @ignore_first_n_args_decorator(n=1)
    def is_closer_to_cluster1(emb):
        dist_to_cluster1 = new_cluster1.compute_dist_to_center(emb)
        dist_to_cluster2 = new_cluster2.compute_dist_to_center(emb)
        return dist_to_cluster1 < dist_to_cluster2

    def try_split(cluster_embs_with_ids, new_cluster):
        split_result = split_items(cluster_embs_with_ids)
        try:
            cluster_embs_ids, cluster_embs = split_result
        except ValueError:
            # not enough values to unpack
            pass
        else:
            new_cluster.add_embeddings(cluster_embs, cluster_embs_ids)

    cluster2_embs_with_ids, cluster1_embs_with_ids = partition(is_closer_to_cluster1,
                                                               embeddings_with_ids.items())
    try_split(cluster1_embs_with_ids, new_cluster1)
    try_split(cluster2_embs_with_ids, new_cluster2)
    new_clusters = (new_cluster1, new_cluster2)
    if ret_new_next_id:
        return new_clusters, new_cluster2_id + 1
    return new_clusters
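
# Minimal NumPy sketch of the split rule documented above: the farthest pair
# becomes the seeds, and every remaining embedding joins the nearer seed.
# Illustrative only; the real implementation works on Cluster objects and their
# centers, not raw arrays:
#
#     import numpy as np
#     from itertools import combinations
#
#     embs = {1: np.array([0., 0.]), 2: np.array([10., 0.]),
#             3: np.array([1., 0.]), 4: np.array([9., 1.])}
#     # 1. the two most distant embeddings become the seeds
#     a, b = max(combinations(embs, 2),
#                key=lambda p: np.linalg.norm(embs[p[0]] - embs[p[1]]))
#     # 2./3. every other embedding joins the seed it is closer to
#     c1 = [i for i in embs
#           if np.linalg.norm(embs[i] - embs[a]) <= np.linalg.norm(embs[i] - embs[b])]
#     c2 = [i for i in embs if i not in c1]
#     # -> c1 == [1, 3], c2 == [2, 4]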
def view_person(cluster_dict, **kwargs):
    """
    1. Fetch which labels exist (incl. Unknown Person)
    2. Prompt user, which person/label they would like to view
    3. Fetch all image names/paths for that person
    4. Prompt user, which image they would like to view
    5. Show image
    6. Go to 2.

    :param cluster_dict:
    :param kwargs:
    :return:
    """
    # TODO: Make user choose file *name*, not path (and just inform them of the path they're on
    #       beforehand)
    # TODO: When only one choice (to pick path or image), make choice for user and inform them
    #       about it!
    # TODO: Refactor? (Extract functions)
    # TODO: Give option of renaming a file/directory?
    #       --> Best practices? How to do so *safely*?!
    # TODO: How to include thumbnails and face ids in all of this?
    #       --> Give option to switch to/from edit_handler?
    get_label_decision = partial(get_user_decision, 'Would you like to select another person?')
    get_image_decision = partial(get_user_decision,
                                 'Would you like to view another image of the person from this'
                                 ' directory?')
    get_directory_decision = partial(get_user_decision,
                                     'Would you like to select another directory containing images'
                                     ' of the person?')

    cluster_labels = cluster_dict.get_cluster_labels(unique=True)  # TODO: faster to use DB??

    # TODO: Extract some loop constructs as functions?
    # TODO: Are these interactions alright?
    # TODO: Catch errors!
    continue_label = ''
    while continue_label != 'n':
        chosen_label = user_choose_label(cluster_labels)
        if chosen_label is None:
            continue_label = get_label_decision()
            continue
        try:
            person_dir_paths_to_img_ids = DBManager.get_dir_paths_to_img_ids(chosen_label)
        except IncompleteDatabaseOperation:
            continue_label = get_label_decision()
            continue
        person_dir_paths = person_dir_paths_to_img_ids.keys()

        continue_directory = ''
        while continue_directory != 'n':
            chosen_directory_path = user_choose_directory_path(person_dir_paths)
            if chosen_directory_path is None:
                continue_directory = get_directory_decision()
                continue
            image_ids = person_dir_paths_to_img_ids[chosen_directory_path]
            try:
                file_name_to_path_dict = DBManager.get_image_name_to_path_dict(
                    chosen_directory_path, image_ids)
            except IncompleteDatabaseOperation:
                continue_directory = get_directory_decision()
                continue

            continue_image = ''
            while continue_image != 'n':
                print(f"The currently chosen path is: '{chosen_directory_path}'.")
                chosen_image_path = user_choose_image_path(file_name_to_path_dict)
                if chosen_image_path is None:
                    continue_image = get_image_decision()
                    continue
                chosen_image = Image.open(chosen_image_path)
                chosen_image.show()
                continue_image = get_image_decision()
            continue_directory = get_directory_decision()
        continue_label = get_label_decision()