def reclassify(cluster_dict, embeddings_with_ids=None, con=None, close_connections=True, **kwargs):
    """Recluster every stored embedding from scratch and overwrite the clusters.

    Fetches all embeddings (unless *embeddings_with_ids* is supplied), runs the
    core clustering algorithm with fresh cluster ids, writes the resulting
    clusters back to the database, and mirrors the result into *cluster_dict*.

    :param cluster_dict: dict that is overwritten in place with the new clusters
    :param embeddings_with_ids: optional pre-fetched embeddings; when None they
        are loaded from the database
    :param con: optional database connection to reuse
    :param close_connections: whether the connection wrapper should close
        connections when done
    :param kwargs: accepted for call-site compatibility; not used here
    """
    def reclassify_worker(con):
        # all operations in worker, so if any DB operation raises error, it is caught
        if embeddings_with_ids is not None:
            local_embeddings_with_ids = embeddings_with_ids
        else:
            local_embeddings_with_ids = list(
                DBManager.get_all_embeddings(with_ids=True))
        if not local_embeddings_with_ids:
            log_error('no embeddings found, nothing to edit')
            return
        new_cluster_dict = DBManager.get_certain_clusters()
        core_algorithm = CoreAlgorithm()
        clustering_result = core_algorithm.cluster_embeddings(
            embeddings=local_embeddings_with_ids,
            existing_clusters_dict=new_cluster_dict,
            should_reset_cluster_ids=True,
            final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters(new_cluster_dict, removed_clusters_dict,
                                     no_new_embs=True, clear_clusters=True,
                                     con=con, close_connections=False)
        overwrite_dict(cluster_dict, new_cluster_dict)

    try:
        DBManager.connection_wrapper(reclassify_worker, con=con,
                                     close_connections=close_connections)
    except IncompleteDatabaseOperation as e:
        # Was silently swallowed before; log it so a failed reclassification
        # is visible, matching the error handling of the sibling functions.
        log_error(e)
def clear_data_measure(cluster_dict):
    """Wipe the local and central database tables and empty *cluster_dict*.

    :param cluster_dict: dict that is emptied in place once the tables are cleared
    """
    db_dir = DATASET_PATH
    local_db_file = DBManager.get_local_db_file_path(db_dir)

    def clear_data_worker(central_con, local_con):
        # Both databases are cleared inside the same wrapped operation so a
        # failure surfaces as IncompleteDatabaseOperation below.
        DBManager.clear_local_tables(local_db_file, con=local_con,
                                     close_connections=False)
        clear_central_tables(con=central_con, close_connections=False)
        overwrite_dict(cluster_dict, dict())

    try:
        DBManager.connection_wrapper(clear_data_worker,
                                     path_to_local_db=local_db_file,
                                     with_central=True,
                                     with_local=True)
    except IncompleteDatabaseOperation as e:
        print('clear_data_measure error')
        log_error(e)
def process_images_dir_measure(cluster_dict, n):
    """Process faces for *n* images in the dataset directory, then recluster.

    :param cluster_dict: dict of existing clusters, updated in place by the
        clustering step (restored from a snapshot if the DB write fails)
    :param n: number of images to process
    """
    images_path = DATASET_PATH
    try:
        print('------ PROCESSING FACES')
        process_faces_measure(images_path, n)
        print('------ DONE PROCESSING')
    except IncompleteDatabaseOperation as e:
        print('process_images_dir_measure error')
        log_error(e)
        return

    # Snapshot so the in-memory dict can be rolled back on DB failure.
    cluster_dict_backup = cluster_dict.copy()

    def cluster_processed_faces(con):
        embeddings_with_ids = list(
            DBManager.get_all_embeddings(with_ids=True))
        # TODO: Call reclassify handler here?
        # TODO: Clear existing clusters? Issues with ids etc.????
        # passing result cluster dict already overwrites it
        clustering_result = CoreAlgorithm().cluster_embeddings(
            embeddings_with_ids,
            existing_clusters_dict=cluster_dict,
            should_reset_cluster_ids=True,
            final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters_simplified(modified_clusters_dict,
                                                removed_clusters_dict,
                                                con=con,
                                                close_connections=False)

    try:
        DBManager.connection_wrapper(cluster_processed_faces)
    except IncompleteDatabaseOperation:
        overwrite_dict(cluster_dict, cluster_dict_backup)
def extract_faces_measure(path, n, check_if_known=True, central_con=None,
                          local_con=None, close_connections=True):
    """Detect faces in up to *n* images under *path* and store their embeddings.

    Registers the directory if unknown, skips images already recorded (by
    name + last-modified time) when *check_if_known* is set, runs MTCNN face
    detection on each new image, and stores one embedding row per face.

    :param path: directory containing the images
    :param n: number of image ids to assign (upper bound on images processed)
    :param check_if_known: skip images already present in the local DB
    :param central_con: optional central DB connection to reuse
    :param local_con: optional local DB connection to reuse
    :param close_connections: forwarded to the connection wrapper
    """
    path_to_local_db = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # Directory not yet registered: create its id centrally, mirror locally.
        path_id = DBManager.store_directory_path(path, con=central_con,
                                                 close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=path_to_local_db,
                                con=local_con, close_connections=False)
    known_name_date_pairs = set(
        DBManager.get_images_attributes(path_to_local_db=path_to_local_db))
    # Note: 'MAX' returns None / (None, ) as a default value
    # NOTE(review): if get_max_image_id can actually return None here, the
    # `+ 1` below would raise — confirm the helper substitutes a default.
    max_img_id = DBManager.get_max_image_id(path_to_local_db=path_to_local_db)
    first_img_id = max_img_id + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def get_counted_img_loader():
        # Pair each loaded image with the image id it will be stored under.
        img_loader = load_imgs_from_path(path, recursive=True,
                                         output_file_names=True,
                                         output_file_paths=True)
        return zip(range(first_img_id, first_img_id + n), img_loader)

    def store_embedding_row_dicts(con):
        embedding_id_base = initial_max_embedding_id
        for img_id, (img_path, img_name, img) in get_counted_img_loader():
            # Skip images already stored for this directory; an image is
            # "known" by its (name, last-modified) pair.
            last_modified = datetime.datetime.fromtimestamp(
                round(os.stat(img_path).st_mtime))
            if check_if_known and (img_name, last_modified) in known_name_date_pairs:
                continue
            DBManager.store_image(img_id=img_id,
                                  rel_file_path=img_name,
                                  last_modified=last_modified,
                                  path_to_local_db=path_to_local_db,
                                  con=local_con,
                                  close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id,
                                       con=central_con,
                                       close_connections=False)
            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{img_path}'")
                continue
            embedding_rows = [
                {Columns.cluster_id.col_name: 'NULL',
                 Columns.embedding.col_name: face_to_embedding(face),
                 Columns.thumbnail.col_name: face,
                 Columns.image_id.col_name: img_id,
                 Columns.embedding_id.col_name: embedding_id}
                for embedding_id, face in enumerate(
                    faces, start=embedding_id_base + 1)
            ]
            DBManager.store_embeddings(embedding_rows, con=con,
                                       close_connections=False)
            embedding_id_base += len(faces)

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con,
                                 close_connections=close_connections)