def run_program_with_user_stats():
    """Run the interactive command loop while timing each command.

    Behaves like run_program, but records (command name, duration) pairs
    plus the total runtime; if ``write`` is enabled, the stats are dumped
    to ``command_stats_path``.
    """
    write = False  # flip to True to persist the timing stats to disk
    command_stats_path = r'C:\Users\Mischa\Desktop\Uni\20-21 WS\Bachelor\BA Papers\Datasets\faces 1999 caltech\commands_stats.txt'
    t0 = time.time()
    # Models.altered_mtcnn.keep_all = False
    init_program()
    cluster_dict = DBManager.load_cluster_dict()
    commands = []
    while True:
        cmd_name = get_user_command()
        if cmd_name == str(Commands.exit):
            break
        started = time.time()
        cmd = Command.get_command(cmd_name)
        call_handler(cmd.handler, cluster_dict=cluster_dict)
        commands.append([cmd_name, time.time() - started])
    tn = time.time()
    per_command_lines = '\n'.join(map(str, commands))
    commands_str = per_command_lines + '\n\n' + f'total runtime: {tn - t0}'
    if write:
        with open(command_stats_path, 'w') as file:
            file.write(commands_str)
def run_program():
    """Initialize the program and dispatch user commands until 'exit'."""
    init_program()
    cluster_dict = DBManager.load_cluster_dict()
    while True:
        cmd_name = get_user_command()
        # Guard clause: the exit command terminates the loop without dispatch.
        if cmd_name == str(Commands.exit):
            break
        cmd = Command.get_command(cmd_name)
        call_handler(cmd.handler, cluster_dict=cluster_dict)
def cluster_processed_faces(con):
    """Re-cluster all stored embeddings and persist the result on ``con``.

    NOTE(review): reads ``metric``, ``threshold`` and ``cluster_dict`` as
    free variables from an enclosing scope not visible here — confirm they
    are in scope at the definition site.
    """
    embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
    # TODO: Call reclassify handler here?
    # TODO: Clear existing clusters? Issues with ids etc.????
    algorithm = CoreAlgorithm(metric=metric, classification_threshold=threshold)
    # passing result cluster dict already overwrites it
    _, modified_clusters_dict, removed_clusters_dict = algorithm.cluster_embeddings(
        embeddings_with_ids,
        existing_clusters_dict=cluster_dict,
        should_reset_cluster_ids=True,
        final_clusters_only=False)
    DBManager.overwrite_clusters_simplified(modified_clusters_dict,
                                            removed_clusters_dict,
                                            con=con,
                                            close_connections=False)
    reset_cluster_ids(con=con, close_connections=False)
    # Reload from the DB so cluster_dict reflects the freshly reset ids.
    new_cluster_dict = DBManager.load_cluster_dict(con=con, close_connections=False)
    overwrite_dict(cluster_dict, new_cluster_dict)
def measure_commands():
    """Benchmark the main pipeline commands over growing dataset sizes.

    For n in range(start, stop + 1, step): clears all data, re-initializes the
    program, then times 'process_images_dir', 'reclassify' and 'clear_data',
    collecting [command, n, seconds] triples. Results are written to
    COMMAND_STATS_PATH only when ``write`` is True.
    """
    # TODO: process faces should be limited by n!
    write = False  # flip to True to persist the timing stats to disk
    start, stop, step = 90, 450, 90  # dataset sizes (images per iteration)
    COMMAND_STATS_PATH = r'C:\Users\Mischa\Desktop\Uni\20-21 WS\Bachelor\BA Papers\Datasets\faces 1999 caltech\commands_stats.txt'
    DATASET_PATH = r'C:\Users\Mischa\Desktop\Uni\20-21 WS\Bachelor\BA Papers\Datasets\faces 1999 caltech'

    def process_images_dir_measure(cluster_dict, n):
        # Process up to n images from DATASET_PATH, then re-cluster the
        # stored embeddings. On a failed DB operation the cluster dict is
        # restored from the copy taken before clustering.
        images_path = DATASET_PATH
        try:
            print('------ PROCESSING FACES')
            process_faces_measure(images_path, n)
            print('------ DONE PROCESSING')
        except IncompleteDatabaseOperation as e:
            print('process_images_dir_measure error')
            log_error(e)
            return
        cluster_dict_copy = cluster_dict.copy()

        def cluster_processed_faces(con):
            embeddings_with_ids = list(
                DBManager.get_all_embeddings(with_ids=True))
            # TODO: Call reclassify handler here?
            # TODO: Clear existing clusters? Issues with ids etc.????
            core_algorithm = CoreAlgorithm()
            # passing result cluster dict already overwrites it
            clustering_result = core_algorithm.cluster_embeddings(
                embeddings_with_ids,
                existing_clusters_dict=cluster_dict,
                should_reset_cluster_ids=True,
                final_clusters_only=False)
            _, modified_clusters_dict, removed_clusters_dict = clustering_result
            DBManager.overwrite_clusters_simplified(modified_clusters_dict,
                                                    removed_clusters_dict,
                                                    con=con,
                                                    close_connections=False)

        try:
            DBManager.connection_wrapper(cluster_processed_faces)
        except IncompleteDatabaseOperation:
            # Roll the in-memory dict back to its pre-clustering state.
            overwrite_dict(cluster_dict, cluster_dict_copy)

    def process_faces_measure(images_path, n, central_con=None, local_con=None,
                              close_connections=True):
        # NOTE(review): path_to_local_db is only resolved when no local
        # connection was passed in; when local_con is provided it stays
        # None — confirm downstream callees accept that.
        if local_con is None:
            path_to_local_db = DBManager.get_local_db_file_path(images_path)
        else:
            path_to_local_db = None

        def process_faces_worker(central_con, local_con):
            # Ensure local tables exist, then extract faces inside the
            # same open connections so the whole step commits atomically.
            DBManager.create_local_tables(drop_existing_tables=False,
                                          path_to_local_db=path_to_local_db,
                                          con=local_con,
                                          close_connections=False)
            extract_faces_measure(images_path, n,
                                  central_con=central_con,
                                  local_con=local_con,
                                  close_connections=False)

        DBManager.connection_wrapper(process_faces_worker,
                                     path_to_local_db=path_to_local_db,
                                     central_con=central_con,
                                     local_con=local_con,
                                     with_central=True,
                                     with_local=True,
                                     close_connections=close_connections)

    def extract_faces_measure(path, n, check_if_known=True, central_con=None,
                              local_con=None, close_connections=True):
        # Detect faces in up to n images under ``path`` and store their
        # embeddings; images already known by (name, mtime) are skipped.
        path_to_local_db = DBManager.get_local_db_file_path(path)
        path_id = DBManager.get_path_id(path)
        if path_id is None:
            # path not yet known
            path_id = DBManager.store_directory_path(path,
                                                     con=central_con,
                                                     close_connections=False)
            DBManager.store_path_id(path_id,
                                    path_to_local_db=path_to_local_db,
                                    con=local_con,
                                    close_connections=False)
        imgs_names_and_date = set(
            DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

        # Note: 'MAX' returns None / (None, ) as a default value
        # NOTE(review): per the note above, get_max_image_id may yield None
        # on an empty table, which would make ``max_img_id + 1`` raise —
        # confirm the helper coerces the default to an int.
        max_img_id = DBManager.get_max_image_id(
            path_to_local_db=path_to_local_db)
        start_img_id = max_img_id + 1
        initial_max_embedding_id = DBManager.get_max_embedding_id()

        def get_counted_img_loader():
            # Pair each loaded image with its sequential image id; zip also
            # caps the loader at n images via the finite ``nums`` range.
            img_loader = load_imgs_from_path(path,
                                             recursive=True,
                                             output_file_names=True,
                                             output_file_paths=True)
            nums = range(start_img_id, start_img_id + n)
            return zip(nums, img_loader)

        def store_embedding_row_dicts(con):
            max_embedding_id = initial_max_embedding_id
            for img_id, (img_path, img_name, img) in get_counted_img_loader():
                # Check if image already stored --> don't process again
                # known = (name, last modified) pairs known for this directory
                last_modified = datetime.datetime.fromtimestamp(
                    round(os.stat(img_path).st_mtime))
                if check_if_known and (img_name, last_modified) in imgs_names_and_date:
                    continue
                DBManager.store_image(img_id=img_id,
                                      rel_file_path=img_name,
                                      last_modified=last_modified,
                                      path_to_local_db=path_to_local_db,
                                      con=local_con,
                                      close_connections=False)
                DBManager.store_image_path(img_id=img_id,
                                           path_id=path_id,
                                           con=central_con,
                                           close_connections=False)
                faces = Models.altered_mtcnn.forward_return_results(img)
                if not faces:
                    log_error(f"no faces found in image '{img_path}'")
                    continue
                # One row per detected face; embedding ids continue from the
                # current maximum so ids stay unique across images.
                embeddings_row_dicts = [{
                    Columns.cluster_id.col_name: 'NULL',
                    Columns.embedding.col_name: face_to_embedding(face),
                    Columns.thumbnail.col_name: face,
                    Columns.image_id.col_name: img_id,
                    Columns.embedding_id.col_name: embedding_id
                } for embedding_id, face in enumerate(
                    faces, start=max_embedding_id + 1)]
                DBManager.store_embeddings(embeddings_row_dicts,
                                           con=con,
                                           close_connections=False)
                max_embedding_id += len(faces)

        DBManager.connection_wrapper(store_embedding_row_dicts,
                                     con=central_con,
                                     close_connections=close_connections)

    def clear_data_measure(cluster_dict):
        # Wipe both local and central tables and empty the in-memory
        # cluster dict, all inside one connection wrapper.
        local_db_dir_path = DATASET_PATH
        path_to_local_db = DBManager.get_local_db_file_path(local_db_dir_path)

        def clear_data_worker(central_con, local_con):
            DBManager.clear_local_tables(path_to_local_db,
                                         con=local_con,
                                         close_connections=False)
            clear_central_tables(con=central_con, close_connections=False)
            overwrite_dict(cluster_dict, dict())

        try:
            DBManager.connection_wrapper(clear_data_worker,
                                         path_to_local_db=path_to_local_db,
                                         with_central=True,
                                         with_local=True)
        except IncompleteDatabaseOperation as e:
            print('clear_data_measure error')
            log_error(e)

    # Commands to benchmark, in execution order per iteration.
    cmds_list = [
        ('process_images_dir', process_images_dir_measure),
        ('reclassify', reclassify),
        ('clear_data', clear_data_measure),
    ]
    # Start each run from a clean slate.
    clear_data_measure(dict())
    commands = []
    for n in range(start, stop + 1, step):
        print(f'ITERATION: {n}')
        init_program()
        cluster_dict = DBManager.load_cluster_dict()
        for cmd_name, cmd in cmds_list:
            print(f'--- COMMAND: {cmd_name}')
            t1 = time.time()
            # Only process_images_dir takes the iteration size n.
            args = [cluster_dict] if cmd_name != 'process_images_dir' else [
                cluster_dict, n
            ]
            cmd(*args)
            t2 = time.time()
            commands.append([cmd_name, n, t2 - t1])
    commands_str = '\n'.join(map(str, commands))
    if write:
        with open(COMMAND_STATS_PATH, 'w') as file:
            file.write(commands_str)