def get_xs_and_ys(thres_and_met_to_f_measure, thres=False, metric=False):
    if thres:
        ind = 1
    elif metric:
        ind = 0
    else:
        log_error('thres or metric must be true')
        return

    param_f_measure = [
        (thres_met.split('__')[ind], f_measure)
        for thres_met, f_measure in thres_and_met_to_f_measure.items()
    ]
    param_f_measure = [(float(param.lstrip('LT')), float(f_measure))
                       for param, f_measure in param_f_measure]

    # TODO: Incorporate variance, too?
    # thres_met_f_measures_groups_dict = group_pairs(param_f_measure, ret_dict=True)
    #
    # map_dict_vals(thres_met_f_measures_groups_dict, func=np.mean)
    #
    # return thres_met_f_measures_groups_dict
    #
    # thres_met_f_measures_groups = sorted(thres_met_f_measures_groups_dict.items())
    # grouped_thres_met_f_measures = defaultdict(list)
    # for met, f_measures in groupby(met_f_measure, key=lambda mf: mf[0]):
    #     grouped_thres_met_f_measures[met].append(np.mean(f_measures))
    # grouped_thres_met_f_measures = sorted(grouped_thres_met_f_measures.items())

    xs = get_every_nth_item(param_f_measure, n=0)
    ys = get_every_nth_item(param_f_measure, n=1)
    return list(xs), list(ys)
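# A minimal, self-contained sketch of the grouping idea from the TODO above:
# average (and optionally spread of) the f-measures per parameter value. This
# is only an illustration using the standard library, not the project's
# group_pairs / map_dict_vals helpers; the function name is made up.
from itertools import groupby
from statistics import mean, pstdev


def group_param_f_measures(param_f_measure):
    """Return {param: (mean f-measure, population std dev)} for (param, f_measure) pairs."""
    pairs = sorted(param_f_measure)  # groupby requires sorted input
    grouped = {}
    for param, group in groupby(pairs, key=lambda pf: pf[0]):
        f_measures = [f for _, f in group]
        grouped[param] = (mean(f_measures), pstdev(f_measures))
    return grouped


# Example: two runs at threshold 0.5, one at 0.7.
# group_param_f_measures([(0.5, 0.8), (0.5, 0.9), (0.7, 0.6)])
# -> {0.5: (0.85, 0.05), 0.7: (0.6, 0.0)}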
def store_embedding_row_dicts(con):
    print('----- store_embedding_row_dicts -----')
    # TODO: Also auto-increment emb_id etc.
    embedding_id = initial_max_embedding_id + 1
    for img_id, (img_path, img_name, img) in get_counted_img_loader():
        print_progress(img_id, 'image')
        last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_path).st_mtime))
        if check_if_known and (img_name, last_modified) in imgs_names_and_date:
            continue

        DBManager.store_image(img_id=img_id, rel_file_path=img_name, last_modified=last_modified,
                              path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
        DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

        face = Models.altered_mtcnn.forward_return_results(img)
        if face is None:
            log_error(f"no faces found in image '{img_path}'")
            continue

        embedding_row_dict = {Columns.cluster_id.col_name: 'NULL',
                              Columns.embedding.col_name: face_to_embedding(face),
                              Columns.thumbnail.col_name: face,
                              Columns.image_id.col_name: img_id,
                              Columns.embedding_id.col_name: embedding_id}
        DBManager.store_embedding(embedding_row_dict, con=con, close_connections=False)
        embedding_id += 1
def get_command(cls, cmd_name):
    try:
        cmd = cls.commands[cmd_name]
    except KeyError:
        log_error(f"unknown command '{cmd_name}'")
        return None
    return cmd
def store_embedding_row_dicts(con):
    # TODO: Also auto-increment emb_id etc.
    max_embedding_id = initial_max_embedding_id
    for img_id, (img_abs_path, img_rel_path, img) in get_counted_img_loader():
        # TODO: Implement automatic deletion cascade!
        #       (Using among other things on_conflict clause and FKs)
        # ---> Done?

        # Check if image already stored --> don't process again
        # known = (name, last modified) pair already known for this directory
        last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_abs_path).st_mtime))
        if check_if_known and (img_rel_path, last_modified) in imgs_rel_paths_and_dates:
            continue

        DBManager.store_image(img_id=img_id, rel_file_path=img_rel_path, last_modified=last_modified,
                              path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
        DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

        faces = Models.altered_mtcnn.forward_return_results(img)
        if not faces:
            log_error(f"no faces found in image '{img_abs_path}'")
            continue

        # TODO: Better way to create these row_dicts?
        embeddings_row_dicts = [{Columns.cluster_id.col_name: 'NULL',
                                 Columns.embedding.col_name: face_to_embedding(face),
                                 Columns.thumbnail.col_name: face,
                                 Columns.image_id.col_name: img_id,
                                 Columns.embedding_id.col_name: embedding_id}
                                for embedding_id, face in enumerate(faces, start=max_embedding_id + 1)]
        DBManager.store_embeddings(embeddings_row_dicts, con=con, close_connections=False)
        max_embedding_id += len(faces)
def edit_labels(cluster_dict, **kwargs):
    # TODO: Refactor
    # TODO: Include option to delete people (and remember that in case same dir is read again? --> Probs optional)
    if not cluster_dict:
        log_error('no clusters found, no labels to edit')
        return

    get_cluster_decision = partial(get_user_decision, 'Would you like to choose another cluster?')
    get_face_decision = partial(get_user_decision, 'Would you like to relabel another face in this cluster?')
    # TODO: Nicer parameter passing?
    get_label_scope_decision = partial(get_user_decision,
                                       'Should the whole cluster receive that label or just the picture?',
                                       choices_strs=('[c]luster', '[p]icture'), valid_choices=('c', 'p'))

    continue_choosing_cluster = ''
    while continue_choosing_cluster != 'n':
        cluster = user_choose_cluster(cluster_dict)
        if cluster is None:
            continue_choosing_cluster = get_cluster_decision()
            continue

        continue_choosing_face = ''
        while continue_choosing_face != 'n':
            try:
                embedding_id = user_choose_embedding_id(cluster)
            except IncompleteDatabaseOperation:
                continue
            if embedding_id is None:
                # User *doesn't* want to relabel another face in this cluster!
                break
            new_label = user_choose_face_label(cluster.label)
            if new_label is None:
                continue_choosing_face = get_face_decision()
                continue

            label_scope = get_label_scope_decision()
            try:
                if label_scope == 'c':
                    set_cluster_label(cluster, new_label)
                else:
                    set_picture_label(embedding_id, new_label, cluster, cluster_dict)
            except IncompleteDatabaseOperation:
                pass  # TODO: error notification here!!!

            # Auto-stop choosing faces if cluster is empty or consists of only one face
            continue_choosing_face = get_face_decision() if cluster.get_size() > 2 else 'n'
        continue_choosing_cluster = get_cluster_decision()
def user_choose_images_path():
    images_path = input('Please enter a path with images of people you would like to add.\n')
    while not os.path.exists(images_path):
        log_error(f"unable to find path '{images_path}'")
        print("\nPlease try again.")
        images_path = input('Please enter a path with images of people you would like to add.\n')

    # TODO: Implement check_if_known question(?)
    # check_if_known_decision = get_user_decision(
    #     "Should already processed images be processed again? This can be useful if for example some files have changed"
    #     " in a way the program doesn't recognize, or some faces from these images have been deleted and you would like"
    #     " to make them available again."
    # )
    # check_if_known = (check_if_known_decision == "n")
    return images_path
def remove_embedding_by_id(self, embedding_id):
    try:
        embedding = self.embeddings_dict.pop(embedding_id)
    except KeyError:
        log_error(f'embedding with id {embedding_id} not found.')
        return

    old_num_embeddings = self.num_embeddings
    self.num_embeddings -= 1
    # TODO: Check the math!!!
    # (old_center is a uniformly weighted sum of the old embeddings)
    try:
        self.center_point = (old_num_embeddings * self.center_point - embedding) / self.num_embeddings
    except ZeroDivisionError:  # num_embeddings is 0
        self.center_point = None
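# Quick sanity check of the centroid update above (re: "TODO: Check the math!!!"):
# if the center is the plain mean of n embeddings, removing embedding x gives
# new_center = (n * old_center - x) / (n - 1). Illustration with numpy; the real
# embeddings here are whatever face_to_embedding produces.
import numpy as np

_embeddings = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 0.0]])
_old_center = _embeddings.mean(axis=0)                # mean of all 3 embeddings
_removed = _embeddings[1]                             # drop the second one
_new_center = (3 * _old_center - _removed) / 2        # update formula from above
assert np.allclose(_new_center, _embeddings[[0, 2]].mean(axis=0))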
def store_embedding_row_dicts(con):
    max_embedding_id = initial_max_embedding_id
    for img_id, (img_path, img_name, img) in get_counted_img_loader():
        # Check if image already stored --> don't process again
        # known = (name, last modified) pair already known for this directory
        last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_path).st_mtime))
        if check_if_known and (img_name, last_modified) in imgs_names_and_date:
            continue

        DBManager.store_image(img_id=img_id, rel_file_path=img_name, last_modified=last_modified,
                              path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
        DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

        faces = Models.altered_mtcnn.forward_return_results(img)
        if not faces:
            log_error(f"no faces found in image '{img_path}'")
            continue

        embeddings_row_dicts = [{Columns.cluster_id.col_name: 'NULL',
                                 Columns.embedding.col_name: face_to_embedding(face),
                                 Columns.thumbnail.col_name: face,
                                 Columns.image_id.col_name: img_id,
                                 Columns.embedding_id.col_name: embedding_id}
                                for embedding_id, face in enumerate(faces, start=max_embedding_id + 1)]
        DBManager.store_embeddings(embeddings_row_dicts, con=con, close_connections=False)
        max_embedding_id += len(faces)
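# Sketch of the "already known?" check used above: an image is skipped when its
# (name, last-modified) pair was seen before for this directory. The mtime is
# rounded to whole seconds, presumably so it compares equal to the second-resolution
# timestamps stored in the database. Illustration only; is_known_image and the
# known-pairs set are made-up names, not project helpers.
import datetime
import os


def is_known_image(img_name, img_abs_path, known_names_and_dates):
    last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_abs_path).st_mtime))
    return (img_name, last_modified) in known_names_and_dates


# The per-face embedding ids simply continue from the running maximum:
# list(enumerate(['face_a', 'face_b'], start=6))  ->  [(6, 'face_a'), (7, 'face_b')]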
def clear_data_measure(cluster_dict):
    local_db_dir_path = DATASET_PATH
    path_to_local_db = DBManager.get_local_db_file_path(local_db_dir_path)

    def clear_data_worker(central_con, local_con):
        DBManager.clear_local_tables(path_to_local_db, con=local_con, close_connections=False)
        clear_central_tables(con=central_con, close_connections=False)
        overwrite_dict(cluster_dict, dict())

    try:
        DBManager.connection_wrapper(clear_data_worker, path_to_local_db=path_to_local_db,
                                     with_central=True, with_local=True)
    except IncompleteDatabaseOperation as e:
        print('clear_data_measure error')
        log_error(e)
def user_choose_cluster(cluster_dict):
    # TODO: Refactor
    cluster_ids = cluster_dict.get_cluster_ids()
    print_cluster_ids(cluster_dict)
    get_user_cluster_id = partial(get_user_input_of_type, class_=int, obj_name='cluster id', allow_empty=True)
    chosen_cluster_id = get_user_cluster_id()
    while chosen_cluster_id is not None and chosen_cluster_id not in cluster_ids:
        log_error(f'cluster "{chosen_cluster_id}" not found. Please try again.')
        print_cluster_ids(cluster_dict)
        chosen_cluster_id = get_user_cluster_id()

    if chosen_cluster_id is None:
        return
    chosen_cluster = cluster_dict.get_cluster_by_id(chosen_cluster_id)
    return chosen_cluster
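# Hedged sketch of what get_user_input_of_type presumably does, judging only by
# its use above: re-prompt until the input parses as the requested type, and
# return None on empty input when allow_empty is set. The real project helper
# may differ; this is an illustration of the pattern, not its implementation.
def get_user_input_of_type(class_=int, obj_name='value', allow_empty=False):
    while True:
        raw = input(f'Please enter a {obj_name} (or leave empty to cancel):\n')
        if not raw and allow_empty:
            return None
        try:
            return class_(raw)
        except ValueError:
            print(f"'{raw}' is not a valid {obj_name}. Please try again.")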
def reclassify_worker(con):
    # All operations run inside this worker, so if any DB operation raises an error, it is caught.
    if embeddings_with_ids is not None:
        local_embeddings_with_ids = embeddings_with_ids
    else:
        local_embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))
    if not local_embeddings_with_ids:
        log_error('no embeddings found, nothing to edit')
        return

    new_cluster_dict = DBManager.get_certain_clusters()
    core_algorithm = CoreAlgorithm()
    clustering_result = core_algorithm.cluster_embeddings(embeddings=local_embeddings_with_ids,
                                                          existing_clusters_dict=new_cluster_dict,
                                                          should_reset_cluster_ids=True,
                                                          final_clusters_only=False)
    _, modified_clusters_dict, removed_clusters_dict = clustering_result
    DBManager.overwrite_clusters(new_cluster_dict, removed_clusters_dict, no_new_embs=True,
                                 clear_clusters=True, con=con, close_connections=False)
    overwrite_dict(cluster_dict, new_cluster_dict)
def user_choose_local_db_dir_path():
    # TODO: Refactor, use user_choose function!
    local_db_dir_path = input('Please enter a path containing a local table you would like to clear.\n')
    # local_db_dir_path = (r'C:\Users\Mischa\Desktop\Uni\20-21 WS'
    #                      r'\Bachelor\Programming\BA\Logic\my_test\facenet_Test\group_imgs')
    while True:
        if not local_db_dir_path:
            local_db_dir_path = None
            break
        elif not os.path.exists(local_db_dir_path):
            log_error(f"unable to find path '{local_db_dir_path}'")
        elif not DBManager.is_local_db_in_dir(local_db_dir_path):
            log_error(f"unable to find local database file '{...}' in path '{local_db_dir_path}'")
        else:
            break
        print("\nPlease try again.")
        local_db_dir_path = input('Please enter a path containing a local table you would like to clear.\n')
    return local_db_dir_path
def process_images_dir_measure(cluster_dict, n):
    images_path = DATASET_PATH
    try:
        print('------ PROCESSING FACES')
        process_faces_measure(images_path, n)
        print('------ DONE PROCESSING')
    except IncompleteDatabaseOperation as e:
        print('process_images_dir_measure error')
        log_error(e)
        return

    cluster_dict_copy = cluster_dict.copy()

    def cluster_processed_faces(con):
        embeddings_with_ids = list(DBManager.get_all_embeddings(with_ids=True))

        # TODO: Call reclassify handler here?
        # TODO: Clear existing clusters? Issues with ids etc.????
        core_algorithm = CoreAlgorithm()
        # passing result cluster dict already overwrites it
        clustering_result = core_algorithm.cluster_embeddings(embeddings_with_ids,
                                                              existing_clusters_dict=cluster_dict,
                                                              should_reset_cluster_ids=True,
                                                              final_clusters_only=False)
        _, modified_clusters_dict, removed_clusters_dict = clustering_result
        DBManager.overwrite_clusters_simplified(modified_clusters_dict, removed_clusters_dict,
                                                con=con, close_connections=False)

    try:
        DBManager.connection_wrapper(cluster_processed_faces)
    except IncompleteDatabaseOperation:
        overwrite_dict(cluster_dict, cluster_dict_copy)
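# The pattern above: run all DB writes inside one wrapped transaction and keep a
# copy of the in-memory cluster dict, so a failed operation rolls back both the
# database and the in-memory state. A generic, standard-library sketch of that
# idea, with sqlite3 standing in for the project's DBManager.connection_wrapper:
import sqlite3


def run_with_rollback(db_path, state, worker):
    """Run worker(con, state) atomically; restore state if the DB work fails."""
    state_copy = dict(state)
    con = sqlite3.connect(db_path)
    try:
        with con:                 # commits on success, rolls back on exception
            worker(con, state)
    except Exception:
        state.clear()
        state.update(state_copy)  # undo in-memory changes too
        raise
    finally:
        con.close()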
def get_cluster_by_id(self, cluster_id):
    try:
        return self[cluster_id]
    except KeyError:
        log_error(f"no cluster with id '{cluster_id}' found")
        return None
def remove_command(cls, cmd_name):
    # TODO: needed?
    try:
        cls.commands.pop(cmd_name)
    except KeyError:
        log_error(f"could not remove unknown command '{cmd_name}'")
def call_handler(handler, *args, **kwargs):
    try:
        return handler(*args, **kwargs)
    except Exception as e:
        log_error(e)
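# Usage sketch for call_handler: exceptions raised by the handler are passed to
# log_error instead of propagating, and the caller receives None in that case.
#
#     result = call_handler(int, '42')     # -> 42
#     result = call_handler(int, 'abc')    # -> ValueError is logged, result is None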