Ejemplo n.º 1
0
def reclassify(cluster_dict, embeddings_with_ids=None, con=None, close_connections=True, **kwargs):
    """Re-cluster embeddings and replace the stored clusters with the result.

    Clustering starts from the clusters returned by
    ``DBManager.get_certain_clusters()``. The outcome is written back with
    ``DBManager.overwrite_clusters`` and afterwards copied into
    ``cluster_dict`` through ``overwrite_dict``.

    Args:
        cluster_dict: Mapping overwritten in place as the worker's last step.
        embeddings_with_ids: Embeddings to cluster. When ``None`` they are
            loaded with ``DBManager.get_all_embeddings(with_ids=True)``.
        con: Connection forwarded to ``DBManager.connection_wrapper``.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
        **kwargs: Accepted but not used.

    Returns:
        None. An ``IncompleteDatabaseOperation`` coming out of the wrapper is
        not propagated to the caller.
    """

    def reclassify_worker(con):
        # Every DB access lives inside the worker so that an error raised by
        # any of them goes through the connection wrapper.
        embeddings = (
            embeddings_with_ids
            if embeddings_with_ids is not None
            else list(DBManager.get_all_embeddings(with_ids=True))
        )
        if not embeddings:
            log_error('no embeddings found, nothing to edit')
            return

        clusters = DBManager.get_certain_clusters()
        clusterer = CoreAlgorithm()
        # Of the three result entries only the removed clusters are used here.
        _, _, removed_clusters = clusterer.cluster_embeddings(
            embeddings=embeddings,
            existing_clusters_dict=clusters,
            should_reset_cluster_ids=True,
            final_clusters_only=False,
        )
        DBManager.overwrite_clusters(
            clusters,
            removed_clusters,
            no_new_embs=True,
            clear_clusters=True,
            con=con,
            close_connections=False,
        )
        overwrite_dict(cluster_dict, clusters)

    try:
        DBManager.connection_wrapper(
            reclassify_worker,
            con=con,
            close_connections=close_connections,
        )
    except IncompleteDatabaseOperation:
        # The failure is swallowed; nothing is reported to the caller.
        pass
Ejemplo n.º 2
0
    def clear_data_measure(cluster_dict):
        """Clear the local and central tables, then empty ``cluster_dict``.

        The local DB file is the one ``DBManager.get_local_db_file_path``
        reports for ``DATASET_PATH``. Both clearing steps run inside a single
        ``DBManager.connection_wrapper`` call that supplies a central and a
        local connection.

        Args:
            cluster_dict: Mapping replaced with an empty dict (through
                ``overwrite_dict``) after both clearing calls have returned.

        Returns:
            None. An ``IncompleteDatabaseOperation`` is printed and logged
            rather than re-raised.
        """
        path_to_local_db = DBManager.get_local_db_file_path(DATASET_PATH)

        def clear_data_worker(central_con, local_con):
            # Connections stay open here; the wrapper decides when to close.
            DBManager.clear_local_tables(
                path_to_local_db, con=local_con, close_connections=False)
            clear_central_tables(con=central_con, close_connections=False)
            overwrite_dict(cluster_dict, {})

        try:
            DBManager.connection_wrapper(
                clear_data_worker,
                path_to_local_db=path_to_local_db,
                with_central=True,
                with_local=True,
            )
        except IncompleteDatabaseOperation as err:
            print('clear_data_measure error')
            log_error(err)
Ejemplo n.º 3
0
    def process_images_dir_measure(cluster_dict, n):
        """Process faces under ``DATASET_PATH`` and cluster all embeddings.

        First runs ``process_faces_measure(DATASET_PATH, n)``. If that raises
        ``IncompleteDatabaseOperation`` the error is printed, logged and the
        function returns early. Otherwise every stored embedding is clustered
        against ``cluster_dict`` and the modified/removed clusters are written
        with ``DBManager.overwrite_clusters_simplified``.

        Args:
            cluster_dict: Existing clusters handed to the clustering
                algorithm; restored from a snapshot if the DB step fails.
            n: Forwarded unchanged to ``process_faces_measure``.

        Returns:
            None.
        """
        dataset_dir = DATASET_PATH
        try:
            print('------ PROCESSING FACES')
            process_faces_measure(dataset_dir, n)
            print('------ DONE PROCESSING')
        except IncompleteDatabaseOperation as err:
            print('process_images_dir_measure error')
            log_error(err)
            return

        # Snapshot for rollback.
        # NOTE(review): this is a shallow copy, so cluster objects mutated in
        # place by the clustering step would not be restored - confirm that
        # is acceptable.
        snapshot = cluster_dict.copy()

        def cluster_processed_faces(con):
            all_embeddings = list(DBManager.get_all_embeddings(with_ids=True))

            # TODO: Call reclassify handler here?
            # TODO: Clear existing clusters? Issues with ids etc.????
            clusterer = CoreAlgorithm()
            # Per the original author's note, handing cluster_dict in as the
            # existing clusters already overwrites it with the result.
            _, modified_clusters, removed_clusters = clusterer.cluster_embeddings(
                all_embeddings,
                existing_clusters_dict=cluster_dict,
                should_reset_cluster_ids=True,
                final_clusters_only=False,
            )
            DBManager.overwrite_clusters_simplified(
                modified_clusters,
                removed_clusters,
                con=con,
                close_connections=False,
            )

        try:
            DBManager.connection_wrapper(cluster_processed_faces)
        except IncompleteDatabaseOperation:
            # Put the pre-clustering mapping back when the DB step fails.
            overwrite_dict(cluster_dict, snapshot)
Ejemplo n.º 4
0
    def extract_faces_measure(path,
                              n,
                              check_if_known=True,
                              central_con=None,
                              local_con=None,
                              close_connections=True):
        """Store up to ``n`` images from ``path`` plus the embeddings of their faces.

        Images are loaded recursively from ``path`` and paired with
        consecutive image ids starting one above the current maximum image id
        of the local DB. For every image that is not skipped, the image row
        and its image-path row are stored, faces are detected with
        ``Models.altered_mtcnn`` and one embedding row per face is stored.

        Args:
            path: Directory to load images from; also used to look up the
                local DB file and the directory's path id.
            n: Upper bound on the number of images taken from the loader (the
                id range has ``n`` entries and is zipped with the loader).
            check_if_known: When true, images whose ``(name, last_modified)``
                pair is already recorded in the local DB are skipped.
            central_con: Connection used for central-table operations and
                handed to ``DBManager.connection_wrapper``.
            local_con: Connection used for local-DB operations.
            close_connections: Forwarded to ``DBManager.connection_wrapper``.

        Returns:
            None. Exceptions coming out of ``DBManager.connection_wrapper``
            are not caught here.
        """
        path_to_local_db = DBManager.get_local_db_file_path(path)
        path_id = DBManager.get_path_id(path)
        if path_id is None:
            # path not yet known
            path_id = DBManager.store_directory_path(path,
                                                     con=central_con,
                                                     close_connections=False)
            DBManager.store_path_id(path_id,
                                    path_to_local_db=path_to_local_db,
                                    con=local_con,
                                    close_connections=False)
        # Set of already stored image attributes for O(1) membership tests
        imgs_names_and_date = set(
            DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

        # Note: 'MAX' returns None / (None, ) as a default value
        # NOTE(review): the '+ 1' below assumes get_max_image_id already maps
        # that default to a number - confirm.
        max_img_id = DBManager.get_max_image_id(
            path_to_local_db=path_to_local_db)
        start_img_id = max_img_id + 1
        initial_max_embedding_id = DBManager.get_max_embedding_id()

        def get_counted_img_loader():
            # Pair each loaded image with a fresh image id; zip stops after
            # n images or when the loader is exhausted, whichever is first.
            img_loader = load_imgs_from_path(path,
                                             recursive=True,
                                             output_file_names=True,
                                             output_file_paths=True)
            nums = range(start_img_id, start_img_id + n)
            return zip(nums, img_loader)

        def store_embedding_row_dicts(con):
            # 'con' is the connection supplied by the wrapper; it is the
            # caller's central_con whenever one was passed in.
            max_embedding_id = initial_max_embedding_id
            for img_id, (img_path, img_name, img) in get_counted_img_loader():
                # Check if image already stored --> don't process again
                # known = (name, last modified) as a pair known for this
                # directory
                last_modified = datetime.datetime.fromtimestamp(
                    round(os.stat(img_path).st_mtime))
                if check_if_known and (img_name,
                                       last_modified) in imgs_names_and_date:
                    continue

                # NOTE(review): local_con is not managed by the wrapper call
                # below - confirm who commits/closes it.
                DBManager.store_image(img_id=img_id,
                                      rel_file_path=img_name,
                                      last_modified=last_modified,
                                      path_to_local_db=path_to_local_db,
                                      con=local_con,
                                      close_connections=False)
                # Use the wrapper-supplied connection (not the outer
                # central_con, which may be None) so this write shares the
                # connection used for the embeddings below.
                DBManager.store_image_path(img_id=img_id,
                                           path_id=path_id,
                                           con=con,
                                           close_connections=False)

                faces = Models.altered_mtcnn.forward_return_results(img)
                if not faces:
                    log_error(f"no faces found in image '{img_path}'")
                    continue

                # One row per detected face, with consecutive embedding ids
                # continuing after the highest id handed out so far.
                embeddings_row_dicts = [
                    {
                        Columns.cluster_id.col_name: 'NULL',
                        Columns.embedding.col_name: face_to_embedding(face),
                        Columns.thumbnail.col_name: face,
                        Columns.image_id.col_name: img_id,
                        Columns.embedding_id.col_name: embedding_id,
                    }
                    for embedding_id, face in enumerate(
                        faces, start=max_embedding_id + 1)
                ]
                DBManager.store_embeddings(embeddings_row_dicts,
                                           con=con,
                                           close_connections=False)
                max_embedding_id += len(faces)

        DBManager.connection_wrapper(store_embedding_row_dicts,
                                     con=central_con,
                                     close_connections=close_connections)