Beispiel #1
0
def eval_extract_faces(path, check_if_known=True, max_num_proc_imgs=None, central_con=None, local_con=None,
                       close_connections=True):
    """Process the images under ``path`` and store one embedding row per image.

    Images are loaded recursively from ``path``. Each image that is not
    skipped is registered via ``DBManager.store_image`` (local DB) and
    ``DBManager.store_image_path`` (central connection). If the face model
    returns a result for the image, a single embedding row is stored for it.

    Image ids are assigned by position in the loader, starting right after the
    current maximum image id of the local DB. Skipped images still consume an
    id, so the stored ids may contain gaps.

    Args:
        path: Directory to scan for images.
        check_if_known: If true, skip images whose (file name, last-modified)
            pair is already among the local DB's image attributes.
        max_num_proc_imgs: Maximum number of images taken from the loader in
            this call (skipped images count towards it). ``None`` means no
            limit.
        central_con: Connection forwarded to the central-DB calls and to
            ``DBManager.connection_wrapper``.
        local_con: Connection forwarded to the local-DB calls.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
    """
    path_to_local_db = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # path not yet known
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
    imgs_names_and_date = set(DBManager.get_images_attributes(path_to_local_db=path_to_local_db))

    # Note: 'MAX' returns None / (None, ) as a default value
    max_img_id = DBManager.get_max_image_id(path_to_local_db=path_to_local_db)
    start_img_id = max_img_id + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def get_counted_img_loader():
        """Return an iterator of ``(img_id, loader_item)`` pairs."""
        img_loader = load_imgs_from_path(path, recursive=True, output_file_names=True, output_file_paths=True)
        if max_num_proc_imgs is not None:
            # The limit is a *count*, so the id range has to be offset by
            # start_img_id. Using 'max_num_proc_imgs + 1' as the upper bound
            # would yield too few (or no) images once the DB is non-empty.
            # The range comes first in zip() so that no image beyond the limit
            # is pulled from the loader.
            img_ids = range(start_img_id, start_img_id + max_num_proc_imgs)
            return zip(img_ids, img_loader)
        return enumerate(img_loader, start=start_img_id)

    def store_embedding_row_dicts(con):
        """Store image rows and embedding rows; ``con`` is used for the embeddings."""
        print('----- get_embedding_row_dicts -----')
        # TODO: Also auto-increment emb_id etc.
        embedding_id = initial_max_embedding_id + 1
        for img_id, (img_path, img_name, img) in get_counted_img_loader():
            print_progress(img_id, 'image')

            # An image counts as known if its (name, last modified) pair is
            # already recorded for this directory.
            last_modified = datetime.datetime.fromtimestamp(round(os.stat(img_path).st_mtime))
            if check_if_known and (img_name, last_modified) in imgs_names_and_date:
                continue

            # The image is recorded even if no face is found in it below.
            DBManager.store_image(img_id=img_id, rel_file_path=img_name, last_modified=last_modified,
                                  path_to_local_db=path_to_local_db, con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

            face = Models.altered_mtcnn.forward_return_results(img)
            if face is None:
                log_error(f"no faces found in image '{img_path}'")
                continue

            embedding_row_dict = {Columns.cluster_id.col_name: 'NULL',
                                  Columns.embedding.col_name: face_to_embedding(face),
                                  Columns.thumbnail.col_name: face,
                                  Columns.image_id.col_name: img_id,
                                  Columns.embedding_id.col_name: embedding_id}
            DBManager.store_embedding(embedding_row_dict, con=con, close_connections=False)
            embedding_id += 1

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con, close_connections=close_connections)
def extract_faces(path,
                  check_if_known=True,
                  central_con=None,
                  local_con=None,
                  close_connections=True):
    """Process the images under ``path`` and store an embedding row per face.

    Images are loaded recursively from ``path``. Each image that is not
    skipped is registered via ``DBManager.store_image`` and
    ``DBManager.store_image_path``; every face the model returns for it is
    stored as one embedding row with a consecutive embedding id.

    Args:
        path: Directory to scan for images.
        check_if_known: If true, skip images whose (relative path,
            last-modified) pair is already among the local DB's image
            attributes.
        central_con: Connection forwarded to the central-DB calls and to
            ``DBManager.connection_wrapper``.
        local_con: Connection forwarded to the local-DB calls.
        close_connections: Forwarded to ``DBManager.connection_wrapper``.
    """
    # TODO: Refactor (extract functions)? + rename
    # TODO: Generate Thumbnails differently? (E.g. via Image.thumbnail or sth. like that)
    # TODO: Store + update max_img_id and max_embedding_id somewhere rather than (always) get them via DB query?

    local_db_path = DBManager.get_local_db_file_path(path)
    path_id = DBManager.get_path_id(path)
    if path_id is None:
        # Directory not registered yet: store it and remember its id locally.
        path_id = DBManager.store_directory_path(path, con=central_con, close_connections=False)
        DBManager.store_path_id(path_id, path_to_local_db=local_db_path, con=local_con, close_connections=False)
    known_images = set(DBManager.get_images_attributes(path_to_local_db=local_db_path))

    # Note: 'MAX' returns None / (None, ) as a default value
    first_img_id = DBManager.get_max_image_id(path_to_local_db=local_db_path) + 1
    initial_max_embedding_id = DBManager.get_max_embedding_id()

    def make_embedding_row(embedding_id, img_id, face):
        """Build the row dict for one detected face."""
        # TODO: Better way to create these row_dicts?
        return {
            Columns.cluster_id.col_name: 'NULL',
            Columns.embedding.col_name: face_to_embedding(face),
            Columns.thumbnail.col_name: face,
            Columns.image_id.col_name: img_id,
            Columns.embedding_id.col_name: embedding_id,
        }

    def store_embedding_row_dicts(con):
        """Walk the images and store their rows; ``con`` is used for the embeddings."""
        # TODO: Also auto-increment emb_id etc.
        last_embedding_id = initial_max_embedding_id
        images = load_imgs_from_path(path, recursive=True, output_file_names=True, output_file_paths=True)
        for img_id, (abs_path, rel_path, img) in enumerate(images, start=first_img_id):
            # TODO: Implement automatic deletion cascade! (Using among other things on_conflict clause and FKs)
            #       ---> Done?
            # An image counts as known if its (relative path, last modified)
            # pair is already recorded for this directory.
            mtime_seconds = round(os.stat(abs_path).st_mtime)
            last_modified = datetime.datetime.fromtimestamp(mtime_seconds)
            already_stored = check_if_known and (rel_path, last_modified) in known_images
            if already_stored:
                continue

            # The image is recorded even if no face is found in it below.
            DBManager.store_image(img_id=img_id, rel_file_path=rel_path, last_modified=last_modified,
                                  path_to_local_db=local_db_path, con=local_con, close_connections=False)
            DBManager.store_image_path(img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

            faces = Models.altered_mtcnn.forward_return_results(img)
            if not faces:
                log_error(f"no faces found in image '{abs_path}'")
                continue

            rows = [
                make_embedding_row(embedding_id, img_id, face)
                for embedding_id, face in enumerate(faces, start=last_embedding_id + 1)
            ]
            DBManager.store_embeddings(rows, con=con, close_connections=False)
            last_embedding_id += len(faces)

    DBManager.connection_wrapper(store_embedding_row_dicts, con=central_con, close_connections=close_connections)
Beispiel #3
0
    def extract_faces_measure(path,
                              n,
                              check_if_known=True,
                              central_con=None,
                              local_con=None,
                              close_connections=True):
        """Process at most ``n`` images under ``path`` and store their face embeddings.

        Images are loaded recursively from ``path`` and paired with
        consecutive image ids that start right after the current maximum
        image id of the local DB. Each image that is not skipped is stored,
        and every face the model returns for it becomes one embedding row.

        Args:
            path: Directory to scan for images.
            n: Maximum number of images taken from the loader (skipped images
                count towards it).
            check_if_known: If true, skip images whose (file name,
                last-modified) pair is already among the local DB's image
                attributes.
            central_con: Connection forwarded to the central-DB calls and to
                ``DBManager.connection_wrapper``.
            local_con: Connection forwarded to the local-DB calls.
            close_connections: Forwarded to ``DBManager.connection_wrapper``.
        """
        db_file = DBManager.get_local_db_file_path(path)
        path_id = DBManager.get_path_id(path)
        if path_id is None:
            # Unknown directory: register it and store its id in the local DB.
            path_id = DBManager.store_directory_path(
                path, con=central_con, close_connections=False)
            DBManager.store_path_id(
                path_id, path_to_local_db=db_file, con=local_con, close_connections=False)
        seen = set(DBManager.get_images_attributes(path_to_local_db=db_file))

        # Note: 'MAX' returns None / (None, ) as a default value
        highest_img_id = DBManager.get_max_image_id(path_to_local_db=db_file)
        start_img_id = highest_img_id + 1
        initial_max_embedding_id = DBManager.get_max_embedding_id()

        def store_embedding_row_dicts(con):
            """Walk the images and store their rows; ``con`` is used for the embeddings."""
            embeddings_so_far = initial_max_embedding_id
            loader = load_imgs_from_path(
                path, recursive=True, output_file_names=True, output_file_paths=True)
            # The id range is the first zip() argument, so iteration stops
            # after n ids without pulling a further image from the loader.
            id_range = range(start_img_id, start_img_id + n)
            for img_id, (file_path, file_name, image) in zip(id_range, loader):
                # An image counts as known if its (name, last modified) pair
                # is already recorded for this directory.
                modified_at = datetime.datetime.fromtimestamp(
                    round(os.stat(file_path).st_mtime))
                if check_if_known and (file_name, modified_at) in seen:
                    continue

                # The image is recorded even if no face is found in it below.
                DBManager.store_image(
                    img_id=img_id,
                    rel_file_path=file_name,
                    last_modified=modified_at,
                    path_to_local_db=db_file,
                    con=local_con,
                    close_connections=False)
                DBManager.store_image_path(
                    img_id=img_id, path_id=path_id, con=central_con, close_connections=False)

                faces = Models.altered_mtcnn.forward_return_results(image)
                if not faces:
                    log_error(f"no faces found in image '{file_path}'")
                    continue

                # One row per face, numbered consecutively after the last id used.
                face_rows = []
                for emb_id, face in enumerate(faces, start=embeddings_so_far + 1):
                    face_rows.append({
                        Columns.cluster_id.col_name: 'NULL',
                        Columns.embedding.col_name: face_to_embedding(face),
                        Columns.thumbnail.col_name: face,
                        Columns.image_id.col_name: img_id,
                        Columns.embedding_id.col_name: emb_id,
                    })
                DBManager.store_embeddings(face_rows, con=con, close_connections=False)
                embeddings_so_far += len(faces)

        DBManager.connection_wrapper(
            store_embedding_row_dicts, con=central_con, close_connections=close_connections)