from typing import Dict, AnyStr
from ratelimit import limits, RateLimitException
from retry import retry

from plugin_params_loader import PluginParamsLoader
from amazon_rekognition_api_client import API_EXCEPTIONS, call_api_generic
from dku_io_utils import set_column_description
from api_parallelizer import api_parallelizer
from amazon_rekognition_api_formatting import UnsafeContentAPIFormatter


# ==============================================================================
# SETUP
# ==============================================================================

# Load and validate the plugin parameters once, at module level: the
# decorators on call_api_moderation below read the API quota settings
# (rate limit and period) from this object when the module is loaded.
plugin_params = PluginParamsLoader().validate_load_params()
# NOTE(review): presumably the prefix of the output columns for this recipe;
# it is not used in the visible part of the file — confirm against the rest.
column_prefix = "moderation_api"


@retry((RateLimitException, OSError), delay=plugin_params.api_quota_period, tries=5)
@limits(calls=plugin_params.api_quota_rate_limit, period=plugin_params.api_quota_period)
def call_api_moderation(row: Dict, minimum_score: int) -> AnyStr:
    response_json = call_api_generic(
        row=row,
        minimum_score=minimum_score,
        api_client=plugin_params.api_client,
        api_client_method_name="detect_moderation_labels",
        input_folder=plugin_params.input_folder,
        input_folder_is_s3=plugin_params.input_folder_is_s3,
        input_folder_bucket=plugin_params.input_folder_bucket,
        input_folder_root_path=plugin_params.input_folder_root_path,
# -*- coding: utf-8 -*-
"""Image Cropping recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import CropHintsAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the cropping recipe.
params = PluginParamsLoader(RecipeID.CROPPING).validate_load_params()

# Run the annotate-image API call through the parallelizer helper.
# Every attribute of `params` is forwarded as an extra keyword argument.
api_wrapper = params.api_wrapper
api_response_df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# Format the API responses, write them to the output dataset, then attach
# the formatter's column descriptions to that dataset.
formatter = CropHintsAPIResponseFormatter(**vars(params))
formatted_df = formatter.format_df(api_response_df)
output_dataset = params.output_dataset
output_dataset.write_with_schema(formatted_df)
set_column_description(output_dataset, formatter.column_description_dict)

# Images are only saved when an output folder is set.
output_folder = params.output_folder
if output_folder:
    formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Image Content Detection & Labeling recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import ContentDetectionLabelingAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the content detection & labeling recipe.
params = PluginParamsLoader(RecipeID.CONTENT_DETECTION_LABELING).validate_load_params()

# Run the annotate-image API call through the parallelizer helper.
# Every attribute of `params` is forwarded as an extra keyword argument.
api_wrapper = params.api_wrapper
api_response_df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# Format the API responses, write them to the output dataset, then attach
# the formatter's column descriptions to that dataset.
formatter = ContentDetectionLabelingAPIResponseFormatter(**vars(params))
formatted_df = formatter.format_df(api_response_df)
output_dataset = params.output_dataset
output_dataset.write_with_schema(formatted_df)
set_column_description(output_dataset, formatter.column_description_dict)

# Images are only saved when an output folder is set.
output_folder = params.output_folder
if output_folder:
    formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Image Text Detection recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import ImageTextDetectionAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the image text detection recipe.
params = PluginParamsLoader(RecipeID.IMAGE_TEXT_DETECTION).validate_load_params()

# Run the annotate-image API call through the parallelizer helper.
# Every attribute of `params` is forwarded as an extra keyword argument.
api_wrapper = params.api_wrapper
api_response_df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# Format the API responses, write them to the output dataset, then attach
# the formatter's column descriptions to that dataset.
formatter = ImageTextDetectionAPIResponseFormatter(**vars(params))
formatted_df = formatter.format_df(api_response_df)
output_dataset = params.output_dataset
output_dataset.write_with_schema(formatted_df)
set_column_description(output_dataset, formatter.column_description_dict)

# Images are only saved when an output folder is set.
output_folder = params.output_folder
if output_folder:
    formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Document Text Detection recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from document_utils import DocumentHandler, DocumentSplitError
from parallelizer import parallelizer
from google_vision_api_formatting import DocumentTextDetectionAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the document text detection recipe.
params = PluginParamsLoader(RecipeID.DOCUMENT_TEXT_DETECTION).validate_load_params()
doc_handler = DocumentHandler(params.error_handling, params.parallel_workers)

# Split the input documents; the resulting dataframe replaces the original
# input dataframe in the API call below.
document_df = doc_handler.split_all_documents(
    path_df=params.input_df,
    input_folder=params.input_folder,
    output_folder=params.output_folder,
)

# `input_df` is passed explicitly to parallelizer, so it must not also arrive
# through the unpacked parameters. vars() returns the live attribute dict of
# `params`, so this removal also deletes `params.input_df`.
params_dict = vars(params)
del params_dict["input_df"]

# A document split failure is handled alongside the API exceptions.
api_wrapper = params.api_wrapper
handled_exceptions = api_wrapper.API_EXCEPTIONS + (DocumentSplitError,)

# The API call reads from the output folder, which split_all_documents
# received as its `output_folder` argument.
df = parallelizer(
    input_df=document_df,
    function=api_wrapper.call_api_document_text_detection,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=handled_exceptions,
    folder=params.output_folder,
    folder_is_gcs=params.output_folder_is_gcs,
    folder_bucket=params.output_folder_bucket,
    folder_root_path=params.output_folder_root_path,
    **params_dict
)
# Example #6
# 0
# -*- coding: utf-8 -*-
"""Image Unsafe Content Moderation recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import UnsafeContentAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the unsafe content moderation recipe.
params = PluginParamsLoader(RecipeID.UNSAFE_CONTENT_MODERATION).validate_load_params()

# Run the annotate-image API call through the parallelizer helper.
# Every attribute of `params` is forwarded as an extra keyword argument.
api_wrapper = params.api_wrapper
api_response_df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# Format the API responses, write them to the output dataset, then attach
# the formatter's column descriptions to that dataset.
formatter = UnsafeContentAPIResponseFormatter(**vars(params))
formatted_df = formatter.format_df(api_response_df)
output_dataset = params.output_dataset
output_dataset.write_with_schema(formatted_df)
set_column_description(output_dataset, formatter.column_description_dict)