from typing import Dict, AnyStr from ratelimit import limits, RateLimitException from retry import retry from plugin_params_loader import PluginParamsLoader from amazon_rekognition_api_client import API_EXCEPTIONS, call_api_generic from dku_io_utils import set_column_description from api_parallelizer import api_parallelizer from amazon_rekognition_api_formatting import UnsafeContentAPIFormatter # ============================================================================== # SETUP # ============================================================================== plugin_params = PluginParamsLoader().validate_load_params() column_prefix = "moderation_api" @retry((RateLimitException, OSError), delay=plugin_params.api_quota_period, tries=5) @limits(calls=plugin_params.api_quota_rate_limit, period=plugin_params.api_quota_period) def call_api_moderation(row: Dict, minimum_score: int) -> AnyStr: response_json = call_api_generic( row=row, minimum_score=minimum_score, api_client=plugin_params.api_client, api_client_method_name="detect_moderation_labels", input_folder=plugin_params.input_folder, input_folder_is_s3=plugin_params.input_folder_is_s3, input_folder_bucket=plugin_params.input_folder_bucket, input_folder_root_path=plugin_params.input_folder_root_path,
# -*- coding: utf-8 -*-
"""Image Cropping recipe script

Steps, in order:
1. load and validate the plugin parameters for the CROPPING recipe,
2. hand the API wrapper's annotate-image call to ``parallelizer``,
3. format the resulting dataframe with ``CropHintsAPIResponseFormatter``,
4. write it to the output dataset and set the column descriptions,
5. if an output folder is configured, call ``format_save_images`` on it.
"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import CropHintsAPIResponseFormatter
from dku_io_utils import set_column_description

# ------------------------------------------------------------------
# Parameters
# ------------------------------------------------------------------
params = PluginParamsLoader(RecipeID.CROPPING).validate_load_params()
api_wrapper = params.api_wrapper

# ------------------------------------------------------------------
# API calls
# ------------------------------------------------------------------
# Every attribute of `params` is also forwarded as a keyword argument.
df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# ------------------------------------------------------------------
# Formatting and output
# ------------------------------------------------------------------
api_formatter = CropHintsAPIResponseFormatter(**vars(params))
output_df = api_formatter.format_df(df)

output_dataset = params.output_dataset
output_dataset.write_with_schema(output_df)
set_column_description(output_dataset, api_formatter.column_description_dict)

# The output folder is optional: only call the image export when it is set.
output_folder = params.output_folder
if output_folder:
    api_formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Image Content Detection & Labeling recipe script

Steps, in order:
1. load and validate the plugin parameters for the CONTENT_DETECTION_LABELING recipe,
2. hand the API wrapper's annotate-image call to ``parallelizer``,
3. format the resulting dataframe with ``ContentDetectionLabelingAPIResponseFormatter``,
4. write it to the output dataset and set the column descriptions,
5. if an output folder is configured, call ``format_save_images`` on it.
"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import ContentDetectionLabelingAPIResponseFormatter
from dku_io_utils import set_column_description

# ------------------------------------------------------------------
# Parameters
# ------------------------------------------------------------------
params = PluginParamsLoader(RecipeID.CONTENT_DETECTION_LABELING).validate_load_params()
api_wrapper = params.api_wrapper

# ------------------------------------------------------------------
# API calls
# ------------------------------------------------------------------
# Every attribute of `params` is also forwarded as a keyword argument.
df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# ------------------------------------------------------------------
# Formatting and output
# ------------------------------------------------------------------
api_formatter = ContentDetectionLabelingAPIResponseFormatter(**vars(params))
output_df = api_formatter.format_df(df)

output_dataset = params.output_dataset
output_dataset.write_with_schema(output_df)
set_column_description(output_dataset, api_formatter.column_description_dict)

# The output folder is optional: only call the image export when it is set.
output_folder = params.output_folder
if output_folder:
    api_formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Image Text Detection recipe script

Steps, in order:
1. load and validate the plugin parameters for the IMAGE_TEXT_DETECTION recipe,
2. hand the API wrapper's annotate-image call to ``parallelizer``,
3. format the resulting dataframe with ``ImageTextDetectionAPIResponseFormatter``,
4. write it to the output dataset and set the column descriptions,
5. if an output folder is configured, call ``format_save_images`` on it.
"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import ImageTextDetectionAPIResponseFormatter
from dku_io_utils import set_column_description

# ------------------------------------------------------------------
# Parameters
# ------------------------------------------------------------------
params = PluginParamsLoader(RecipeID.IMAGE_TEXT_DETECTION).validate_load_params()
api_wrapper = params.api_wrapper

# ------------------------------------------------------------------
# API calls
# ------------------------------------------------------------------
# Every attribute of `params` is also forwarded as a keyword argument.
df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# ------------------------------------------------------------------
# Formatting and output
# ------------------------------------------------------------------
api_formatter = ImageTextDetectionAPIResponseFormatter(**vars(params))
output_df = api_formatter.format_df(df)

output_dataset = params.output_dataset
output_dataset.write_with_schema(output_df)
set_column_description(output_dataset, api_formatter.column_description_dict)

# The output folder is optional: only call the image export when it is set.
output_folder = params.output_folder
if output_folder:
    api_formatter.format_save_images(output_folder)
# -*- coding: utf-8 -*-
"""Document Text Detection recipe script

Steps visible in this part of the script, in order:
1. load and validate the plugin parameters for the DOCUMENT_TEXT_DETECTION recipe,
2. split the input documents with ``DocumentHandler.split_all_documents``,
   reading from the input folder and targeting the output folder,
3. hand the API wrapper's document-text-detection call to ``parallelizer``,
   using the dataframe returned by the split step as ``input_df`` and the
   output folder as the folder to work on.
"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from document_utils import DocumentHandler, DocumentSplitError
from parallelizer import parallelizer
from google_vision_api_formatting import DocumentTextDetectionAPIResponseFormatter
from dku_io_utils import set_column_description

# ------------------------------------------------------------------
# Parameters
# ------------------------------------------------------------------
params = PluginParamsLoader(RecipeID.DOCUMENT_TEXT_DETECTION).validate_load_params()
api_wrapper = params.api_wrapper

# ------------------------------------------------------------------
# Document splitting
# ------------------------------------------------------------------
doc_handler = DocumentHandler(params.error_handling, params.parallel_workers)
document_df = doc_handler.split_all_documents(
    path_df=params.input_df,
    input_folder=params.input_folder,
    output_folder=params.output_folder,
)

# ------------------------------------------------------------------
# API calls
# ------------------------------------------------------------------
# `input_df` is passed explicitly below, so it must not also arrive through
# the forwarded parameters (duplicate keyword arguments raise a TypeError).
# NOTE(review): vars() returns the object's live attribute dict, so deleting
# the key here also removes `input_df` from `params` itself - confirm that
# nothing later in the script reads `params.input_df`.
params_dict = vars(params)
del params_dict["input_df"]

# Document split failures are handled alongside the API wrapper's exceptions.
handled_exceptions = api_wrapper.API_EXCEPTIONS + (DocumentSplitError,)

df = parallelizer(
    input_df=document_df,
    function=api_wrapper.call_api_document_text_detection,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=handled_exceptions,
    folder=params.output_folder,
    folder_is_gcs=params.output_folder_is_gcs,
    folder_bucket=params.output_folder_bucket,
    folder_root_path=params.output_folder_root_path,
    **params_dict
)
# -*- coding: utf-8 -*-
"""Image Unsafe Content Moderation recipe script

Steps, in order:
1. load and validate the plugin parameters for the UNSAFE_CONTENT_MODERATION recipe,
2. hand the API wrapper's annotate-image call to ``parallelizer``,
3. format the resulting dataframe with ``UnsafeContentAPIResponseFormatter``,
4. write it to the output dataset and set the column descriptions.
"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import UnsafeContentAPIResponseFormatter
from dku_io_utils import set_column_description

# ------------------------------------------------------------------
# Parameters
# ------------------------------------------------------------------
params = PluginParamsLoader(RecipeID.UNSAFE_CONTENT_MODERATION).validate_load_params()
api_wrapper = params.api_wrapper

# ------------------------------------------------------------------
# API calls
# ------------------------------------------------------------------
# Every attribute of `params` is also forwarded as a keyword argument.
df = parallelizer(
    function=api_wrapper.call_api_annotate_image,
    batch_response_parser=api_wrapper.batch_api_response_parser,
    exceptions=api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
    **vars(params)
)

# ------------------------------------------------------------------
# Formatting and output
# ------------------------------------------------------------------
api_formatter = UnsafeContentAPIResponseFormatter(**vars(params))
output_df = api_formatter.format_df(df)

output_dataset = params.output_dataset
output_dataset.write_with_schema(output_df)
set_column_description(output_dataset, api_formatter.column_description_dict)