# RUN
# ==============================================================================

# Gather every argument of the parallel moderation call in one place; the
# user-facing settings are all read from plugin_params.
moderation_call_kwargs = dict(
    input_df=plugin_params.input_df,
    api_call_function=call_api_moderation,
    api_exceptions=API_EXCEPTIONS,
    parallel_workers=plugin_params.parallel_workers,
    error_handling=plugin_params.error_handling,
    minimum_score=plugin_params.minimum_score,
    column_prefix=column_prefix,
)
df = api_parallelizer(**moderation_call_kwargs)

# The formatter is configured with the input dataframe, the category
# settings, the error-handling mode and the column prefix, then applied to
# the dataframe returned by the parallel call.
formatter_kwargs = dict(
    input_df=plugin_params.input_df,
    category_level=plugin_params.unsafe_content_category_level,
    content_categories_top_level=plugin_params.unsafe_content_categories_top_level,
    content_categories_second_level=plugin_params.unsafe_content_categories_second_level,
    error_handling=plugin_params.error_handling,
    column_prefix=column_prefix,
)
api_formatter = UnsafeContentAPIFormatter(**formatter_kwargs)
output_df = api_formatter.format_df(df)

# Write the formatted dataframe to the output dataset, then pass the
# formatter's column descriptions to set_column_description.
plugin_params.output_dataset.write_with_schema(output_df)
set_column_description(
    output_dataset=plugin_params.output_dataset,
    column_description_dict=api_formatter.column_description_dict,
)
# Example #2
0
        return MessageToJson(response)


# Arguments for the parallel named-entity-recognition calls. Every value is a
# name defined earlier in the script (outside this section).
recognition_call_kwargs = dict(
    input_df=input_df,
    api_call_function=call_api_named_entity_recognition,
    api_exceptions=API_EXCEPTIONS,
    column_prefix=column_prefix,
    parallel_workers=parallel_workers,
    error_handling=error_handling,
    text_column=text_column,
    text_language=text_language,
    entity_sentiment=entity_sentiment,
)
df = api_parallelizer(**recognition_call_kwargs)

# Configure the formatter and apply it to the dataframe returned above.
formatter_kwargs = dict(
    input_df=input_df,
    column_prefix=column_prefix,
    entity_types=entity_types,
    minimum_score=minimum_score,
    error_handling=error_handling,
)
api_formatter = NamedEntityRecognitionAPIFormatter(**formatter_kwargs)
output_df = api_formatter.format_df(df)

# Write the formatted dataframe, then hand the formatter's column
# descriptions to set_column_description together with both datasets.
output_dataset.write_with_schema(output_df)
description_kwargs = dict(
    input_dataset=input_dataset,
    output_dataset=output_dataset,
    column_description_dict=api_formatter.column_description_dict,
)
set_column_description(**description_kwargs)
# -*- coding: utf-8 -*-
"""Image Cropping recipe script"""

from plugin_params_loader import PluginParamsLoader, RecipeID
from parallelizer import parallelizer
from google_vision_api_formatting import CropHintsAPIResponseFormatter
from dku_io_utils import set_column_description

# Load and validate the parameters of the cropping recipe.
params = PluginParamsLoader(RecipeID.CROPPING).validate_load_params()

# Arguments wired explicitly from the API wrapper and the input folder. The
# remaining attributes of params are forwarded as-is via vars(params); a key
# present in both mappings raises TypeError at the call.
explicit_kwargs = dict(
    function=params.api_wrapper.call_api_annotate_image,
    batch_response_parser=params.api_wrapper.batch_api_response_parser,
    exceptions=params.api_wrapper.API_EXCEPTIONS,
    folder=params.input_folder,
    folder_is_gcs=params.input_folder_is_gcs,
    folder_bucket=params.input_folder_bucket,
    folder_root_path=params.input_folder_root_path,
)
df = parallelizer(**explicit_kwargs, **vars(params))

# Format the responses and write them to the output dataset, then pass the
# formatter's column descriptions to set_column_description.
api_formatter = CropHintsAPIResponseFormatter(**vars(params))
output_df = api_formatter.format_df(df)
params.output_dataset.write_with_schema(output_df)
set_column_description(
    params.output_dataset,
    api_formatter.column_description_dict,
)

# format_save_images is only called when an output folder is set.
if params.output_folder:
    api_formatter.format_save_images(params.output_folder)
# Example #4
0
# -*- coding: utf-8 -*-
import dataiku
from dataiku.customrecipe import get_input_names_for_role, get_output_names_for_role, get_recipe_config
from plugin_config_loading import load_plugin_config
from language_detection import LanguageDetector
from dku_io_utils import process_dataset_chunks, set_column_description

# Setup: resolve the first dataset of each role, load the recipe
# configuration and build the language detector from it.
input_dataset_name = get_input_names_for_role("input_dataset")[0]
input_dataset = dataiku.Dataset(input_dataset_name)
output_dataset_name = get_output_names_for_role("output_dataset")[0]
output_dataset = dataiku.Dataset(output_dataset_name)
params = load_plugin_config(get_recipe_config())
detector_kwargs = {
    option: params[option]
    for option in ("language_scope", "minimum_score", "fallback_language")
}
detector = LanguageDetector(**detector_kwargs)

# Run: apply the detector's dataframe function through
# process_dataset_chunks, then pass the detector's column descriptions to
# set_column_description.
chunk_processing_kwargs = dict(
    input_dataset=input_dataset,
    output_dataset=output_dataset,
    func=detector.detect_languages_df,
    text_column=params["text_column"],
)
process_dataset_chunks(**chunk_processing_kwargs)
description_kwargs = dict(
    input_dataset=input_dataset,
    output_dataset=output_dataset,
    column_description_dict=detector.column_description_dict,
)
set_column_description(**description_kwargs)