Example #1
from dagster import Field, StringSource, resource
from dagster.core.events import log_step_event
from dagster.core.execution.plan.external_step import (
    PICKLED_EVENTS_FILE_NAME,
    PICKLED_STEP_RUN_REF_FILE_NAME,
    step_context_to_step_run_ref,
)
from dagster_spark.configs_spark import spark_config as get_spark_config

# On EMR, Spark is installed here
EMR_SPARK_HOME = "/usr/lib/spark/"

# Name of the zip archive of pipeline code that is uploaded to the staging bucket
CODE_ZIP_NAME = "code.zip"


@resource(
    {
        "spark_config": get_spark_config(),
        "cluster_id": Field(
            StringSource, description="Name of the job flow (cluster) on which to execute."
        ),
        "region_name": Field(StringSource, description="The AWS region that the cluster is in."),
        "action_on_failure": Field(
            str,
            is_required=False,
            default_value="CANCEL_AND_WAIT",
            description="The EMR action to take when the cluster step fails: "
            "https://docs.aws.amazon.com/emr/latest/APIReference/API_StepConfig.html",
        ),
        "staging_bucket": Field(
            StringSource,
            is_required=True,
            description="S3 bucket to use for passing files between the plan process and EMR "