Esempio n. 1
0
def main():
    """Create and run a Step Functions workflow with two parallel training jobs.

    Two PyTorch estimators (one per source directory) are wrapped in
    ``TrainingStep`` states, placed as branches of a single ``Parallel``
    state, and the resulting workflow is created and executed immediately.
    """
    sagemaker_session = sagemaker.Session()
    stepfunctions.set_stream_logger(level=logging.INFO)

    bucket = 's3://pixiv-image-backet'
    sagemaker_execution_role = 'arn:aws:iam::829044821271:role/service-role/AmazonSageMaker-ExecutionRole-20200412T194702'
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    def make_estimator(source_dir):
        # Both estimators share every setting except the source directory.
        return PyTorch(
            entry_point='train.py',
            source_dir=source_dir,
            role=sagemaker_execution_role,
            framework_version='1.4.0',
            train_instance_count=2,
            train_instance_type='ml.m5.2xlarge',
            hyperparameters={'train_epoch': 1},
        )

    def make_training_step(state_id, estimator):
        # Every step reads the same bucket and gets a fresh uuid-based job name.
        return steps.TrainingStep(
            state_id=state_id,
            estimator=estimator,
            data={'training': bucket},
            job_name='PD-Train-{0}'.format(uuid.uuid4()),
        )

    # Build all estimators first, then all steps, in declaration order.
    estimators = [
        make_estimator(source_dir)
        for source_dir in ('projection_discriminator', 'wgan_gp')
    ]
    training_steps = [
        make_training_step(state_id, estimator)
        for state_id, estimator in zip(('Train Step1', 'Train Step2'),
                                       estimators)
    ]

    parallel_state = steps.Parallel(state_id='Parallel')
    for branch in training_steps:
        parallel_state.add_branch(branch)

    workflow_definition = steps.Chain([parallel_state])
    workflow = Workflow(
        name='MyTraining-{0}'.format(uuid.uuid4()),
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    workflow.create()
    workflow.execute()
Esempio n. 2
0
def test_workflow_creation_failure_duplicate_state_ids(client):
    """Constructing a Workflow from a chain with a repeated state id must fail.

    Both states in the chain are named ``'HelloWorld'``; the test expects the
    ``Workflow`` constructor to raise ``ValueError``.
    """
    improper_definition = steps.Chain(
        [steps.Pass('HelloWorld'),
         steps.Succeed('HelloWorld')])
    with pytest.raises(ValueError):
        # The constructed object is never used: only the raise matters.
        Workflow(name=state_machine_name,
                 definition=improper_definition,
                 role=role_arn,
                 client=client)
def test_parallel_state_machine_creation(sfn_client, sfn_role_arn):
    """Check a Parallel state with two Pass branches followed by a final Pass.

    Builds the expected state machine definition as a plain dict and the
    equivalent definition through the SDK, then hands both to
    ``workflow_test_suite`` together with the expected result.
    """
    parallel_id = "Parallel"
    branch_ids = ("Left Pass", "Right Pass")
    final_id = "Final State"
    expected_result = "Parallel Result"

    def single_pass_branch(state_id):
        # Expected definition of a branch that consists of one terminal Pass state.
        return {
            "StartAt": state_id,
            "States": {
                state_id: {
                    "Type": "Pass",
                    "End": True
                }
            }
        }

    expected_definition = {
        "StartAt": parallel_id,
        "States": {
            parallel_id: {
                "Type": "Parallel",
                "Next": final_id,
                "Branches": [single_pass_branch(b) for b in branch_ids]
            },
            final_id: {
                "Type": "Pass",
                "Result": expected_result,
                "End": True
            }
        }
    }

    parallel_state = steps.Parallel(parallel_id)
    for branch_id in branch_ids:
        parallel_state.add_branch(steps.Pass(branch_id))

    final_state = steps.Pass(final_id, result=expected_result)
    chain = steps.Chain([parallel_state, final_state])

    workflow = Workflow('Test_Parallel_Workflow',
                        definition=chain,
                        role=sfn_role_arn)

    workflow_test_suite(sfn_client, workflow, expected_definition,
                        expected_result)
def test_map_state_machine_creation(sfn_client, sfn_role_arn):
    """Check a Map state iterating a single Pass state, followed by a final Pass.

    Builds the expected state machine definition as a plain dict and the
    equivalent definition through the SDK, then hands both to
    ``workflow_test_suite`` with the expected result and the execution input.
    """
    map_id = "Map State"
    inner_id = "Pass State"
    final_id = "Final State"
    items_path = "$.array"
    max_concurrency = 0
    expected_result = "Map Result"
    execution_input = {"array": [1, 2, 3]}

    # Expected definition, assembled piece by piece.
    expected_iterator = {
        "StartAt": inner_id,
        "States": {
            inner_id: {"Type": "Pass", "End": True}
        }
    }
    expected_map_state = {
        "ItemsPath": items_path,
        "Iterator": expected_iterator,
        "MaxConcurrency": max_concurrency,
        "Type": "Map",
        "Next": final_id
    }
    expected_final_state = {
        "Type": "Pass",
        "Result": expected_result,
        "End": True
    }
    expected_definition = {
        "StartAt": map_id,
        "States": {
            map_id: expected_map_state,
            final_id: expected_final_state
        }
    }

    # Same state machine expressed through the SDK.
    map_state = steps.Map(map_id,
                          items_path=items_path,
                          iterator=steps.Pass(inner_id),
                          max_concurrency=max_concurrency)
    final_state = steps.Pass(final_id, result=expected_result)
    chain = steps.Chain([map_state, final_state])

    workflow = Workflow('Test_Map_Workflow',
                        definition=chain,
                        role=sfn_role_arn)

    workflow_test_suite(sfn_client, workflow, expected_definition,
                        expected_result, execution_input)
 def _build_workflow(self):
     """Build ``self.workflow`` from ``self.chain_of_tasks``.

     Chains the collected tasks, creates a boto3 Step Functions client
     (stored on ``self.client``) and constructs a ``Workflow`` named
     ``self.unique_name`` that uses ``self.role.role_arn``.
     """
     logger.debug(
         f"creating a chain from all the different steps. \n {self.chain_of_tasks}"
     )
     chained_tasks = steps.Chain(self.chain_of_tasks)

     logger.debug(f"creating a workflow with name {self.unique_name}")
     self.client = boto3.client("stepfunctions")

     workflow_kwargs = {
         "name": self.unique_name,
         "definition": chained_tasks,
         "role": self.role.role_arn,
         "client": self.client,
     }
     self.workflow = Workflow(**workflow_kwargs)
Esempio n. 6
0
def main():
    """Create a Step Functions workflow that runs two AWS Batch jobs in sequence.

    Job and workflow names are read from three JSON files (paths relative to
    the current working directory). The face-clip Batch job runs first,
    followed by the tag-extraction Batch job. The workflow is created but
    not executed.
    """
    stepfunctions.set_stream_logger(level=logging.INFO)
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    def load_json(path):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def batch_step(state_id, names):
        # Build a Batch submit-job state from a {jobDefinition, job, jobQueue} mapping.
        return steps.BatchSubmitJobStep(
            state_id=state_id,
            parameters={
                'JobDefinition': names['jobDefinition'],
                'JobName': names['job'],
                'JobQueue': names['jobQueue']
            }
        )

    # Load job names
    stepfunctions_name = load_json('./stepfunctions_name.json')
    face_clip_name = load_json('./face_clip/aws_batch/batch_names.json')
    tag_extraction_name = load_json('./tag_extraction/aws_batch/batch_names.json')

    # Define steps
    face_clip_step = batch_step('Face Clip Step', face_clip_name)
    tag_extraction_step = batch_step('Tag Extraction Step', tag_extraction_name)

    # Define workflow
    workflow_definition = steps.Chain([face_clip_step, tag_extraction_step])

    workflow = Workflow(
        name=stepfunctions_name['workflow'],
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    # Create the workflow (it is not executed here)
    workflow.create()
def test_task_state_machine_creation(sfn_client, sfn_role_arn, training_job_parameters):
    """Check a Task state followed by a final Pass state.

    Builds the expected state machine definition as a plain dict and the
    equivalent definition through the SDK, then hands both to
    ``workflow_test_suite`` together with the expected result.
    """
    task_id = "TaskState"
    final_id = "FinalState"
    resource = "arn:aws:states:::sagemaker:createTrainingJob.sync"
    expected_result = "Task State Result"

    # Expected definition, assembled piece by piece.
    expected_task_state = {
        "Resource": resource,
        "Parameters": training_job_parameters,
        "Type": "Task",
        "Next": final_id
    }
    expected_final_state = {
        "Type": "Pass",
        "Result": expected_result,
        "End": True
    }
    expected_definition = {
        "StartAt": task_id,
        "States": {
            task_id: expected_task_state,
            final_id: expected_final_state
        }
    }

    # Same state machine expressed through the SDK.
    task_state = steps.Task(task_id,
                            resource=resource,
                            parameters=training_job_parameters)
    final_state = steps.Pass(final_id, result=expected_result)
    chain = steps.Chain([task_state, final_state])

    workflow = Workflow('Test_Task_Workflow',
                        definition=chain,
                        role=sfn_role_arn)

    workflow_test_suite(sfn_client, workflow, expected_definition,
                        expected_result)
Esempio n. 8
0
import uuid
import boto3
import yaml
import json

from datetime import datetime
from unittest.mock import MagicMock, Mock
from stepfunctions import steps
from stepfunctions.exceptions import WorkflowNotFound, MissingRequiredParameter
from stepfunctions.workflow import Workflow, Execution, ExecutionStatus

# Fixed sample values shared by this module; the `client` fixture below returns
# several of them from its mocked describe_state_machine call.
state_machine_name = 'HelloWorld'
state_machine_arn = 'arn:aws:states:us-east-1:1234567890:stateMachine:HelloWorld'
role_arn = 'arn:aws:iam::1234567890:role/service-role/StepFunctionsRole'
execution_arn = 'arn:aws:states:us-east-1:1234567890:execution:HelloWorld:execution-1'
# Two-state definition: a Pass state named 'HelloWorld', then a Succeed state named 'Complete'.
definition = steps.Chain([steps.Pass('HelloWorld'), steps.Succeed('Complete')])


@pytest.fixture
def client():
    sfn = boto3.client('stepfunctions')
    sfn.describe_state_machine = MagicMock(
        return_value={
            'creationDate': datetime(2019, 9, 9, 9, 59, 59, 276000),
            'definition': steps.Graph(definition).to_json(),
            'name': state_machine_name,
            'roleArn': role_arn,
            'stateMachineArn': state_machine_arn,
            'status': 'ACTIVE'
        })
    sfn.create_state_machine = MagicMock(return_value={
Esempio n. 9
0
                             model=training_step.get_expected_model(),
                             model_name=job_name)

# Endpoint configuration named after the job: one ml.m5.large instance serving
# the model registered under the same job name.
endpoint_config_step = steps.EndpointConfigStep("Create Endpoint Config",
                                                endpoint_config_name=job_name,
                                                model_name=job_name,
                                                initial_instance_count=1,
                                                instance_type='ml.m5.large')

# The endpoint name comes from the execution input; `update_endpoint` is passed
# through as the step's `update` flag.
endpoint_step = steps.EndpointStep(
    "Create or Update Endpoint",
    endpoint_name=execution_input['EndpointName'],
    endpoint_config_name=job_name,
    update=update_endpoint)

# Run the four steps strictly in sequence.
workflow_definition = steps.Chain(
    [training_step, model_step, endpoint_config_step, endpoint_step])

# Attach to the existing workflow by ARN and replace its definition

workflow = Workflow.attach(workflow_arn)
workflow.update(definition=workflow_definition)
print('Workflow updated: {}'.format(workflow_arn))

# Wait 5 seconds before executing, to give the update time to be applied
time.sleep(5)

# Start an execution and report its ARN.
execution = workflow.execute(inputs=execution_params)
stepfunction_arn = execution.execution_arn
print('Workflow exectuted: {}'.format(stepfunction_arn))

# Export environment variables
Esempio n. 10
0
    "Test model",
    parameters={
        "FunctionName": multivariant_test_step,
        "Payload":{
            "Input.$":"$"
        }
    },
    result_path='$.model_test_step_result'
)
### END MODEL TEST ACCURACY ####

### START WORK FLOW DEFINITION ####

# Branch A: training_stepA, then model_stepA, then register_artifact_step_a.
train_step_A = steps.Chain([
    training_stepA,
    model_stepA,
    register_artifact_step_a
    ])
# Branch B: the same three-step sequence for the B variant.
train_step_B = steps.Chain([
    training_stepB,
    model_stepB,
    register_artifact_step_b
    ])

# Parallel state holding both chains as its branches.
training_step = stepfunctions.steps.states.Parallel("Customer Estimators")
training_step.add_branch(train_step_A)
training_step.add_branch(train_step_B)

# Chain the steps together to generate a full AWS Step Functions
workflow_definition = steps.Chain([
    training_step,
Esempio n. 11
0
)'''

# Terminal Fail state; used below as the default branch of check_accuracy_step.
fail_step = steps.states.Fail(
    'Model Accuracy Too Low',
    comment='Validation accuracy lower than threshold'
)

# Rule on the first entry of `trainingMetrics` in lambda_step's payload: it
# matches when that entry's `Value` is less than 0.1.
# NOTE(review): values *below* 0.1 go to the registry step while the Fail state
# is labelled "accuracy too low" - presumably the metric is an error/loss; confirm.
threshold_rule = steps.choice_rule.ChoiceRule.NumericLessThan(variable=lambda_step.output()['Payload']['trainingMetrics'][0]['Value'], value=.1)

# Rule matched -> registry_lambda_step; anything else -> fail_step.
check_accuracy_step.add_choice(rule=threshold_rule, next_step=registry_lambda_step)
check_accuracy_step.default_choice(next_step=fail_step)

# Linear part of the workflow; branching happens inside check_accuracy_step.
workflow_definition = steps.Chain([
    etl_step,
    training_step,
    model_step,
    lambda_step,
    check_accuracy_step
])

# This can be used to create a brand new workflow
try:
    # This is used to update the existing workflow. 
    # That way you can still see all the step function run history
    # You could alternatively delete and recreate the workflow
    state_machine_arn = 'arn:aws:states:ap-southeast-2:' + account_id + ':stateMachine:' + workflow_name
    workflow = Workflow.attach(state_machine_arn=state_machine_arn)
    workflow.update(
        definition = workflow_definition,
        role=workflow_execution_role
    )
Esempio n. 12
0
            }
    )

    ## Step that runs the SageMaker training job
    estimator = create_estimator()
    data_path = {'train': args.data_path}

    training_step = steps.TrainingStep(
        'Train Step', 
        estimator=estimator,
        data=data_path,
        job_name=execution_input['TrainJobName'],  
        wait_for_completion=False  # The pull request to Bitbucket is now raised after the Step Functions run, so True may be fine here.
    )

    # Chain the steps together
    chain_list = [etl_step, training_step]
    workflow_definition = steps.Chain(chain_list)

    # Create the workflow
    workflow = Workflow(
        name=FLOW_NAME,
        definition=workflow_definition,
        role=WORKFLOW_ROLE,
        execution_input=execution_input
    )
    workflow.create()

    # Execute the workflow
    execution = workflow.execute(inputs=inputs)
def test_wait_state_machine_creation(sfn_client, sfn_role_arn):
    """Check the four Wait variants chained between two Pass states.

    The first Pass state injects the values that the path-based Wait states
    read. Builds the expected state machine definition as a plain dict and the
    equivalent definition through the SDK, then hands both to
    ``workflow_test_suite`` together with the expected result.
    """
    first_id = "FirstState"
    final_id = "FinalState"
    timestamp = "2019-09-04T01:59:00Z"
    timestamp_path = "$.expirydate"
    seconds = 2
    seconds_path = "$.expiryseconds"
    expected_result = "Wait Result"
    parameters = {'expirydate': timestamp, 'expiryseconds': seconds}

    # (state id, key in the expected definition, steps.Wait keyword, value), in chain order.
    wait_specs = [
        ("WaitInSeconds", "Seconds", "seconds", seconds),
        ("WaitTimestamp", "Timestamp", "timestamp", timestamp),
        ("WaitTimestampPath", "TimestampPath", "timestamp_path", timestamp_path),
        ("WaitInSecondsPath", "SecondsPath", "seconds_path", seconds_path),
    ]
    wait_ids = [spec[0] for spec in wait_specs]
    # Each wait state transitions to the next one; the last goes to the final state.
    successors = wait_ids[1:] + [final_id]

    expected_states = {
        first_id: {
            "Type": "Pass",
            "Next": wait_ids[0],
            "Parameters": parameters
        }
    }
    for (wait_id, asl_key, _kwarg, value), next_id in zip(wait_specs, successors):
        expected_states[wait_id] = {
            asl_key: value,
            "Type": "Wait",
            "Next": next_id
        }
    expected_states[final_id] = {
        "Type": "Pass",
        "Result": expected_result,
        "End": True
    }
    expected_definition = {
        "StartAt": first_id,
        "States": expected_states
    }

    # Same state machine expressed through the SDK.
    chain_states = [steps.Pass(first_id, parameters=parameters)]
    for wait_id, _asl_key, kwarg, value in wait_specs:
        chain_states.append(steps.Wait(wait_id, **{kwarg: value}))
    chain_states.append(steps.Pass(final_id, result=expected_result))
    chain = steps.Chain(chain_states)

    workflow = Workflow(unique_name_from_base('Test_Wait_Workflow'),
                        definition=chain,
                        role=sfn_role_arn)

    workflow_test_suite(sfn_client, workflow, expected_definition,
                        expected_result)
Esempio n. 14
0
def setup_workflow(project, purpose, workflow_execution_role, script_dir,
                   ecr_repository):
    """Create a Step Functions workflow that runs one SageMaker processing job.

    The processing script is uploaded to the session's default bucket under
    ``<project>/<purpose>/code``, a SageMaker experiment is created, and a
    single ``ProcessingStep`` (with a catch that routes task failures to a
    ``Fail`` state) is registered as a new workflow.

    Args:
        project: project name under SageMaker; used in the S3 prefix, the
            experiment name and the workflow name.
        purpose: sub-project name; used in the S3 prefix, as the processing
            output name, and in the workflow name.
        workflow_execution_role: ARN of the role used to execute the
            Step Functions workflow.
        script_dir: local path of the processing script (for example a
            ``.py`` file). Only its base name is used inside the container.
        ecr_repository: ECR repository name; the ``:latest`` tag is used.

    Returns:
        The created ``stepfunctions.workflow.Workflow`` instance.

    Example:
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """
    import os  # local import: only needed to derive the script's file name

    # SageMaker session setup
    # ========================================================================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ====================================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3_code_path = 's3://{}/{}'.format(s3_output, s3_prefix_code)

    # Upload the processing script. A single file is stored under its base
    # name, so that is the name the container entrypoint must reference.
    session.upload_data(script_dir,
                        bucket=s3_output,
                        key_prefix=s3_prefix_code)
    script_name = os.path.basename(script_dir)

    # ECR environment
    # ====================================
    # NOTE: the registry domain suffix is hard-coded to the China partition.
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region,
                                                      uri_suffix,
                                                      ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    # Values supplied per execution rather than baked into the definition.
    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # Script processor running python3 inside the project image
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================
    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3_code_path,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=[
            "python3", "/opt/ml/processing/input/code/" + script_name
        ],
    )

    # Fail state
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")

    catch_state_processing = steps.states.Catch(
        error_equals=["States.TaskFailed"], next_step=failed_state)

    # Create workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])

    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=workflow_execution_role)

    workflow.create()
    return workflow
Esempio n. 15
0
            "Input.$": "$"  # Pass all the state machine data to Lambda event
        }
    },
    result_path='$.model_test_step_result'  # Pass results to output
)

################# ADD CODE HERE #################

############### REPLACE THIS CODE ###############

# Chain the steps together, in execution order, into the full AWS Step Functions
# workflow definition
workflow_definition = steps.Chain([
    training_step,
    model_step,
    register_artifact_step,
    endpoint_config_step,
    endpoint_step,
    endpoint_wait_step,  # Add the endpoint wait step
    model_test_step  # Add the model test step
])

############### REPLACE THIS CODE ###############

# Build the Workflow object from the definition above and the supplied name,
# state machine ARN, role and execution input
workflow = Workflow(name=state_machine_name,
                    state_machine_arn=state_machine_arn,
                    definition=workflow_definition,
                    role=workflow_role,
                    execution_input=event_input)

# Manually update some settings that are not generated correctly by the AWS Step Functions Data Science SDK