Example No. 1
import logging
import uuid

import sagemaker
import stepfunctions
from sagemaker.pytorch import PyTorch
from stepfunctions import steps
from stepfunctions.workflow import Workflow


def main():
    sagemaker_session = sagemaker.Session()
    stepfunctions.set_stream_logger(level=logging.INFO)

    bucket = 's3://pixiv-image-backet'

    sagemaker_execution_role = 'arn:aws:iam::829044821271:role/service-role/AmazonSageMaker-ExecutionRole-20200412T194702'
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    estimator1 = PyTorch(entry_point='train.py',
                         source_dir='projection_discriminator',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    estimator2 = PyTorch(entry_point='train.py',
                         source_dir='wgan_gp',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    training_step1 = steps.TrainingStep(state_id='Train Step1',
                                        estimator=estimator1,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(
                                            uuid.uuid4()))

    training_step2 = steps.TrainingStep(state_id='Train Step2',
                                        estimator=estimator2,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(
                                            uuid.uuid4()))

    parallel_state = steps.Parallel(state_id='Parallel')

    parallel_state.add_branch(training_step1)
    parallel_state.add_branch(training_step2)

    workflow_definition = steps.Chain([parallel_state])

    workflow = Workflow(
        name='MyTraining-{0}'.format(uuid.uuid4()),
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    workflow.create()
    workflow.execute()
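
If the script should block until both parallel training branches finish, the final execute() call can capture the returned Execution handle; a minimal sketch of how main() could end instead:

    execution = workflow.execute()
    # get_output(wait=True) blocks until the state machine reaches a terminal state.
    print(execution.get_output(wait=True))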
Example No. 2
def workflow(client):
    workflow = Workflow(name=state_machine_name,
                        definition=definition,
                        role=role_arn,
                        client=client)
    workflow.create()
    return workflow
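
This reads like a pytest fixture; a hypothetical test consuming it might look like the sketch below (the test name and assertion are illustrative; delete() is the SDK's own cleanup call for the created state machine):

def test_workflow_definition_roundtrips(workflow):
    # The fixture has already called create(), so the stored definition should be intact.
    assert workflow.definition.to_dict() is not None
    workflow.delete()  # tear down the state machine created by the fixture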
Example No. 3
def main():
    stepfunctions.set_stream_logger(level=logging.INFO)
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    # Load job name
    with open('./stepfunctions_name.json', 'r') as f:
        stepfunctions_name = json.load(f)

    with open('./face_clip/aws_batch/batch_names.json', 'r') as f:
        face_clip_name = json.load(f)
        
    with open('./tag_extraction/aws_batch/batch_names.json', 'r') as f:
        tag_extraction_name = json.load(f)

    # Define steps
    face_clip_step = steps.BatchSubmitJobStep(
        state_id='Face Clip Step',
        parameters={
            'JobDefinition': face_clip_name['jobDefinition'],
            'JobName': face_clip_name['job'],
            'JobQueue': face_clip_name['jobQueue']
        }
    )

    tag_extraction_step = steps.BatchSubmitJobStep(
        state_id='Tag Extraction Step',
        parameters={
            'JobDefinition': tag_extraction_name['jobDefinition'],
            'JobName': tag_extraction_name['job'],
            'JobQueue': tag_extraction_name['jobQueue']
        }
    )

    # Define workflow
    chain_list = [face_clip_step, tag_extraction_step]
    workflow_definition = steps.Chain(chain_list)

    workflow = Workflow(
        name=stepfunctions_name['workflow'],
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    # Create workflow
    workflow.create()
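
The example stops at create(); a sketch of how main() could continue, assuming the same workflow object, to start the two Batch jobs and confirm the run was registered:

    execution = workflow.execute()
    # The new run should appear as the most recent execution of this state machine.
    print(workflow.list_executions())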
Example No. 4
def create_sfn_workflow(params, steps):
    sfn_workflow_name = params['sfn-workflow-name']
    workflow_execution_role = params['sfn-role-arn']

    workflow_graph = Chain(steps)

    branching_workflow = Workflow(
        name=sfn_workflow_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )

    branching_workflow.create()
    branching_workflow.update(workflow_graph)

    time.sleep(5)

    return branching_workflow
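
A hypothetical call site for create_sfn_workflow; the dictionary keys come from the function itself, while the workflow name, role ARN, and the two Pass states below are placeholders:

from stepfunctions import steps

params = {
    'sfn-workflow-name': 'my-branching-workflow',  # illustrative name
    'sfn-role-arn': 'arn:aws:iam::123456789012:role/StepFunctionsWorkflowExecutionRole',  # placeholder ARN
}
# Two trivial Pass states stand in for real pipeline steps.
demo_steps = [steps.Pass(state_id='Prepare'), steps.Pass(state_id='Train')]
branching_workflow = create_sfn_workflow(params, demo_steps)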
Example No. 5
def create_workflow_and_check_definition(workflow_graph, workflow_name,
                                         sfn_client, sfn_role_arn):
    # Create workflow
    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=sfn_role_arn,
                        client=sfn_client)
    state_machine_arn = workflow.create()

    # Check workflow definition
    state_machine_desc = sfn_client.describe_state_machine(
        stateMachineArn=state_machine_arn)
    assert workflow.definition.to_dict() == json.loads(
        state_machine_desc.get('definition'))

    return workflow
Example No. 6
try:
    # Attach to the existing workflow and update its definition in place.
    # That way you can still see all the step function run history.
    # You could alternatively delete and recreate the workflow.
    state_machine_arn = 'arn:aws:states:ap-southeast-2:' + account_id + ':stateMachine:' + workflow_name
    workflow = Workflow.attach(state_machine_arn=state_machine_arn)
    workflow.update(
        definition = workflow_definition,
        role=workflow_execution_role
    )
except Exception:
    workflow = Workflow(
        name=workflow_name,
        definition=workflow_definition,
        role=workflow_execution_role,
        execution_input=execution_input
    )
    workflow.create()


# Documentation states the following:
# Updates an existing state machine by modifying its definition and/or role. Executions started immediately after calling this method may use the previous definition and role.
import time
time.sleep(60)  # wait 60 seconds to allow the workflow update to complete; the method is not synchronous!


# Finally, run the workflow!
execution = workflow.execute(
    inputs={
        'TrainingJobName': training_job_name,  # each SageMaker training job requires a unique name
        'ModelName': model_name  # each model requires a unique name
    }
)
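
Once the run has started, its outcome can be read back from the Execution handle; a short sketch assuming the execution object above:

# describe() wraps DescribeExecution, so 'status' is e.g. RUNNING, SUCCEEDED or FAILED.
print(execution.describe()['status'])
# list_events() returns the execution history, useful for debugging failed steps.
events = execution.list_events()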
Example No. 7
def setup_workflow(project, purpose, workflow_execution_role, script_dir,
                   ecr_repository):
    """ to setup all needed for a step function with sagemaker.
    arg: 
        project: project name under sagemaker
        purpose: subproject
        workflow_execution_role: arn to execute step functions
        script_dir: processing file name, like a .py file
        ecr_repository: ecr repository name
    return:
        workflow: a stepfunctions.workflow.Workflow instance  
    example: 
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """

    # SageMaker Session setup
    # ========================================================================================
    # SageMaker Session
    # ====================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    # Storage
    # ====================================
    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ==================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3CodePath = 's3://{}/{}/code'.format(s3_output, s3_prefix)

    ## preprocess & prediction
    script_list = [script_dir]

    for script in script_list:
        session.upload_data(script,
                            bucket=session.default_bucket(),
                            key_prefix=s3_prefix_code)

    # ECR environment
    # ====================================
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region,
                                                      uri_suffix,
                                                      ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # setup script processor
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================

    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3CodePath,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=[
            "python3", "/opt/ml/processing/input/code/" + script_dir
        ],
    )

    # Fail State
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")

    catch_state_processing = steps.states.Catch(
        error_equals=["States.TaskFailed"], next_step=failed_state)

    # Create Workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])

    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=workflow_execution_role)

    workflow.create()
    return workflow
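
A hypothetical end-to-end use of setup_workflow, reusing the placeholder values from the docstring; the execution inputs must match the ExecutionInput schema (ProcessingJobName, ResultPath) defined inside the function, and the role ARN and S3 result path below are illustrative:

import time

workflow = setup_workflow(project='dpt-proj-2022',
                          purpose='processing',
                          workflow_execution_role='arn:aws-cn:iam::123456789012:role/StepFunctionsWorkflowExecutionRole',  # placeholder
                          script_dir='processing.py',
                          ecr_repository='ecr-2022')

workflow.execute(inputs={
    'ProcessingJobName': 'dpt-proj-2022-processing-{}'.format(int(time.time())),  # unique per run
    'ResultPath': 's3://my-bucket/dpt-proj-2022/processing/output'  # illustrative output location
})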
Example No. 8
check_job_choice.add_choice(
    rule=ChoiceRule.StringEquals(variable=check_autopilot_job_status.output()
                                 ['Payload']['AutopilotJobStatus'],
                                 value='Completed'),
    next_step=model_and_endpoint_step)

workflow_definition = Chain([
    create_autopilot_job_step, check_autopilot_job_status,
    check_job_wait_state, check_job_choice
])

autopilot_ml_workflow = Workflow(name="AutopilotStateMachineWorkflow",
                                 definition=workflow_definition,
                                 role=utils.get_workflow_role())

try:
    state_machine_arn = autopilot_ml_workflow.create()
except sfn_client.exceptions.StateMachineAlreadyExists as e:
    print(e.message)
else:
    print("Updating workflow definition")
    state_machine_arn = autopilot_ml_workflow.update(workflow_definition)

utils.save_state_machine_arn(state_machine_arn)

timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

# Uncomment below when you're ready to execute workflow on deployment
# autopilot_ml_workflow.execute(
#     inputs={
#         'AutoMLJobName': f'autopilot-workflow-job-{timestamp_suffix}',
#         'ModelName': f'autopilot-workflow-{timestamp_suffix}-model',
Example No. 9
# COMMAND ----------

Dataset_workflow = Workflow(
    name="Dataset-workflow",
    definition=Dataset_workflow_definition,
    role=workflow_execution_role
)

# COMMAND ----------

Dataset_workflow.render_graph()

# COMMAND ----------

DatasetWorkflowArn = Dataset_workflow.create()

# COMMAND ----------

# MAGIC %md
# MAGIC ### DatasetImportWorkflow

# COMMAND ----------

DatasetImport_workflow_definition = Chain([
    lambda_state_createdataset,
    wait_state_dataset,
    lambda_state_datasetimportjob,
    wait_state_datasetimportjob,
    lambda_state_datasetimportjob_status,
    datasetimportjob_choice_state
])
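
The import chain can then be wrapped, rendered, and created just like the Dataset workflow cells above; a minimal sketch assuming the same workflow_execution_role (the workflow name is illustrative):

# COMMAND ----------

DatasetImport_workflow = Workflow(
    name="DatasetImport-workflow",
    definition=DatasetImport_workflow_definition,
    role=workflow_execution_role
)

# COMMAND ----------

DatasetImport_workflow.render_graph()

# COMMAND ----------

DatasetImportWorkflowArn = DatasetImport_workflow.create()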