import logging
import uuid

import sagemaker
import stepfunctions
from sagemaker.pytorch import PyTorch
from stepfunctions import steps
from stepfunctions.workflow import Workflow


def main():
    sagemaker_session = sagemaker.Session()
    stepfunctions.set_stream_logger(level=logging.INFO)

    bucket = 's3://pixiv-image-backet'
    sagemaker_execution_role = 'arn:aws:iam::829044821271:role/service-role/AmazonSageMaker-ExecutionRole-20200412T194702'
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    # Estimators for the two training jobs
    estimator1 = PyTorch(entry_point='train.py',
                         source_dir='projection_discriminator',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    estimator2 = PyTorch(entry_point='train.py',
                         source_dir='wgan_gp',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    # One training step per estimator
    training_step1 = steps.TrainingStep(state_id='Train Step1',
                                        estimator=estimator1,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(uuid.uuid4()))

    training_step2 = steps.TrainingStep(state_id='Train Step2',
                                        estimator=estimator2,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(uuid.uuid4()))

    # Run both training steps as parallel branches
    parallel_state = steps.Parallel(state_id='Parallel')
    parallel_state.add_branch(training_step1)
    parallel_state.add_branch(training_step2)

    workflow_definition = steps.Chain([parallel_state])
    workflow = Workflow(
        name='MyTraining-{0}'.format(uuid.uuid4()),
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    workflow.create()
    workflow.execute()
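# Hypothetical entry point (not part of the original excerpt): the usual way to run
# the setup above as a script.
if __name__ == '__main__':
    main()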
def workflow(client):
    workflow = Workflow(name=state_machine_name,
                        definition=definition,
                        role=role_arn,
                        client=client)
    workflow.create()
    return workflow
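# Usage sketch (not from the original source): the fixture-style helper above relies on
# module-level state_machine_name, definition, and role_arn, and receives a boto3
# Step Functions client such as the one below.
import boto3

sfn_client = boto3.client('stepfunctions')  # assumes AWS credentials and region are configured
# created_workflow = workflow(sfn_client)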
import json
import logging

import stepfunctions
from stepfunctions import steps
from stepfunctions.workflow import Workflow


def main():
    stepfunctions.set_stream_logger(level=logging.INFO)

    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    # Load job names
    with open('./stepfunctions_name.json', 'r') as f:
        stepfunctions_name = json.load(f)
    with open('./face_clip/aws_batch/batch_names.json', 'r') as f:
        face_clip_name = json.load(f)
    with open('./tag_extraction/aws_batch/batch_names.json', 'r') as f:
        tag_extraction_name = json.load(f)

    # Define steps
    face_clip_step = steps.BatchSubmitJobStep(
        state_id='Face Clip Step',
        parameters={
            'JobDefinition': face_clip_name['jobDefinition'],
            'JobName': face_clip_name['job'],
            'JobQueue': face_clip_name['jobQueue']
        }
    )
    tag_extraction_step = steps.BatchSubmitJobStep(
        state_id='Tag Extraction Step',
        parameters={
            'JobDefinition': tag_extraction_name['jobDefinition'],
            'JobName': tag_extraction_name['job'],
            'JobQueue': tag_extraction_name['jobQueue']
        }
    )

    # Define workflow
    chain_list = [face_clip_step, tag_extraction_step]
    workflow_definition = steps.Chain(chain_list)
    workflow = Workflow(
        name=stepfunctions_name['workflow'],
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    # Create the workflow
    workflow.create()
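# Possible follow-up (not part of the original): once main() has created the state
# machine, it can be attached by ARN and started; the helper name is illustrative.
def run_workflow(state_machine_arn):
    workflow = Workflow.attach(state_machine_arn=state_machine_arn)
    execution = workflow.execute()
    return execution.get_output(wait=True)  # block until the state machine finishes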
def create_sfn_workflow(params, steps):
    sfn_workflow_name = params['sfn-workflow-name']
    workflow_execution_role = params['sfn-role-arn']

    workflow_graph = Chain(steps)
    branching_workflow = Workflow(
        name=sfn_workflow_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    branching_workflow.create()

    # Push the definition explicitly in case a state machine with this name already
    # existed; updates are asynchronous, so wait briefly before returning.
    branching_workflow.update(workflow_graph)
    time.sleep(5)

    return branching_workflow
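# Illustrative call of the helper above (not from the original source): the parameter
# keys match what the function reads; the Pass states and role ARN are placeholders.
from stepfunctions.steps import Pass

example_params = {
    'sfn-workflow-name': 'my-branching-workflow',
    'sfn-role-arn': 'arn:aws:iam::123456789012:role/StepFunctionsWorkflowExecutionRole',
}
example_steps = [Pass(state_id='Start'), Pass(state_id='Finish')]
# branching_workflow = create_sfn_workflow(example_params, example_steps)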
def create_workflow_and_check_definition(workflow_graph, workflow_name, sfn_client, sfn_role_arn):
    # Create workflow
    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=sfn_role_arn,
                        client=sfn_client)
    state_machine_arn = workflow.create()

    # Check workflow definition
    state_machine_desc = sfn_client.describe_state_machine(
        stateMachineArn=state_machine_arn)
    assert workflow.definition.to_dict() == json.loads(
        state_machine_desc.get('definition'))

    return workflow
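# Sketch of how the test helper above might be invoked (not from the original source);
# the single Pass state, workflow name, and role ARN are placeholders.
import boto3
from stepfunctions.steps import Chain, Pass

sfn_client = boto3.client('stepfunctions')
workflow_graph = Chain([Pass(state_id='NoOp')])
# workflow = create_workflow_and_check_definition(
#     workflow_graph, 'integ-test-workflow', sfn_client,
#     'arn:aws:iam::123456789012:role/StepFunctionsWorkflowExecutionRole')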
try:
    # That way you can still see all the step function run history
    # You could alternatively delete and recreate the workflow
    state_machine_arn = 'arn:aws:states:ap-southeast-2:' + account_id + ':stateMachine:' + workflow_name
    workflow = Workflow.attach(state_machine_arn=state_machine_arn)
    workflow.update(
        definition=workflow_definition,
        role=workflow_execution_role
    )
except Exception:
    workflow = Workflow(
        name=workflow_name,
        definition=workflow_definition,
        role=workflow_execution_role,
        execution_input=execution_input
    )
    workflow.create()

# Documentation states the following:
# Updates an existing state machine by modifying its definition and/or role.
# Executions started immediately after calling this method may use the previous definition and role.
import time
time.sleep(60)  # wait 60 secs to allow the update of the workflow to complete. The method is not synchronous!

# Finally, run the workflow!
execution = workflow.execute(
    inputs={
        'TrainingJobName': training_job_name,  # Each SageMaker job requires a unique name
        'ModelName': model_name  # Each model requires a unique name
    }
)
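# Illustrative monitoring of the execution started above (not in the original); both
# calls are methods of the SDK's Execution class.
execution.render_progress()               # visualize the running execution in a notebook
result = execution.get_output(wait=True)  # block until the state machine finishes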
import time

import boto3
import sagemaker
from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
from smexperiments.experiment import Experiment
from stepfunctions import steps
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow


def setup_workflow(project, purpose, workflow_execution_role, script_dir, ecr_repository):
    """Set up everything needed for a Step Functions workflow that runs a SageMaker Processing job.

    Args:
        project: project name under SageMaker
        purpose: subproject name
        workflow_execution_role: ARN of the role used to execute the state machine
        script_dir: processing script file name, e.g. a .py file
        ecr_repository: ECR repository name

    Returns:
        workflow: a stepfunctions.workflow.Workflow instance

    Example:
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """
    # SageMaker Session setup
    # ========================================================================================
    # SageMaker Session
    # ====================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    # Storage
    # ====================================
    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ==================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3CodePath = 's3://{}/{}/code'.format(s3_output, s3_prefix)

    # preprocess & prediction scripts
    script_list = [script_dir]
    for script in script_list:
        session.upload_data(script,
                            bucket=session.default_bucket(),
                            key_prefix=s3_prefix_code)

    # ECR environment
    # ====================================
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(
        account_id, region, uri_suffix, ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # Set up the script processor
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================
    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3CodePath,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=[
            "python3", "/opt/ml/processing/input/code/" + script_dir
        ],
    )

    # Fail State
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")
    catch_state_processing = steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state)

    # Create Workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])
    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=workflow_execution_role)
    workflow.create()
    return workflow
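# Possible way to call the helper above (not from the original source); the argument
# values mirror the placeholders in its docstring, and the execution inputs follow the
# ExecutionInput schema it declares.
workflow = setup_workflow(
    project='dpt-proj-2022',
    purpose='processing',
    workflow_execution_role='arn:aws-cn:iam::123456789012:role/StepFunctionsWorkflowExecutionRole',
    script_dir='processing.py',
    ecr_repository='ecr-2022',
)
# workflow.execute(inputs={'ProcessingJobName': 'processing-job-1',
#                          'ResultPath': 's3://my-bucket/results/'})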
check_job_choice.add_choice(
    rule=ChoiceRule.StringEquals(
        variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'],
        value='Completed'),
    next_step=model_and_endpoint_step)

workflow_definition = Chain([
    create_autopilot_job_step,
    check_autopilot_job_status,
    check_job_wait_state,
    check_job_choice
])

autopilot_ml_workflow = Workflow(name="AutopilotStateMachineWorkflow",
                                 definition=workflow_definition,
                                 role=utils.get_workflow_role())

try:
    state_machine_arn = autopilot_ml_workflow.create()
except sfn_client.exceptions.StateMachineAlreadyExists as e:
    print(e.message)
else:
    print("Updating workflow definition")
    state_machine_arn = autopilot_ml_workflow.update(workflow_definition)

utils.save_state_machine_arn(state_machine_arn)

timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

# Uncomment below when you're ready to execute the workflow on deployment
# autopilot_ml_workflow.execute(
#     inputs={
#         'AutoMLJobName': f'autopilot-workflow-job-{timestamp_suffix}',
#         'ModelName': f'autopilot-workflow-{timestamp_suffix}-model',
# COMMAND ----------

Dataset_workflow = Workflow(
    name="Dataset-workflow",
    definition=Dataset_workflow_definition,
    role=workflow_execution_role
)

# COMMAND ----------

Dataset_workflow.render_graph()

# COMMAND ----------

DatasetWorkflowArn = Dataset_workflow.create()

# COMMAND ----------

# MAGIC %md
# MAGIC ### DatasetImportWorkflow

# COMMAND ----------

DatasetImport_workflow_definition = Chain([
    lambda_state_createdataset,
    wait_state_dataset,
    lambda_state_datasetimportjob,
    wait_state_datasetimportjob,
    lambda_state_datasetimportjob_status,
    datasetimportjob_choice_state
])
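# COMMAND ----------

# Hypothetical next cell (not in the original excerpt): create the import workflow the
# same way the dataset workflow was created above.
DatasetImport_workflow = Workflow(
    name="DatasetImport-workflow",
    definition=DatasetImport_workflow_definition,
    role=workflow_execution_role
)
# DatasetImportWorkflowArn = DatasetImport_workflow.create()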