import logging
import uuid

import sagemaker
import stepfunctions
from sagemaker.pytorch import PyTorch
from stepfunctions import steps
from stepfunctions.workflow import Workflow


def main():
    sagemaker_session = sagemaker.Session()
    stepfunctions.set_stream_logger(level=logging.INFO)

    bucket = 's3://pixiv-image-backet'
    sagemaker_execution_role = 'arn:aws:iam::829044821271:role/service-role/AmazonSageMaker-ExecutionRole-20200412T194702'
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    # Two PyTorch estimators, one per model (projection discriminator and WGAN-GP).
    estimator1 = PyTorch(entry_point='train.py',
                         source_dir='projection_discriminator',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={'train_epoch': 1})

    estimator2 = PyTorch(entry_point='train.py',
                         source_dir='wgan_gp',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={'train_epoch': 1})

    # Each SageMaker training job needs a unique name, hence the uuid suffix.
    training_step1 = steps.TrainingStep(state_id='Train Step1',
                                        estimator=estimator1,
                                        data={'training': bucket},
                                        job_name='PD-Train-{0}'.format(uuid.uuid4()))

    training_step2 = steps.TrainingStep(state_id='Train Step2',
                                        estimator=estimator2,
                                        data={'training': bucket},
                                        job_name='WGAN-GP-Train-{0}'.format(uuid.uuid4()))

    # Run both training steps as parallel branches of a single Parallel state.
    parallel_state = steps.Parallel(state_id='Parallel')
    parallel_state.add_branch(training_step1)
    parallel_state.add_branch(training_step2)

    workflow_definition = steps.Chain([parallel_state])

    workflow = Workflow(
        name='MyTraining-{0}'.format(uuid.uuid4()),
        definition=workflow_definition,
        role=workflow_execution_role,
    )
    workflow.create()
    workflow.execute()
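
# A minimal sketch (not part of the original snippet): workflow.execute() above is
# fire-and-forget. With the AWS Step Functions Data Science SDK you can instead
# capture the returned Execution and block until both parallel branches finish.
def run_and_wait(workflow):
    execution = workflow.execute()
    return execution.get_output(wait=True)  # polls until the execution completes


if __name__ == '__main__':
    main()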
# The opening of this Workflow constructor is reconstructed; workflow_name and
# workflow_definition are assumed to be defined earlier in the original file.
workflow = Workflow(
    name=workflow_name,
    definition=workflow_definition,
    role=workflow_execution_role,
    execution_input=execution_input
)
workflow.create()

# Documentation states the following:
#   Updates an existing state machine by modifying its definition and/or role.
#   Executions started immediately after calling this method may use the
#   previous definition and role.
import time
time.sleep(60)  # wait 60 secs to allow the update of the workflow to complete. The method is not synchronous!

# Finally, run the workflow!
execution = workflow.execute(
    inputs={
        'TrainingJobName': training_job_name,  # each SageMaker job requires a unique name
        'ModelName': model_name                # each model requires a unique name
    }
)

# Now let's create the CloudFormation template parameters file ready for the
# CodeDeploy step in the pipeline.
model_data_url = 's3://{}/{}/output/{}/output/model.tar.gz'.format(
    bucket, project_name, training_job_name)
parameter_file_data = {
    "Parameters": {
        "ModelName": model_name,
        "ModelDataUrl": model_data_url,
        "TrainingImage": container,
        "InstanceType": "ml.t2.xlarge",
        "InstanceCount": "1",
    }
}
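
# Assumed continuation (the original snippet is truncated above): the parameters
# dict would typically be serialized to a JSON file for the CloudFormation/
# CodeDeploy step to consume. The file name 'cfn-parameters.json' is an
# assumption, not from the original.
import json

with open('cfn-parameters.json', 'w') as f:
    json.dump(parameter_file_data, f, indent=2)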
# Execution-time input schema (reconstructed opening; the schema key is assumed
# from the usage of execution_input below).
from stepfunctions.inputs import ExecutionInput

execution_input = ExecutionInput(
    schema={
        'TrainJobName': str,
    }
)

## Step that runs the SageMaker training job
estimator = create_estimator()
data_path = {'train': args.data_path}
training_step = steps.TrainingStep(
    'Train Step',
    estimator=estimator,
    data=data_path,
    job_name=execution_input['TrainJobName'],
    wait_for_completion=False  # Since this was changed to open a pull request to Bitbucket after running SFn, True might be fine here.
)

# Chain the steps together
chain_list = [etl_step, training_step]
workflow_definition = steps.Chain(chain_list)

# Create the workflow
workflow = Workflow(
    name=FLOW_NAME,
    definition=workflow_definition,
    role=WORKFLOW_ROLE,
    execution_input=execution_input
)
workflow.create()

# Execute the workflow
execution = workflow.execute(inputs=inputs)
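
# Hypothetical sketch of the create_estimator() helper referenced above; its
# definition is not shown in the original snippet, so the entry point, framework
# version, instance settings, and the SAGEMAKER_ROLE constant are all assumptions.
from sagemaker.pytorch import PyTorch

def create_estimator():
    return PyTorch(
        entry_point='train.py',        # assumed training script
        role=SAGEMAKER_ROLE,           # assumed execution-role constant
        framework_version='1.4.0',
        train_instance_count=1,
        train_instance_type='ml.m5.2xlarge',
    )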
name="Main-workflow", definition=Main_workflow_definition, role=workflow_execution_role ) # COMMAND ---------- Main_workflow.render_graph() # COMMAND ---------- Main_workflow.create() # COMMAND ---------- Main_workflow_execution = Main_workflow.execute() # COMMAND ---------- # MAGIC %md # MAGIC Main_workflow_execution = Workflow( # MAGIC name="Campaign_Workflow", # MAGIC definition=path1, # MAGIC role=workflow_execution_role # MAGIC ) # COMMAND ---------- # COMMAND ----------