def create_experiment(Experiment_name, Experiment_description=None):
    """Load the SageMaker Experiment *Experiment_name*, creating it if it does not exist.

    Args:
        Experiment_name: Name of the experiment to load or create.
        Experiment_description: Description used only when the experiment is created.

    Returns:
        The loaded or newly created ``Experiment``.

    Raises:
        Any service error other than a resource-not-found failure is re-raised
        (the original silently swallowed these, leaving ``experiment`` unbound).
    """
    try:
        experiment = Experiment.load(experiment_name=Experiment_name)
    except Exception as ex:
        # Only "does not exist yet" means we should create it; anything
        # else (throttling, auth, ...) is a real error and must surface.
        if "ResourceNotFound" not in str(ex):
            raise
        experiment = Experiment.create(
            experiment_name=Experiment_name,
            description=Experiment_description)
    return experiment
def cleanup_experiment(Experiment_name):
    """Delete the experiment *Experiment_name* with all its trials and trial components.

    Trial components that are still associated with another trial are skipped.
    If the experiment does not exist, a message is printed and nothing is
    deleted; any other service error is re-raised instead of being silently
    swallowed as in the original.

    Args:
        Experiment_name: Name of the experiment to tear down.
    """
    try:
        experiment = Experiment.load(experiment_name=Experiment_name)
        for trial_summary in experiment.list_trials():
            trial = Trial.load(trial_name=trial_summary.trial_name)
            for trial_component_summary in trial.list_trial_components():
                tc = TrialComponent.load(
                    trial_component_name=trial_component_summary.trial_component_name)
                trial.remove_trial_component(tc)
                try:
                    # comment out to keep trial components
                    tc.delete()
                except Exception:
                    # tc is associated with another trial
                    continue
                # to prevent throttling
                time.sleep(.5)
            trial.delete()
        experiment.delete()
    except Exception as ex:
        if 'ResourceNotFound' not in str(ex):
            raise  # unexpected failure: surface it rather than hiding a partial cleanup
        print('%s is a new experiment. Nothing to delete' % Experiment_name)
def cleanup_trial(Experiment_name, Trial_name):
    """Delete the single trial *Trial_name* (and its trial components) from an experiment.

    Trial components still associated with another trial are skipped.
    The experiment itself is left in place.

    Args:
        Experiment_name: Name of the experiment containing the trial.
        Trial_name: Name of the trial to delete.
    """
    experiment = Experiment.load(experiment_name=Experiment_name)
    for trial_summary in experiment.list_trials():
        # Only load the trial we were asked to delete (the original loaded
        # every trial before checking the name).
        if trial_summary.trial_name != Trial_name:
            continue
        trial = Trial.load(trial_name=trial_summary.trial_name)
        for trial_component_summary in trial.list_trial_components():
            tc = TrialComponent.load(
                trial_component_name=trial_component_summary.trial_component_name)
            print(trial_component_summary.trial_component_name)
            trial.remove_trial_component(tc)
            try:
                # comment out to keep trial components
                tc.delete()
            except Exception:
                # tc is associated with another trial
                continue
            # to prevent throttling
            time.sleep(.5)
        trial.delete()
def main():  # pragma: no cover
    """The main harness that creates or updates and runs the pipeline.

    Parses CLI arguments, upserts and starts the SageMaker Pipeline, tracks
    the run as an Experiment Trial, and logs the Processing step's container
    arguments as trial parameters. Exits with status 2 on missing required
    arguments and status 1 on any pipeline/tracking failure.
    """
    parser = argparse.ArgumentParser(
        "Creates or updates and runs the pipeline for the pipeline script.")
    parser.add_argument(
        "-n",
        "--module-name",
        dest="module_name",
        type=str,
        help="The module name of the pipeline to import.",
    )
    parser.add_argument(
        "-kwargs",
        "--kwargs",
        dest="kwargs",
        default=None,
        help="Dict string of keyword arguments for the pipeline generation (if supported)",
    )
    parser.add_argument(
        "-role-arn",
        "--role-arn",
        dest="role_arn",
        type=str,
        help="The role arn for the pipeline service execution role.",
    )
    parser.add_argument(
        "-description",
        "--description",
        dest="description",
        type=str,
        default=None,
        help="The description of the pipeline.",
    )
    parser.add_argument(
        "-tags",
        "--tags",
        dest="tags",
        default=None,
        help="""List of dict strings of '[{"Key": "string", "Value": "string"}, ..]'""",
    )
    args = parser.parse_args()

    if args.module_name is None or args.role_arn is None:
        parser.print_help()
        sys.exit(2)
    tags = convert_struct(args.tags)

    try:
        pipeline = get_pipeline_driver(args.module_name, args.kwargs)
        print(
            "###### Creating/updating a SageMaker Pipeline with the following definition:"
        )
        parsed = json.loads(pipeline.definition())
        print(json.dumps(parsed, indent=2, sort_keys=True))

        upsert_response = pipeline.upsert(role_arn=args.role_arn,
                                          description=args.description,
                                          tags=tags)
        print("\n###### Created/Updated SageMaker Pipeline: Response received:")
        print(upsert_response)

        execution = pipeline.start()
        print(
            f"\n###### Execution started with PipelineExecutionArn: {execution.arn}"
        )

        # Describe the execution instance to find out more about the run.
        execution_run = execution.describe()
        print(execution_run)

        # Create or load the 'Experiment' for this pipeline.
        try:
            experiment = Experiment.create(
                experiment_name=pipeline.name,
                description='Amazon Customer Reviews BERT Pipeline Experiment')
        except Exception:
            # Most likely it already exists; fall back to loading it.
            experiment = Experiment.load(experiment_name=pipeline.name)
        print('Experiment name: {}'.format(experiment.experiment_name))

        # Add this execution run as a Trial of the Experiment.
        execution_run_name = execution_run['PipelineExecutionDisplayName']
        print(execution_run_name)

        trial = Trial.create(trial_name=execution_run_name,
                             experiment_name=experiment.experiment_name,
                             sagemaker_boto_client=sm)
        trial_name = trial.trial_name
        print('Trial name: {}'.format(trial_name))

        ######################################################
        ## Parse Pipeline Definition For Processing Job Args
        ######################################################
        processing_param_dict = {}
        for step in parsed['Steps']:
            print('step: {}'.format(step))
            if step['Name'] == 'Processing':
                print('Step Name is Processing...')
                arg_list = step['Arguments']['AppSpecification'][
                    'ContainerArguments']
                print(arg_list)
                num_args = len(arg_list)
                print(num_args)
                # arguments are (key, value) pairs in this list, so we extract
                # them in pairs using [i] and [i+1], stepping by 2 through the list
                for i in range(0, num_args, 2):
                    key = arg_list[i].replace('--', '')
                    value = arg_list[i + 1]
                    print('arg key: {}'.format(key))
                    print('arg value: {}'.format(value))
                    processing_param_dict[key] = value

        ##############################
        ## Wait For Execution To Finish
        ##############################
        print("Waiting for the execution to finish...")
        execution.wait()
        print("\n#####Execution completed. Execution step details:")
        print(execution.list_steps())

        # List all artifacts generated by the pipeline.
        processing_job_name = None
        training_job_name = None

        from sagemaker.lineage.visualizer import LineageTableVisualizer
        viz = LineageTableVisualizer(sagemaker.session.Session())
        for execution_step in reversed(execution.list_steps()):
            print(execution_step)
            # There appears to be a bug in LineageTableVisualizer's handling of
            # the Processing step, so job names are extracted from the ARNs.
            if execution_step['StepName'] == 'Processing':
                processing_job_name = execution_step['Metadata'][
                    'ProcessingJob']['Arn'].split('/')[-1]
                print(processing_job_name)
                #display(viz.show(processing_job_name=processing_job_name))
            elif execution_step['StepName'] == 'Train':
                training_job_name = execution_step['Metadata']['TrainingJob'][
                    'Arn'].split('/')[-1]
                print(training_job_name)
                #display(viz.show(training_job_name=training_job_name))
            else:
                #display(viz.show(pipeline_execution_step=execution_step))
                time.sleep(5)

        # Add Trial Components to the Experiment Trial.
        # -aws-processing-job is the default name assigned by ProcessingJob
        processing_job_tc = '{}-aws-processing-job'.format(processing_job_name)
        print(processing_job_tc)
        response = sm.associate_trial_component(
            TrialComponentName=processing_job_tc, TrialName=trial_name)

        # -aws-training-job is the default name assigned by TrainingJob
        training_job_tc = '{}-aws-training-job'.format(training_job_name)
        print(training_job_tc)
        response = sm.associate_trial_component(
            TrialComponentName=training_job_tc, TrialName=trial_name)

        ##############
        # Log Additional Parameters within Trial
        ##############
        print('Logging Processing Job Parameters within Experiment Trial...')
        processing_job_tracker = tracker.Tracker.load(
            trial_component_name=processing_job_tc)
        for key, value in processing_param_dict.items():
            print('key: {}, value: {}'.format(key, value))
            processing_job_tracker.log_parameters({key: str(value)})
            # must save after logging
            processing_job_tracker.trial_component.save()
    except Exception as e:  # pylint: disable=W0703
        print(f"Exception: {e}")
        sys.exit(1)