import logging
import os

from azureml.core import Experiment
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Env, get_workspace, get_compute_target, create_run_configuration,
# ExperimentConfigurationWrapper, and cfg are project-local helpers and
# configuration; their import paths are not shown in this snippet.


def main():
    logging.warning("Loading environment variables...")
    e = Env()
    e.load_environment_variables(env_file_path='local.env', fallback_to_os=True)

    # Get a reference to the existing Azure Machine Learning workspace
    logging.warning(
        "Getting reference to existing Azure Machine Learning workspace...")
    auth = InteractiveLoginAuthentication(tenant_id=e.tenant_id)
    ws = get_workspace(e.workspace_name, auth, e.subscription_id,
                       e.resource_group)

    # Get the compute target. It has to be a GPU compute, since such a unit
    # is requested by the 'Feature Extraction - Inference' step.
    compute_target = get_compute_target(ws,
                                        compute_name=e.gpu_compute_name,
                                        vm_size=e.gpu_vm_size)

    # Create the run configuration
    run_config = create_run_configuration(ws)

    # -------
    # Step 1
    # -------
    # Define the input 'prepared datasets'
    input_prepared_datasets = []
    experiment_configuration = ExperimentConfigurationWrapper()
    experiment_configuration.load(
        os.path.join(cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
                     cfg.StepsStructure.get_experiments_config_filepath()))
    for data_config in experiment_configuration.json['OBJECT_DETECTION'][
            'inference']['data']:
        dataset_name = data_config['input']['dataset_name']
        dataset = ws.datasets.get(dataset_name)
        input_prepared_datasets.append(dataset.as_named_input(dataset_name))

    # Create pipeline datastore objects that link the steps, so they are
    # executed in sequence rather than in parallel.
    pipeline_datastore = ws.get_default_datastore()
    object_detection_inference_output = PipelineData(
        name="centers", datastore=pipeline_datastore, is_directory=True)

    step_object_detection_inference = PythonScriptStep(
        name="Object Detection - Inference",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectDetection.INFERENCE_STEP_SCRIPT_PATH,
        arguments=[
            '--subscription_id', e.subscription_id,
            '--resource_group', e.resource_group,
            '--workspace_name', e.workspace_name,
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
            '--model_name', cfg.MLModelNames.OBJECT_DETECTION_MODEL,
            '--model_version',
            cfg.MLModelNames.OBJECT_DETECTION_MODEL_BEST_VERSION,
            '--output_folder', object_detection_inference_output,
            '--should_register_dataset', True
        ],
        inputs=input_prepared_datasets,
        outputs=[object_detection_inference_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Step 2
    # -------
    # The input should contain the 'prepared datasets' and the detected centers
    object_extraction_input = object_detection_inference_output.as_input(
        'centers')
    object_extraction_inputs = [object_extraction_input]
    object_extraction_output = PipelineData(name="cropped_objects",
                                            datastore=pipeline_datastore,
                                            is_directory=True)

    step_object_extraction = PythonScriptStep(
        name="Object Extraction",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectExtraction.STEP_SCRIPT_PATH,
        arguments=[
            "--subscription_id", e.subscription_id,
            "--resource_group", e.resource_group,
            "--workspace_name", e.workspace_name,
            "--experiments_config_filepath",
            cfg.StepsStructure.get_experiments_config_filepath(),
            "--output_folder", object_extraction_output,
            "--should_register_dataset", True,
            # This flag can be handy when we really want to recreate the
            # cropped objects dataset (e.g. the NucleiExtractor implementation
            # changed, even though the input datasets did not).
            "--force_dataset_recreation", True
        ],
        inputs=object_extraction_inputs,
        outputs=[object_extraction_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True,
    )

    # -------
    # Step 3a
    # -------
    step_object_images_upload = PythonScriptStep(
        name="Cropped Object Images Upload to Blob Storage",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectImagesUpload.STEP_SCRIPT_PATH,
        arguments=[
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
        ],
        inputs=[object_extraction_output.as_input('cropped_objects')],
        outputs=[],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Step 3b
    # -------
    feature_extraction_inference_input = object_extraction_output.as_input(
        'cropped_objects')
    feature_extraction_inference_inputs = [feature_extraction_inference_input]
    feature_extraction_inference_output = PipelineData(
        name="latent_dims", datastore=pipeline_datastore, is_directory=True)

    step_feature_extraction_inference = PythonScriptStep(
        name="Feature Extraction - Inference",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.FeatureExtraction.INFERENCE_STEP_SCRIPT_PATH,
        arguments=[
            '--subscription_id', e.subscription_id,
            '--resource_group', e.resource_group,
            '--workspace_name', e.workspace_name,
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
            '--model_name', cfg.MLModelNames.FEATURE_EXTRACTION_MODEL,
            '--model_version',
            cfg.MLModelNames.FEATURE_EXTRACTION_MODEL_BEST_VERSION,
            '--output_folder', feature_extraction_inference_output,
            '--should_register_dataset', True
        ],
        inputs=feature_extraction_inference_inputs,
        outputs=[feature_extraction_inference_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Pipeline composition
    # -------
    pipeline_steps = [
        step_object_detection_inference,
        step_object_extraction,
        step_object_images_upload,
        step_feature_extraction_inference
    ]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

    # Create and submit an experiment
    logging.warning("Submitting experiment...")
    experiment = Experiment(ws, cfg.ExperimentNames.INFERENCE_REMOTE)
    experiment.submit(pipeline,
                      regenerate_outputs=False)  # Allow data reuse for this run
    logging.warning('Experiment submitted!')
"--input", ds_input.as_download(), "--output", prepared_data ], inputs=[ds_input], outputs=[prepared_data], compute_target=compute_target, runconfig=aml_run_config, allow_reuse=True) # Step 2: Train our model training_results = PipelineData(name="training_results", datastore=blob_store) train_step = PythonScriptStep(script_name=train_entry_point, source_directory=train_source_dir, arguments=[ "--prepped_data", prepared_data.as_input(), "--training_results", training_results ], compute_target=compute_target, runconfig=aml_run_config, allow_reuse=True) # Step 3: Build the pipeline pipeline_steps = [data_prep_step, train_step] pipeline = Pipeline(workspace=workspace, steps=[pipeline_steps]) pipeline_run1 = Experiment(workspace, "mckee_bot").submit(pipeline) pipeline_run1.wait_for_completion()