# Build a single-step batch-conversion pipeline around a ParallelRunStep,
# submit it as an experiment run, and block until the run has finished.
print("SDK version:", azureml.core.VERSION)

dataset_name = 'grib-dataset'

# Connect to the workspace and echo its identifying fields, one per line.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

datastore = ws.get_default_datastore()

# Input side: look the dataset up by name and consume it in 'mount' mode.
input_ds = Dataset.get_by_name(ws, dataset_name)
batch_data = DatasetConsumptionConfig("batch_dataset", input_ds, mode='mount')

# Output side: pipeline data placed on the workspace's default datastore.
output_dir = PipelineData(name='batch_output', datastore=datastore)

# The parallel-run settings are loaded from a YAML file.
parallel_run_config = ParallelRunConfig.load_yaml(
    workspace=ws,
    path='convert_parallel.yml',
)

# output_dir is handed over twice on purpose: as the step's declared output
# and as the value of the --data_output_path script argument.
batch_step = ParallelRunStep(
    name="batch-conversion-step",
    parallel_run_config=parallel_run_config,
    arguments=['--data_output_path', output_dir],
    inputs=[batch_data],
    output=output_dir,
    allow_reuse=False,
)

steps = [batch_step]
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.validate()

# Submit under the 'convert-batch-pipeline' experiment and wait for the result.
experiment = Experiment(ws, 'convert-batch-pipeline')
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion()
# Command-line option: path of the parallel runconfig file (mandatory).
parser.add_argument(
    "--runconfig",
    dest="runconfig",
    type=str,
    required=True,
    help="Path to the parallel runconfig for pipeline",
)
args = parser.parse_args()
print(f'Arguments: {args}')

# Connect to the workspace and report which one is being used.
print('Connecting to workspace')
ws = Workspace.from_config()
print(
    f'WS name: {ws.name}\nRegion: {ws.location}\nSubscription id: {ws.subscription_id}\nResource group: {ws.resource_group}'
)

# Load the parallel-run settings from the file given on the command line.
print('Loading parallel runconfig for pipeline')
parallel_run_config = ParallelRunConfig.load_yaml(
    workspace=ws,
    path=args.runconfig,
)

# Look up the dataset named by the `dataset` command-line argument.
print('Loading default batch dataset')
batch_dataset = Dataset.get_by_name(ws, args.dataset)

# Expose the input dataset as a pipeline parameter (defaulting to the dataset
# loaded above) and consume that parameter as a mount.
batch_dataset_parameter = PipelineParameter(
    name="batch_dataset",
    default_value=batch_dataset,
)
batch_dataset_consumption = DatasetConsumptionConfig(
    "batch_dataset",
    batch_dataset_parameter,
).as_mount()

datastore = ws.get_default_datastore()
output_dataset_name = "batch_scoring_results"

# Existing GA code - does not allow specifying the path on the datastore:
# output_dataset = PipelineData(name='batch_output', datastore=datastore).as_dataset()