def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Analyzer')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the analyzer in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--train', type=str, help='GCS path of the training csv file.')
  parser.add_argument('--schema', type=str, help='GCS path of the json schema file.')
  parser.add_argument('--output-dir-uri-output-path', type=str, default='/output.txt',
                      help='Local output path for the file containing the output dir URI.')
  args = parser.parse_args()

  code_path = os.path.dirname(os.path.realpath(__file__))
  runfile_source = os.path.join(code_path, 'analyze_run.py')
  dest_files = _utils.copy_resources_to_gcs([runfile_source], args.output)
  try:
    api = _utils.get_client()
    print('Submitting job...')
    spark_args = ['--output', args.output, '--train', args.train, '--schema', args.schema]
    job_id = _utils.submit_pyspark_job(
        api, args.project, args.region, args.cluster, dest_files[0], spark_args)
    print('Job request submitted. Waiting for completion...')
    _utils.wait_for_job(api, args.project, args.region, job_id)
    Path(args.output_dir_uri_output_path).parent.mkdir(parents=True, exist_ok=True)
    Path(args.output_dir_uri_output_path).write_text(args.output)
    print('Job completed.')
  finally:
    _utils.remove_resources_from_gcs(dest_files)
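# For reference, a Dataproc PySpark submit helper like _utils.submit_pyspark_job
# can be a single jobs().submit call. A minimal sketch, assuming the
# google-api-python-client Dataproc v1 surface; the repo's helper may differ.
from googleapiclient import discovery

def submit_pyspark_job(api, project, region, cluster, py_file_uri, args):
  body = {
      'job': {
          'placement': {'clusterName': cluster},
          'pysparkJob': {'mainPythonFileUri': py_file_uri, 'args': args},
      }
  }
  job = api.projects().regions().jobs().submit(
      projectId=project, region=region, body=body).execute()
  return job['reference']['jobId']  # polled later by wait_for_job

# api = discovery.build('dataproc', 'v1')  # likely what _utils.get_client() wraps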
def main(argv=None):
  parser = argparse.ArgumentParser(description='Submit PySpark Job')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--jobflow_id', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--job_name', type=str, help='The name of the spark job.')
  parser.add_argument('--py_file', type=str, help='A path to a pyspark file to run during the step.')
  parser.add_argument('--input', type=str, help='File path of the dataset.')
  parser.add_argument('--output', type=str, help='Output path of the result files.')
  parser.add_argument('--output_file', type=str, help='S3 URI of the training job results.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting job to %s...', args.jobflow_id)
  spark_args = [args.input, args.output]
  step_id = _utils.submit_pyspark_job(
      client, args.jobflow_id, args.job_name, args.py_file, spark_args)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_job(client, args.jobflow_id, step_id)
  # Python 3: write_text expects str, not the Python 2 unicode builtin.
  Path('/output.txt').write_text(str(step_id))
  Path(args.output_file).parent.mkdir(parents=True, exist_ok=True)
  Path(args.output_file).write_text(str(args.output))
  logging.info('Job completed.')
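# A possible boto3 implementation of the EMR submit helper used above, shown
# as a sketch; the actual _utils module may shape the step differently.
import boto3

def submit_pyspark_job(client, jobflow_id, job_name, py_file, spark_args):
  # Each EMR step runs spark-submit through the built-in command-runner.
  response = client.add_job_flow_steps(
      JobFlowId=jobflow_id,
      Steps=[{
          'Name': job_name,
          'ActionOnFailure': 'CONTINUE',
          'HadoopJarStep': {
              'Jar': 'command-runner.jar',
              'Args': ['spark-submit', py_file] + list(spark_args),
          },
      }])
  return response['StepIds'][0]

# client = boto3.client('emr', region_name=region)  # likely what get_client(region) returns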
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML DataProc Setup')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region for GCE VMs.')
  parser.add_argument('--name', type=str, help='The name of the cluster to create.')
  parser.add_argument('--staging', type=str, help='GCS path to use for staging.')
  args = parser.parse_args()

  code_path = os.path.dirname(os.path.realpath(__file__))
  dirname = os.path.basename(__file__).split('.')[0]
  init_file_source = os.path.join(code_path, dirname, 'initialization_actions.sh')
  dest_files = _utils.copy_resources_to_gcs([init_file_source], args.staging)
  try:
    api = _utils.get_client()
    print('Creating cluster...')
    create_response = _utils.create_cluster(api, args.project, args.region, args.name, dest_files[0])
    print('Cluster creation request submitted. Waiting for completion...')
    _utils.wait_for_operation(api, create_response['name'])
    with open('/output.txt', 'w') as f:
      f.write(args.name)
    print('Cluster created.')
  finally:
    _utils.remove_resources_from_gcs(dest_files)
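# Sketch of a Dataproc cluster-create helper, assuming the v1 REST API; the
# returned long-running operation's 'name' is what wait_for_operation polls.
# Fields beyond initializationActions are illustrative.
def create_cluster(api, project, region, name, init_file_uri):
  cluster = {
      'projectId': project,
      'clusterName': name,
      'config': {
          'initializationActions': [{'executableFile': init_file_uri}],
      },
  }
  return api.projects().regions().clusters().create(
      projectId=project, region=region, body=cluster).execute()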
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Work Team')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the resources are.')
  parser.add_argument('--team_name', type=str.strip, required=True,
                      help='The name of your work team.')
  parser.add_argument('--description', type=str.strip, required=True,
                      help='A description of the work team.')
  parser.add_argument('--user_pool', type=str.strip, required=False, default='',
                      help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.')
  parser.add_argument('--user_groups', type=str.strip, required=False, default='',
                      help='A list of identifiers for user groups separated by commas.')
  parser.add_argument('--client_id', type=str.strip, required=False, default='',
                      help='An identifier for an application client. You must create the app client ID using Amazon Cognito.')
  parser.add_argument('--sns_topic', type=str.strip, required=False, default='',
                      help='The ARN for the SNS topic to which notifications should be published.')
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting a create workteam request to SageMaker...')
  workteam_arn = _utils.create_workteam(client, vars(args))
  logging.info('Workteam created.')

  with open('/tmp/workteam_arn.txt', 'w') as f:
    f.write(workteam_arn)
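# Sketch of the workteam helper using the documented boto3 create_workteam
# call; splitting user_groups on commas and the one-member-definition-per-group
# shape are assumptions about how _utils maps the flags.
def create_workteam(client, args):
  response = client.create_workteam(
      WorkteamName=args['team_name'],
      Description=args['description'],
      MemberDefinitions=[{
          'CognitoMemberDefinition': {
              'UserPool': args['user_pool'],
              'UserGroup': group,
              'ClientId': args['client_id'],
          },
      } for group in args['user_groups'].split(',')])
  return response['WorkteamArn']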
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML DataProc Setup')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region for GCE VMs.')
  parser.add_argument('--name', type=str, help='The name of the cluster to create.')
  parser.add_argument('--staging', type=str, help='GCS path to use for staging.')
  parser.add_argument('--output-dir-uri-output-path', type=str, default='/output.txt',
                      help='Local output path for the file containing the output dir URI.')
  args = parser.parse_args()

  code_path = os.path.dirname(os.path.realpath(__file__))
  init_file_source = os.path.join(code_path, 'initialization_actions.sh')
  dest_files = _utils.copy_resources_to_gcs([init_file_source], args.staging)
  try:
    api = _utils.get_client()
    print('Creating cluster...')
    create_response = _utils.create_cluster(api, args.project, args.region, args.name, dest_files[0])
    print('Cluster creation request submitted. Waiting for completion...')
    _utils.wait_for_operation(api, create_response['name'])
    Path(args.output_dir_uri_output_path).parent.mkdir(parents=True, exist_ok=True)
    # The parser defines no --output flag; the created cluster's name is the
    # component's output.
    Path(args.output_dir_uri_output_path).write_text(args.name)
    print('Cluster created.')
  finally:
    _utils.remove_resources_from_gcs(dest_files)
def main(argv=None):
  parser = create_parser()
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Training Job to SageMaker...')
  job_name = _utils.create_training_job(client, vars(args))
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_training_job(client, job_name)

  image = _utils.get_image_from_job(client, job_name)
  model_artifact_url = _utils.get_model_artifacts_from_job(client, job_name)
  logging.info('Get model artifacts %s from training job %s.', model_artifact_url, job_name)

  with open('/tmp/model_artifact_url.txt', 'w') as f:
    f.write(model_artifact_url)
  with open('/tmp/job_name.txt', 'w') as f:
    f.write(job_name)
  with open('/tmp/training_image.txt', 'w') as f:
    f.write(image)
  logging.info('Job completed.')
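# Plausible shapes for the wait/lookup helpers above, built from documented
# boto3 SageMaker calls; the request fields _utils fills in may be richer.
def wait_for_training_job(client, job_name):
  # boto3 ships a waiter that polls DescribeTrainingJob until a terminal state.
  client.get_waiter('training_job_completed_or_stopped').wait(
      TrainingJobName=job_name)

def get_model_artifacts_from_job(client, job_name):
  info = client.describe_training_job(TrainingJobName=job_name)
  return info['ModelArtifacts']['S3ModelArtifacts']

def get_image_from_job(client, job_name):
  info = client.describe_training_job(TrainingJobName=job_name)
  return info['AlgorithmSpecification']['TrainingImage']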
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Trainer')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the trainer in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--package', type=str, help='GCS path of the XGBoost distributed trainer package.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--conf', type=str, help='GCS path of the training json config file.')
  parser.add_argument('--rounds', type=int, help='Number of rounds to train.')
  parser.add_argument('--workers', type=int, help='Number of workers to use for training.')
  parser.add_argument('--train', type=str, help='GCS path of the training libsvm file pattern.')
  parser.add_argument('--eval', type=str, help='GCS path of the eval libsvm file pattern.')
  parser.add_argument('--analysis', type=str, help='GCS path of the analysis input.')
  parser.add_argument('--target', type=str, help='Target column name.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  api = _utils.get_client()
  logging.info('Submitting job...')
  spark_args = [args.conf, str(args.rounds), str(args.workers), args.analysis,
                args.target, args.train, args.eval, args.output]
  job_id = _utils.submit_spark_job(
      api, args.project, args.region, args.cluster, [args.package],
      'ml.dmlc.xgboost4j.scala.example.spark.XGBoostTrainer', spark_args)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_job(api, args.project, args.region, job_id)
  with open('/output.txt', 'w') as f:
    f.write(args.output)
  logging.info('Job completed.')
def main(argv=None):
  parser = create_parser()
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting HyperParameter Tuning Job request to SageMaker...')
  hpo_job_name = _utils.create_hyperparameter_tuning_job(client, vars(args))
  logging.info('HyperParameter Tuning Job request submitted. Waiting for completion...')
  _utils.wait_for_hyperparameter_training_job(client, hpo_job_name)

  best_job, best_hyperparameters = _utils.get_best_training_job_and_hyperparameters(
      client, hpo_job_name)
  model_artifact_url = _utils.get_model_artifacts_from_job(client, best_job)
  image = _utils.get_image_from_job(client, best_job)
  logging.info('HyperParameter Tuning Job completed.')

  with open('/tmp/hpo_job_name.txt', 'w') as f:
    f.write(hpo_job_name)
  with open('/tmp/best_job_name.txt', 'w') as f:
    f.write(best_job)
  with open('/tmp/best_hyperparameters.txt', 'w') as f:
    f.write(json.dumps(best_hyperparameters))
  with open('/tmp/model_artifact_url.txt', 'w') as f:
    f.write(model_artifact_url)
  with open('/tmp/training_image.txt', 'w') as f:
    f.write(image)
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Create Model')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--image', type=str,
                      help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.')
  parser.add_argument('--model_artifact_url', type=str, help='S3 model artifacts url.')
  parser.add_argument('--model_name', type=str, help='The name of the new model.')
  parser.add_argument('--role', type=str,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting model creation request to SageMaker...')
  _utils.create_model(client, args.model_artifact_url, args.model_name, args.image, args.role)
  logging.info('Model creation completed.')
  with open('/tmp/model_name.txt', 'w') as f:
    f.write(args.model_name)
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Predictor')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the predictor in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--package', type=str, help='GCS path of the XGBoost distributed trainer package.')
  parser.add_argument('--model', type=str, help='GCS path of the model file.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--predict', type=str, help='GCS path of prediction libsvm file.')
  parser.add_argument('--analysis', type=str, help='GCS path of the analysis input.')
  parser.add_argument('--target', type=str, help='Target column name.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  api = _utils.get_client()
  logging.info('Submitting job...')
  spark_args = [args.model, args.predict, args.analysis, args.target, args.output]
  job_id = _utils.submit_spark_job(
      api, args.project, args.region, args.cluster, [args.package],
      'ml.dmlc.xgboost4j.scala.example.spark.XGBoostPredictor', spark_args)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_job(api, args.project, args.region, job_id)

  prediction_results = os.path.join(args.output, 'part-*.csv')
  with open('/output.txt', 'w') as f:
    f.write(prediction_results)

  with file_io.FileIO(os.path.join(args.output, 'schema.json'), 'r') as f:
    schema = json.load(f)
  metadata = {
      'outputs': [{
          'type': 'table',
          'storage': 'gcs',
          'format': 'csv',
          'header': [x['name'] for x in schema],
          'source': prediction_results
      }]
  }
  with open('/mlpipeline-ui-metadata.json', 'w') as f:
    json.dump(metadata, f)
  logging.info('Job completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job')
  # Set required=True only if the parameter is required in the request and has no
  # default value; defaults are based on the defaults in the SageMaker UI.
  parser.add_argument('--region', type=str, required=True,
                      help='The region where the cluster launches.')
  parser.add_argument('--job_name', type=str, required=True,
                      help='The name of the tuning job. Must be unique within the same AWS account and AWS region.')
  parser.add_argument('--role', type=str, required=True,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  parser.add_argument('--image', type=str, required=False,
                      help='The registry path of the Docker image that contains the training algorithm.')
  parser.add_argument('--algorithm_name', type=str, required=False,
                      help='The name of the resource algorithm to use for the hyperparameter tuning job.')
  parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], required=True, default='File',
                      help='The input mode that the algorithm supports. File or Pipe.')
  parser.add_argument('--metric_definitions', type=json.loads, required=False, default='{}',
                      help='The dictionary of name-regex pairs that specify the metrics the algorithm emits.')
  parser.add_argument('--strategy', choices=['Bayesian', 'Random'], required=False, default='Bayesian',
                      help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training jobs it launches.')
  parser.add_argument('--metric_name', type=str, required=True,
                      help='The name of the metric to use for the objective metric.')
  parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], required=True,
                      help='Whether to minimize or maximize the objective metric.')
  parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], required=False, default='Off',
                      help='Whether to use early stopping for training jobs launched by the tuning job. Off or Auto.')
  parser.add_argument('--static_parameters', type=json.loads, required=False, default='{}',
                      help='The values of hyperparameters that do not change for the tuning job.')
  parser.add_argument('--integer_parameters', type=json.loads, required=False, default='[]',
                      help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.')
  parser.add_argument('--continuous_parameters', type=json.loads, required=False, default='[]',
                      help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.')
  parser.add_argument('--categorical_parameters', type=json.loads, required=False, default='[]',
                      help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.')
  parser.add_argument('--channels', type=json.loads, required=True, default='[{}]',
                      help='A list of dicts specifying the input channels. Must have at least one.')
  parser.add_argument('--output_location', type=str, required=True,
                      help='The Amazon S3 path where you want Amazon SageMaker to store the model artifacts of the training jobs.')
  parser.add_argument('--output_encryption_key', type=str, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.')
  parser.add_argument(
      '--instance_type',
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge'
      ],
      required=False, default='ml.m4.xlarge', help='The ML compute instance type.')
  parser.add_argument('--instance_count', type=str_to_int, required=False, default=1,
                      help='The number of ML compute instances to use in each training job.')
  parser.add_argument('--volume_size', type=str_to_int, required=False, default=1,
                      help='The size of the ML storage volume that you want to provision.')
  parser.add_argument('--max_num_jobs', type=str_to_int, required=True,
                      help='The maximum number of training jobs that a hyperparameter tuning job can launch.')
  parser.add_argument('--max_parallel_jobs', type=str_to_int, required=True,
                      help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.')
  parser.add_argument('--max_run_time', type=str_to_int, required=False, default=86400,
                      help='The maximum run time in seconds per training job.')
  parser.add_argument('--resource_encryption_key', type=str, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).')
  parser.add_argument('--vpc_security_group_ids', type=str, required=False,
                      help='The VPC security group IDs, in the form sg-xxxxxxxx.')
  parser.add_argument('--vpc_subnets', type=str, required=False,
                      help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
  parser.add_argument('--network_isolation', type=str_to_bool, required=False, default=True,
                      help='Isolates the training container.')
  parser.add_argument('--traffic_encryption', type=str_to_bool, required=False, default=False,
                      help='Encrypts all communications between ML compute instances in distributed training.')
  parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''],
                      required=False,
                      help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning".')
  parser.add_argument('--parent_hpo_jobs', type=str, required=False, default='',
                      help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.')
  parser.add_argument('--tags', type=json.loads, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting HyperParameter Tuning Job request to SageMaker...')
  hpo_job_name = _utils.create_hyperparameter_tuning_job(client, vars(args))
  logging.info('HyperParameter Tuning Job request submitted. Waiting for completion...')
  _utils.wait_for_hyperparameter_training_job(client, hpo_job_name)

  best_job, best_hyperparameters = _utils.get_best_training_job_and_hyperparameters(
      client, hpo_job_name)
  model_artifact_url = _utils.get_model_artifacts_from_job(client, best_job)
  logging.info('HyperParameter Tuning Job completed.')

  with open('/tmp/best_job_name.txt', 'w') as f:
    f.write(best_job)
  with open('/tmp/best_hyperparameters.txt', 'w') as f:
    f.write(json.dumps(best_hyperparameters))
  with open('/tmp/model_artifact_url.txt', 'w') as f:
    f.write(model_artifact_url)
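# Sketch of how the best training job and its hyperparameters can be read
# back, assuming the documented boto3 DescribeHyperParameterTuningJob
# response shape.
def get_best_training_job_and_hyperparameters(client, hpo_job_name):
  info = client.describe_hyper_parameter_tuning_job(
      HyperParameterTuningJobName=hpo_job_name)
  best = info['BestTrainingJob']
  return best['TrainingJobName'], best['TunedHyperParameters']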
def main(argv=None):
  parser = argparse.ArgumentParser(description='Shutdown EMR cluster')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--jobflow_id', type=str, help='Job flow to be shut down.')
  parser.add_argument('--job_id', type=str, help='Job id before cluster termination.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Tearing down cluster...')
  _utils.delete_cluster(client, args.jobflow_id)
  logging.info('Cluster deletion request submitted. Cluster will be shut down in the background.')
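# delete_cluster maps naturally onto a single documented boto3 call; a sketch
# under that assumption. Termination is asynchronous, hence no waiter here.
def delete_cluster(client, jobflow_id):
  client.terminate_job_flows(JobFlowIds=[jobflow_id])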
def main(argv=None):
  parser = argparse.ArgumentParser(description='Create EMR Cluster')
  parser.add_argument('--region', type=str, help='EMR Cluster region.')
  parser.add_argument('--name', type=str, help='The name of the cluster to create.')
  parser.add_argument('--release_label', type=str, default='emr-5.23.0',
                      help='The Amazon EMR release label, which determines the version of open-source application packages installed on the cluster.')
  parser.add_argument('--log_s3_uri', type=str,
                      help='The path to the Amazon S3 location where logs for this cluster are stored.')
  parser.add_argument('--instance_type', type=str, default='m4.xlarge',
                      help='The EC2 instance type of the master, core, and task nodes.')
  parser.add_argument('--instance_count', type=int, default=3,
                      help='The number of EC2 instances in the cluster.')
  parser.add_argument('--output_location_file', type=str,
                      help='File path where the program will write the Amazon S3 URI of the transform job results.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Creating cluster...')
  create_response = _utils.create_cluster(client, args.name, args.log_s3_uri,
                                          args.release_label, args.instance_type,
                                          args.instance_count)
  logging.info('Cluster creation request submitted. Waiting for completion...')
  _utils.wait_for_cluster(client, create_response['JobFlowId'])

  Path('/output.txt').write_text(str(create_response['JobFlowId']))
  logging.info('Cluster created.')
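# Sketch of the EMR create/wait helpers with documented boto3 calls; the
# instance layout (shared master/worker instance type, default EMR roles) is
# an assumption about how _utils builds the request.
def create_cluster(client, name, log_s3_uri, release_label, instance_type, instance_count):
  return client.run_job_flow(
      Name=name,
      LogUri=log_s3_uri,
      ReleaseLabel=release_label,
      Applications=[{'Name': 'Spark'}],
      Instances={
          'MasterInstanceType': instance_type,
          'SlaveInstanceType': instance_type,
          'InstanceCount': instance_count,
          'KeepJobFlowAliveWhenNoSteps': True,
      },
      JobFlowRole='EMR_EC2_DefaultRole',
      ServiceRole='EMR_DefaultRole')

def wait_for_cluster(client, jobflow_id):
  client.get_waiter('cluster_running').wait(ClusterId=jobflow_id)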
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the resources are.')
  parser.add_argument('--role', type=str.strip, required=True,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  parser.add_argument('--job_name', type=str.strip, required=True,
                      help='The name of the labeling job.')
  parser.add_argument('--label_attribute_name', type=str.strip, required=False, default='',
                      help='The attribute name to use for the label in the output manifest file. Default is the job name.')
  parser.add_argument('--manifest_location', type=str.strip, required=True,
                      help='The Amazon S3 location of the manifest file that describes the input data objects.')
  parser.add_argument('--output_location', type=str.strip, required=True,
                      help='The Amazon S3 location to write output data.')
  parser.add_argument('--output_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.')
  parser.add_argument('--task_type', type=str.strip, required=True,
                      help='Built-in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.')
  parser.add_argument('--worker_type', type=str.strip, required=True,
                      help='The workteam for data labeling: either public, private, or vendor.')
  parser.add_argument('--workteam_arn', type=str.strip, required=False,
                      help='The ARN of the work team assigned to complete the tasks.')
  parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, default='False',
                      help='If true, your data is free of adult content.')
  parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, default='False',
                      help='If true, your data is free of personally identifiable information.')
  parser.add_argument('--label_category_config', type=str.strip, required=False, default='',
                      help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.')
  parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, default=0,
                      help='The maximum number of objects that can be labeled by human workers.')
  parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, default=0,
                      help='The maximum percentage of input data objects that should be labeled.')
  parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, default=False,
                      help='Enables auto-labeling; only for bounding box, text classification, and image classification.')
  parser.add_argument('--initial_model_arn', type=str.strip, required=False, default='',
                      help='The ARN of the final model used for a previous auto-labeling job.')
  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).')
  parser.add_argument('--ui_template', type=str.strip, required=True,
                      help='The Amazon S3 bucket location of the UI template.')
  parser.add_argument('--pre_human_task_function', type=str.strip, required=False, default='',
                      help='The ARN of a Lambda function that is run before a data object is sent to a human worker.')
  parser.add_argument('--post_human_task_function', type=str.strip, required=False, default='',
                      help='The ARN of a Lambda function that implements the logic for annotation consolidation.')
  parser.add_argument('--task_keywords', type=str.strip, required=False, default='',
                      help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.')
  parser.add_argument('--title', type=str.strip, required=True,
                      help='A title for the task for your human workers.')
  parser.add_argument('--description', type=str.strip, required=True,
                      help='A description of the task for your human workers.')
  parser.add_argument('--num_workers_per_object', type=_utils.str_to_int, required=True,
                      help='The number of human workers that will label an object.')
  parser.add_argument('--time_limit', type=_utils.str_to_int, required=True,
                      help='The amount of time that a worker has to complete a task, in seconds.')
  parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, default=0,
                      help='The length of time that a task remains available for labeling by human workers.')
  parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, default=0,
                      help='The maximum number of data objects that can be labeled by human workers at the same time.')
  parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, default=0.000,
                      help='The price that you pay for each task performed by a public worker, in USD. Specify to the tenth fraction of a cent. Format as "0.000".')
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Ground Truth Job request to SageMaker...')
  _utils.create_labeling_job(client, vars(args))
  logging.info('Ground Truth labeling job request submitted. Waiting for completion...')
  _utils.wait_for_labeling_job(client, args.job_name)
  output_manifest, active_learning_model_arn = _utils.get_labeling_job_outputs(
      client, args.job_name, args.enable_auto_labeling)
  logging.info('Ground Truth Labeling Job completed.')

  with open('/tmp/output_manifest_location.txt', 'w') as f:
    f.write(output_manifest)
  with open('/tmp/active_learning_model_arn.txt', 'w') as f:
    f.write(active_learning_model_arn)
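# Sketch of reading the Ground Truth outputs back, assuming the documented
# boto3 DescribeLabelingJob response shape; returning '' when auto-labeling
# is off is an assumption about the component contract.
def get_labeling_job_outputs(client, job_name, auto_labeling):
  info = client.describe_labeling_job(LabelingJobName=job_name)
  output_manifest = info['LabelingJobOutput']['OutputDatasetS3Uri']
  model_arn = (info['LabelingJobOutput']['FinalActiveLearningModelArn']
               if auto_labeling else '')
  return output_manifest, model_arn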
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--model_name', type=str,
                      help='The name of the model that you want to use for the transform job.')
  parser.add_argument('--input_location', type=str,
                      help='The S3 location of the data source that is associated with a channel.')
  parser.add_argument('--output_location', type=str,
                      help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
  parser.add_argument('--output_location_file', type=str,
                      help='File path where the program will write the Amazon S3 URI of the transform job results.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Batch Transformation request to SageMaker...')
  batch_job_name = _utils.create_transform_job(
      client, args.model_name, args.input_location, args.output_location)
  logging.info('Batch Job request submitted. Waiting for completion...')
  _utils.wait_for_transform_job(client, batch_job_name)
  _utils.print_tranformation_job_result(args.output_location)

  Path(args.output_location_file).parent.mkdir(parents=True, exist_ok=True)
  Path(args.output_location_file).write_text(str(args.output_location))
  logging.info('Batch Transformation creation completed.')
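# Sketch of the transform-job helpers; the S3Prefix input shape, the
# m4.xlarge resources, and the '-transform' naming scheme are assumptions
# for this simple variant of the component.
def create_transform_job(client, model_name, input_location, output_location):
  job_name = model_name + '-transform'  # hypothetical naming scheme
  client.create_transform_job(
      TransformJobName=job_name,
      ModelName=model_name,
      TransformInput={'DataSource': {'S3DataSource': {
          'S3DataType': 'S3Prefix', 'S3Uri': input_location}}},
      TransformOutput={'S3OutputPath': output_location},
      TransformResources={'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 1})
  return job_name

def wait_for_transform_job(client, job_name):
  client.get_waiter('transform_job_completed_or_stopped').wait(
      TransformJobName=job_name)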
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Analyzer')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the analyzer in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--train', type=str, help='GCS path of the training csv file.')
  parser.add_argument('--schema', type=str, help='GCS path of the json schema file.')
  args = parser.parse_args()

  code_path = os.path.dirname(os.path.realpath(__file__))
  dirname = os.path.basename(__file__).split('.')[0]
  runfile_source = os.path.join(code_path, dirname, 'run.py')
  dest_files = _utils.copy_resources_to_gcs([runfile_source], args.output)
  try:
    api = _utils.get_client()
    print('Submitting job...')
    spark_args = ['--output', args.output, '--train', args.train, '--schema', args.schema]
    job_id = _utils.submit_pyspark_job(
        api, args.project, args.region, args.cluster, dest_files[0], spark_args)
    print('Job request submitted. Waiting for completion...')
    _utils.wait_for_job(api, args.project, args.region, job_id)
    with open('/output.txt', 'w') as f:
      f.write(args.output)
    print('Job completed.')
  finally:
    _utils.remove_resources_from_gcs(dest_files)
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Endpoint Deployment')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--model_name', type=str, help='The name of the new model.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Endpoint request to SageMaker...')
  endpoint_name = _utils.deploy_model(client, args.model_name)
  with open('/tmp/endpoint_name.txt', 'w') as f:
    f.write(endpoint_name)
  logging.info('Endpoint creation completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='Submit Spark Job')
  parser.add_argument('--region', type=str, help='The region where the cluster launches.')
  parser.add_argument('--jobflow_id', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--job_name', type=str, help='The name of the spark job.')
  parser.add_argument('--jar_path', type=str, help='A path to a JAR file to run during the step.')
  parser.add_argument('--main_class', type=str, default=None,
                      help='The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file.')
  parser.add_argument('--input', type=str, help='File path of the dataset.')
  parser.add_argument('--output', type=str, help='Output path of the result files.')
  parser.add_argument('--output_file', type=str, help='S3 URI of the training job results.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting job...')
  spark_args = [args.input, args.output]
  step_id = _utils.submit_spark_job(
      client, args.jobflow_id, args.job_name, args.jar_path, args.main_class, spark_args)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_job(client, args.jobflow_id, step_id)
  # step_id is a local variable, not a parsed argument.
  Path('/output.txt').write_text(str(step_id))
  Path(args.output_file).parent.mkdir(parents=True, exist_ok=True)
  Path(args.output_file).write_text(str(args.output))
  logging.info('Job completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML DataProc Deletion')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region the cluster is in.')
  parser.add_argument('--name', type=str, help='The name of the cluster to delete.')
  args = parser.parse_args()

  api = _utils.get_client()
  print('Tearing down cluster...')
  delete_response = _utils.delete_cluster(api, args.project, args.region, args.name)
  print('Cluster deletion request submitted. Waiting for completion...')
  _utils.wait_for_operation(api, delete_response['name'])
  print('Cluster deleted.')
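# Sketch of the Dataproc delete helper, assuming the v1 REST API; like
# create, it returns a long-running operation that wait_for_operation polls.
def delete_cluster(api, project, region, name):
  return api.projects().regions().clusters().delete(
      projectId=project, region=region, clusterName=name).execute()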
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the cluster launches.')
  parser.add_argument('--job_name', type=str.strip, required=False, default='',
                      help='The name of the transform job.')
  parser.add_argument('--model_name', type=str.strip, required=True,
                      help='The name of the model that you want to use for the transform job.')
  parser.add_argument('--max_concurrent', type=_utils.str_to_int, required=False, default='0',
                      help='The maximum number of parallel requests that can be sent to each instance in a transform job.')
  parser.add_argument('--max_payload', type=_utils.str_to_int, required=False, default='6',
                      help='The maximum allowed size of the payload, in MB.')
  parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''],
                      type=str.strip, required=False, default='',
                      help='The number of records to include in a mini-batch for an HTTP inference request.')
  parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.')
  parser.add_argument('--input_location', type=str.strip, required=True,
                      help='The S3 location of the data source that is associated with a channel.')
  parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''],
                      type=str.strip, required=False, default='S3Prefix',
                      help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.')
  parser.add_argument('--content_type', type=str.strip, required=False, default='',
                      help='The multipurpose internet mail extension (MIME) type of the data.')
  parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''],
                      type=str.strip, required=False, default='None',
                      help='The method to use to split the transform job data files into smaller batches.')
  parser.add_argument('--compression_type', choices=['None', 'Gzip', ''],
                      type=str.strip, required=False, default='None',
                      help='If the transform data is compressed, the specification of the compression type.')
  parser.add_argument('--output_location', type=str.strip, required=True,
                      help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.')
  parser.add_argument('--accept', type=str.strip, required=False,
                      help='The MIME type used to specify the output data.')
  parser.add_argument('--assemble_with', choices=['None', 'Line', ''],
                      type=str.strip, required=False,
                      help='Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.')
  parser.add_argument('--output_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.')
  parser.add_argument('--input_filter', type=str.strip, required=False, default='',
                      help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.')
  parser.add_argument('--output_filter', type=str.strip, required=False, default='',
                      help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.')
  parser.add_argument('--join_source', choices=['None', 'Input', ''],
                      type=str.strip, required=False, default='None',
                      help='Specifies the source of the data to join with the transformed data.')
  parser.add_argument(
      '--instance_type',
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge'
      ],
      type=str.strip, required=True, default='ml.m4.xlarge',
      help='The ML compute instance type for the transform job.')
  parser.add_argument('--instance_count', type=_utils.str_to_int, required=False,
                      help='The number of ML compute instances to use in the transform job.')
  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).')
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  parser.add_argument('--output_location_file', type=str.strip, required=True,
                      help='File path where the program will write the Amazon S3 URI of the transform job results.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Batch Transformation request to SageMaker...')
  batch_job_name = _utils.create_transform_job(client, vars(args))
  logging.info('Batch Job request submitted. Waiting for completion...')
  _utils.wait_for_transform_job(client, batch_job_name)

  Path(args.output_location_file).parent.mkdir(parents=True, exist_ok=True)
  Path(args.output_location_file).write_text(str(args.output_location))
  logging.info('Batch Transformation creation completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Create Model')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the cluster launches.')
  parser.add_argument('--model_name', type=str.strip, required=True,
                      help='The name of the new model.')
  parser.add_argument('--role', type=str.strip, required=True,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  parser.add_argument('--container_host_name', type=str.strip, required=False, default='',
                      help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.')
  parser.add_argument('--image', type=str.strip, required=False, default='',
                      help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.')
  parser.add_argument('--model_artifact_url', type=str.strip, required=False, default='',
                      help='S3 path where Amazon SageMaker stores the model artifacts.')
  parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.')
  parser.add_argument('--model_package', type=str.strip, required=False, default='',
                      help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.')
  parser.add_argument('--secondary_containers', type=_utils.str_to_json_list, required=False, default='[]',
                      help='A list of dicts that specifies the additional containers in the inference pipeline.')
  parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, default='',
                      help='The VPC security group IDs, in the form sg-xxxxxxxx.')
  parser.add_argument('--vpc_subnets', type=str.strip, required=False, default='',
                      help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
  parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, default=True,
                      help='Isolates the training container.')
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting model creation request to SageMaker...')
  _utils.create_model(client, vars(args))
  logging.info('Model creation completed.')
  with open('/tmp/model_name.txt', 'w') as f:
    f.write(args.model_name)
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Transformer')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the transformer in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--train', type=str, help='GCS path of the training csv file.')
  parser.add_argument('--eval', type=str, help='GCS path of the eval csv file.')
  parser.add_argument('--analysis', type=str, help='GCS path of the analysis results.')
  parser.add_argument('--target', type=str, help='Target column name.')
  args = parser.parse_args()

  # Remove existing [output]/train and [output]/eval if they exist.
  # It should not be done in the runtime code because runtime code should be
  # portable to on-prem, while we need gsutil here.
  _utils.delete_directory_from_gcs(os.path.join(args.output, 'train'))
  _utils.delete_directory_from_gcs(os.path.join(args.output, 'eval'))

  code_path = os.path.dirname(os.path.realpath(__file__))
  dirname = os.path.basename(__file__).split('.')[0]
  runfile_source = os.path.join(code_path, dirname, 'run.py')
  dest_files = _utils.copy_resources_to_gcs([runfile_source], args.output)
  try:
    api = _utils.get_client()
    print('Submitting job...')
    spark_args = ['--output', args.output, '--analysis', args.analysis, '--target', args.target]
    if args.train:
      spark_args.extend(['--train', args.train])
    if args.eval:
      spark_args.extend(['--eval', args.eval])
    job_id = _utils.submit_pyspark_job(
        api, args.project, args.region, args.cluster, dest_files[0], spark_args)
    print('Job request submitted. Waiting for completion...')
    _utils.wait_for_job(api, args.project, args.region, job_id)
    with open('/output_train.txt', 'w') as f:
      f.write(os.path.join(args.output, 'train', 'part-*'))
    with open('/output_eval.txt', 'w') as f:
      f.write(os.path.join(args.output, 'eval', 'part-*'))
    print('Job completed.')
  finally:
    _utils.remove_resources_from_gcs(dest_files)
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Training Job')
  parser.add_argument('--region', type=str, help='The region where the training job launches.')
  parser.add_argument('--image', type=str,
                      help='The registry path of the Docker image that contains the training algorithm.')
  parser.add_argument('--instance_type', type=str, help='The ML compute instance type.')
  parser.add_argument('--instance_count', type=int,
                      help='The number of ML compute instances to use in the training job.')
  parser.add_argument('--volume_size', type=int,
                      help='The size of the ML storage volume that you want to provision.')
  parser.add_argument('--dataset_path', type=str,
                      help='The S3 location of the data source that is associated with a channel.')
  parser.add_argument('--model_artifact_path', type=str,
                      help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
  parser.add_argument('--role', type=str,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Training Job to SageMaker...')
  job_name = _utils.create_training_job(
      client, args.image, args.instance_type, args.instance_count,
      args.volume_size, args.dataset_path, args.model_artifact_path, args.role)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_training_job(client, job_name)

  model_artifact_url = _utils.get_model_artifacts_from_job(client, job_name)
  logging.info('Get model artifacts %s from training job %s.', model_artifact_url, job_name)

  with open('/tmp/model_artifact_url.txt', 'w') as f:
    f.write(model_artifact_url)
  with open('/tmp/job_name.txt', 'w') as f:
    f.write(job_name)
  logging.info('Job completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Endpoint Deployment')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the cluster launches.')
  parser.add_argument('--endpoint_config_name', type=str.strip, required=False, default='',
                      help='The name of the endpoint configuration.')
  parser.add_argument('--variant_name_1', type=str.strip, required=False, default='variant-name-1',
                      help='The name of the production variant.')
  parser.add_argument('--model_name_1', type=str.strip, required=True,
                      help='The model name used for endpoint deployment.')
  parser.add_argument('--initial_instance_count_1', type=_utils.str_to_int, required=False, default=1,
                      help='Number of instances to launch initially.')
  parser.add_argument(
      '--instance_type_1',
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge', ''
      ],
      type=str.strip, required=False, default='ml.m4.xlarge',
      help='The ML compute instance type.')
  parser.add_argument('--initial_variant_weight_1', type=_utils.str_to_float, required=False, default=1.0,
                      help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.')
  parser.add_argument('--accelerator_type_1',
                      choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''],
                      type=str.strip, required=False, default='',
                      help='The size of the Elastic Inference (EI) instance to use for the production variant.')
  parser.add_argument('--variant_name_2', type=str.strip, required=False, default='variant-name-2',
                      help='The name of the production variant.')
  parser.add_argument('--model_name_2', type=str.strip, required=False, default='',
                      help='The model name used for endpoint deployment.')
  parser.add_argument('--initial_instance_count_2', type=_utils.str_to_int, required=False, default=1,
                      help='Number of instances to launch initially.')
  parser.add_argument(
      '--instance_type_2',
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge', ''
      ],
      type=str.strip, required=False, default='ml.m4.xlarge',
      help='The ML compute instance type.')
  parser.add_argument('--initial_variant_weight_2', type=_utils.str_to_float, required=False, default=1.0,
                      help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.')
  parser.add_argument('--accelerator_type_2',
                      choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''],
                      type=str.strip, required=False, default='',
                      help='The size of the Elastic Inference (EI) instance to use for the production variant.')
  parser.add_argument('--variant_name_3', type=str.strip, required=False, default='variant-name-3',
                      help='The name of the production variant.')
  parser.add_argument('--model_name_3', type=str.strip, required=False, default='',
                      help='The model name used for endpoint deployment.')
  parser.add_argument('--initial_instance_count_3', type=_utils.str_to_int, required=False, default=1,
                      help='Number of instances to launch initially.')
  parser.add_argument(
      '--instance_type_3',
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge', ''
      ],
      type=str.strip, required=False, default='ml.m4.xlarge',
      help='The ML compute instance type.')
  parser.add_argument('--initial_variant_weight_3', type=_utils.str_to_float, required=False, default=1.0,
                      help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.')
  parser.add_argument('--accelerator_type_3',
                      choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''],
                      type=str.strip, required=False, default='',
                      help='The size of the Elastic Inference (EI) instance to use for the production variant.')
  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).')
  parser.add_argument('--endpoint_config_tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  parser.add_argument('--endpoint_name', type=str.strip, required=False, default='',
                      help='The name of the endpoint.')
  parser.add_argument('--endpoint_tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Endpoint request to SageMaker...')
  endpoint_name = _utils.deploy_model(client, vars(args))
  logging.info('Endpoint creation request submitted. Waiting for completion...')
  _utils.wait_for_endpoint_creation(client, endpoint_name)

  with open('/tmp/endpoint_name.txt', 'w') as f:
    f.write(endpoint_name)
  logging.info('Endpoint creation completed.')
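# Sketch of a single-variant deploy helper built from documented boto3 calls;
# the fallback config/endpoint naming is a placeholder, and the real helper
# would also handle variants 2 and 3 plus tags and encryption keys.
def deploy_model(client, args):
  config_name = args['endpoint_config_name'] or args['model_name_1'] + '-config'
  endpoint_name = args['endpoint_name'] or args['model_name_1'] + '-endpoint'
  client.create_endpoint_config(
      EndpointConfigName=config_name,
      ProductionVariants=[{
          'VariantName': args['variant_name_1'],
          'ModelName': args['model_name_1'],
          'InitialInstanceCount': args['initial_instance_count_1'],
          'InstanceType': args['instance_type_1'],
          'InitialVariantWeight': args['initial_variant_weight_1'],
      }])
  client.create_endpoint(EndpointName=endpoint_name,
                         EndpointConfigName=config_name)
  return endpoint_name

def wait_for_endpoint_creation(client, endpoint_name):
  client.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)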
def main(argv=None):
  parser = argparse.ArgumentParser(description='ML Predictor')
  parser.add_argument('--project', type=str, help='Google Cloud project ID to use.')
  parser.add_argument('--region', type=str, help='Which region to run the predictor in.')
  parser.add_argument('--cluster', type=str, help='The name of the cluster to run the job on.')
  parser.add_argument('--package', type=str, help='GCS path of the XGBoost distributed trainer package.')
  parser.add_argument('--model', type=str, help='GCS path of the model file.')
  parser.add_argument('--output', type=str, help='GCS path to use for output.')
  parser.add_argument('--predict', type=str, help='GCS path of prediction libsvm file.')
  parser.add_argument('--analysis', type=str, help='GCS path of the analysis input.')
  parser.add_argument('--target', type=str, help='Target column name.')
  parser.add_argument('--prediction-results-uri-pattern-output-path', type=str, default='/output.txt',
                      help='Local output path for the file containing the prediction results URI pattern.')
  parser.add_argument('--ui-metadata-output-path', type=str, default='/mlpipeline-ui-metadata.json',
                      help='Local output path for the file containing the UI metadata JSON structure.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  api = _utils.get_client()
  logging.info('Submitting job...')
  spark_args = [args.model, args.predict, args.analysis, args.target, args.output]
  job_id = _utils.submit_spark_job(
      api, args.project, args.region, args.cluster, [args.package],
      'ml.dmlc.xgboost4j.scala.example.spark.XGBoostPredictor', spark_args)
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_job(api, args.project, args.region, job_id)

  prediction_results_uri_pattern = os.path.join(args.output, 'part-*.csv')
  Path(args.prediction_results_uri_pattern_output_path).parent.mkdir(parents=True, exist_ok=True)
  Path(args.prediction_results_uri_pattern_output_path).write_text(prediction_results_uri_pattern)

  with file_io.FileIO(os.path.join(args.output, 'schema.json'), 'r') as f:
    schema = json.load(f)
  metadata = {
      'outputs': [{
          'type': 'table',
          'storage': 'gcs',
          'format': 'csv',
          'header': [x['name'] for x in schema],
          'source': prediction_results_uri_pattern
      }]
  }
  Path(args.ui_metadata_output_path).parent.mkdir(parents=True, exist_ok=True)
  Path(args.ui_metadata_output_path).write_text(json.dumps(metadata))
  logging.info('Job completed.')
def main(argv=None):
  parser = argparse.ArgumentParser(description='SageMaker Training Job')
  parser.add_argument('--region', type=str.strip, required=True,
                      help='The region where the training job launches.')
  parser.add_argument('--job_name', type=str.strip, required=False, default='',
                      help='The name of the training job.')
  parser.add_argument('--role', type=str.strip, required=True,
                      help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  parser.add_argument('--image', type=str.strip, required=True, default='',
                      help='The registry path of the Docker image that contains the training algorithm.')
  parser.add_argument('--algorithm_name', type=str.strip, required=False, default='',
                      help='The name of the resource algorithm to use for the training job.')
  parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='The dictionary of name-regex pairs that specify the metrics the algorithm emits.')
  parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, default='File',
                      help='The input mode that the algorithm supports. File or Pipe.')
  parser.add_argument('--hyperparameters', type=_utils.str_to_json_dict, default='{}',
                      help='Dictionary of hyperparameters for the algorithm.')
  parser.add_argument('--channels', type=_utils.str_to_json_list, required=True,
                      help='A list of dicts specifying the input channels. Must have at least one.')
  parser.add_argument(
      '--instance_type',
      required=True,
      choices=[
          'ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge',
          'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
          'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge',
          'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge',
          'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge',
          'ml.p3.16xlarge', 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
          'ml.c5.9xlarge', 'ml.c5.18xlarge'
      ],
      type=str.strip, default='ml.m4.xlarge', help='The ML compute instance type.')
  parser.add_argument('--instance_count', required=True, type=_utils.str_to_int, default=1,
                      help='The number of ML compute instances to use in the training job.')
  parser.add_argument('--volume_size', type=_utils.str_to_int, required=True, default=1,
                      help='The size of the ML storage volume that you want to provision.')
  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).')
  parser.add_argument('--max_run_time', type=_utils.str_to_int, required=True, default=86400,
                      help='The maximum run time in seconds for the training job.')
  parser.add_argument('--model_artifact_path', type=str.strip, required=True,
                      help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
  parser.add_argument('--output_encryption_key', type=str.strip, required=False, default='',
                      help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.')
  parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False,
                      help='The VPC security group IDs, in the form sg-xxxxxxxx.')
  parser.add_argument('--vpc_subnets', type=str.strip, required=False,
                      help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
  parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, default=True,
                      help='Isolates the training container.')
  parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, default=False,
                      help='Encrypts all communications between ML compute instances in distributed training.')
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, default='{}',
                      help='An array of key-value pairs, to categorize AWS resources.')
  args = parser.parse_args()

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Training Job to SageMaker...')
  job_name = _utils.create_training_job(client, vars(args))
  logging.info('Job request submitted. Waiting for completion...')
  _utils.wait_for_training_job(client, job_name)

  image = _utils.get_image_from_job(client, job_name)
  model_artifact_url = _utils.get_model_artifacts_from_job(client, job_name)
  logging.info('Get model artifacts %s from training job %s.', model_artifact_url, job_name)

  with open('/tmp/model_artifact_url.txt', 'w') as f:
    f.write(model_artifact_url)
  with open('/tmp/job_name.txt', 'w') as f:
    f.write(job_name)
  with open('/tmp/training_image.txt', 'w') as f:
    f.write(image)
  logging.info('Job completed.')