Beispiel #1
0
def main(argv=None):
  """Submit a SageMaker Ground Truth labeling job and wait for it to finish.

  Parses the command-line options, creates the labeling job through
  ``_utils``, blocks until ``_utils.wait_for_labeling_job`` returns, and then
  writes the two values returned by ``_utils.get_labeling_job_outputs`` to
  fixed files under ``/tmp``.

  Args:
    argv: Optional list of argument strings to parse. When ``None``,
      argparse falls back to ``sys.argv[1:]``.
  """
  parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job')
  parser.add_argument('--region', type=str.strip, required=True, help='The region where the resources are.')
  parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.')
  parser.add_argument('--job_name', type=str.strip, required=True, help='The name of the labeling job.')
  parser.add_argument('--label_attribute_name', type=str.strip, required=False, help='The attribute name to use for the label in the output manifest file. Default is the job name.', default='')
  parser.add_argument('--manifest_location', type=str.strip, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.')
  parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 location to write output data.')
  parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
  parser.add_argument('--task_type', type=str.strip, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.')
  parser.add_argument('--worker_type', type=str.strip, required=True, help='The workteam for data labeling, either public, private, or vendor.')
  parser.add_argument('--workteam_arn', type=str.strip, required=False, help='The ARN of the work team assigned to complete the tasks.')
  parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False')
  parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False')
  parser.add_argument('--label_category_config', type=str.strip, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='')
  parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0)
  parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0)
  parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False)
  parser.add_argument('--initial_model_arn', type=str.strip, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='')
  parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
  parser.add_argument('--ui_template', type=str.strip, required=True, help='The Amazon S3 bucket location of the UI template.')
  parser.add_argument('--pre_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='')
  parser.add_argument('--post_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='')
  parser.add_argument('--task_keywords', type=str.strip, required=False, help='Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='')
  parser.add_argument('--title', type=str.strip, required=True, help='A title for the task for your human workers.')
  parser.add_argument('--description', type=str.strip, required=True, help='A description of the task for your human workers.')
  parser.add_argument('--num_workers_per_object', type=_utils.str_to_int, required=True, help='The number of human workers that will label an object.')
  parser.add_argument('--time_limit', type=_utils.str_to_int, required=True, help='The amount of time that a worker has to complete a task in seconds')
  parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0)
  parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0)
  parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000)
  parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}')

  # Forward argv so callers (and tests) can supply their own argument list;
  # argv=None keeps the previous behavior of reading sys.argv[1:].
  args = parser.parse_args(argv)

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_client(args.region)
  logging.info('Submitting Ground Truth Job request to SageMaker...')
  # The helper receives every parsed option as a plain dict.
  _utils.create_labeling_job(client, vars(args))
  logging.info('Ground Truth labeling job request submitted. Waiting for completion...')
  _utils.wait_for_labeling_job(client, args.job_name)
  output_manifest, active_learning_model_arn = _utils.get_labeling_job_outputs(client, args.job_name, args.enable_auto_labeling)

  logging.info('Ground Truth Labeling Job completed.')

  # Persist the job outputs to fixed paths under /tmp.
  with open('/tmp/output_manifest_location.txt', 'w') as f:
    f.write(output_manifest)
  with open('/tmp/active_learning_model_arn.txt', 'w') as f:
    f.write(active_learning_model_arn)
Beispiel #2
0
def main(argv=None):
  """Submit a SageMaker Ground Truth labeling job and wait for it to finish.

  Builds the parser with ``create_parser()``, creates the labeling job
  through ``_utils``, blocks until ``_utils.wait_for_labeling_job`` returns,
  and then writes the two values returned by
  ``_utils.get_labeling_job_outputs`` to fixed files under ``/tmp``.

  Args:
    argv: Optional list of argument strings to parse. When ``None``,
      argparse falls back to ``sys.argv[1:]``.
  """
  parser = create_parser()
  # Forward argv so callers (and tests) can supply their own argument list;
  # argv=None keeps the previous behavior of reading sys.argv[1:].
  args = parser.parse_args(argv)

  logging.getLogger().setLevel(logging.INFO)
  client = _utils.get_sagemaker_client(args.region, args.endpoint_url)
  logging.info('Submitting Ground Truth Job request to SageMaker...')
  # The helper receives every parsed option as a plain dict.
  _utils.create_labeling_job(client, vars(args))
  logging.info('Ground Truth labeling job request submitted. Waiting for completion...')
  _utils.wait_for_labeling_job(client, args.job_name)
  output_manifest, active_learning_model_arn = _utils.get_labeling_job_outputs(client, args.job_name, args.enable_auto_labeling)

  logging.info('Ground Truth Labeling Job completed.')

  # Persist the job outputs to fixed paths under /tmp.
  with open('/tmp/output_manifest_location.txt', 'w') as f:
    f.write(output_manifest)
  with open('/tmp/active_learning_model_arn.txt', 'w') as f:
    f.write(active_learning_model_arn)
def main(argv=None):
  """Run a Ground Truth labeling job end to end from command-line arguments.

  Parses ``argv`` with the parser from ``create_parser()``, submits the
  labeling job, installs a SIGTERM handler that asks for the job to be
  stopped, waits for the job, and finally writes the two job outputs to the
  output paths given in the parsed arguments.

  Args:
    argv: Optional list of argument strings, passed straight to
      ``parse_args``.
  """
  args = create_parser().parse_args(argv)

  logging.getLogger().setLevel(logging.INFO)
  sagemaker = _utils.get_sagemaker_client(
    args.region,
    args.endpoint_url,
    assume_role_arn=args.assume_role,
  )
  logging.info('Submitting Ground Truth Job request to SageMaker...')
  _utils.create_labeling_job(sagemaker, vars(args))

  # Installed only after the job has been submitted: on SIGTERM, request that
  # the labeling job be stopped.
  def _stop_job_on_sigterm(signum, stack_frame):
    _utils.stop_labeling_job(sagemaker, args.job_name)
    logging.info(f"Ground Truth labeling job: {args.job_name} request submitted to Stop")

  signal.signal(signal.SIGTERM, _stop_job_on_sigterm)

  logging.info('Ground Truth labeling job request submitted. Waiting for completion...')
  _utils.wait_for_labeling_job(sagemaker, args.job_name)
  manifest_out, model_arn_out = _utils.get_labeling_job_outputs(
    sagemaker, args.job_name, args.enable_auto_labeling
  )

  logging.info('Ground Truth Labeling Job completed.')

  # Each output goes to the path supplied on the command line.
  _utils.write_output(args.output_manifest_location_output_path, manifest_out)
  _utils.write_output(args.active_learning_model_arn_output_path, model_arn_out)
  def test_ground_truth(self):
    """Check the request that create_labeling_job sends to the client.

    Parses ``required_args``, calls ``_utils.create_labeling_job`` with a
    mocked client, and asserts that the client's ``create_labeling_job`` was
    called exactly once with the expected keyword arguments and that the
    helper returned ``'test_job'``.
    """
    sagemaker_client = MagicMock()
    parsed = self.parser.parse_args(required_args)
    job_name = _utils.create_labeling_job(sagemaker_client, vars(parsed))

    # Keyword arguments the helper is expected to pass to the client.
    expected_request = {
      'HumanTaskConfig': {
        'WorkteamArn': None,
        'UiConfig': {'UiTemplateS3Uri': 's3://fake-bucket/ui_template'},
        'PreHumanTaskLambdaArn': '',
        'TaskTitle': 'fake-image-labelling-work',
        'TaskDescription': 'fake job',
        'NumberOfHumanWorkersPerDataObject': 1,
        'TaskTimeLimitInSeconds': 180,
        'AnnotationConsolidationConfig': {'AnnotationConsolidationLambdaArn': ''},
      },
      'InputConfig': {
        'DataSource': {'S3DataSource': {'ManifestS3Uri': 's3://fake-bucket/manifest'}},
      },
      'LabelAttributeName': 'test_job',
      'LabelingJobName': 'test_job',
      'OutputConfig': {'S3OutputPath': 's3://fake-bucket/output', 'KmsKeyId': ''},
      'RoleArn': 'arn:aws:iam::123456789012:user/Development/product_1234/*',
      'Tags': [],
    }
    sagemaker_client.create_labeling_job.assert_called_once_with(**expected_request)

    self.assertEqual(job_name, 'test_job')