def test_stop_tuning_job(sagemaker_session):
    feature_num = 14
    train_input = np.random.rand(1000, feature_num)

    rcf = RandomCutForest(role='SageMakerRole', train_instance_count=1,
                          train_instance_type='ml.c4.xlarge', num_trees=50,
                          num_samples_per_tree=20, sagemaker_session=sagemaker_session,
                          base_job_name='test-randomcutforest')

    records = rcf.record_set(train_input)
    records.distribution = 'FullyReplicated'

    test_records = rcf.record_set(train_input, channel='test')
    test_records.distribution = 'FullyReplicated'

    hyperparameter_ranges = {'num_trees': IntegerParameter(50, 100),
                             'num_samples_per_tree': IntegerParameter(1, 2)}

    objective_metric_name = 'test:f1'

    tuner = HyperparameterTuner(estimator=rcf, objective_metric_name=objective_metric_name,
                                hyperparameter_ranges=hyperparameter_ranges,
                                objective_type='Maximize', max_jobs=2, max_parallel_jobs=2)

    tuner.fit([records, test_records])

    # Give the tuning job a moment to start before requesting the stop.
    time.sleep(15)

    latest_tuning_job_name = tuner.latest_tuning_job.name

    print('Attempting to stop {}'.format(latest_tuning_job_name))

    tuner.stop_tuning_job()

    # Immediately after the stop request, the job should report 'Stopping'.
    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client\
        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=latest_tuning_job_name)
    assert desc['HyperParameterTuningJobStatus'] == 'Stopping'
def test_stop_tuning_job(sagemaker_session, cpu_instance_type):
    feature_num = 14
    train_input = np.random.rand(1000, feature_num)

    rcf = RandomCutForest(
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        num_trees=50,
        num_samples_per_tree=20,
        sagemaker_session=sagemaker_session,
    )

    records = rcf.record_set(train_input)
    records.distribution = "FullyReplicated"

    test_records = rcf.record_set(train_input, channel="test")
    test_records.distribution = "FullyReplicated"

    hyperparameter_ranges = {
        "num_trees": IntegerParameter(50, 100),
        "num_samples_per_tree": IntegerParameter(1, 2),
    }

    objective_metric_name = "test:f1"

    tuner = HyperparameterTuner(
        estimator=rcf,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Maximize",
        max_jobs=2,
        max_parallel_jobs=2,
    )

    tuning_job_name = unique_name_from_base("test-randomcutforest", max_length=32)
    tuner.fit([records, test_records], tuning_job_name, wait=False)

    # Give the tuning job a moment to start before requesting the stop.
    time.sleep(15)

    latest_tuning_job_name = tuner.latest_tuning_job.name

    print("Attempting to stop {}".format(latest_tuning_job_name))

    tuner.stop_tuning_job()

    # Immediately after the stop request, the job should report 'Stopping'.
    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=latest_tuning_job_name
    )
    assert desc["HyperParameterTuningJobStatus"] == "Stopping"
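# A minimal sketch of how the assertion above could be hardened: instead of
# checking the transient 'Stopping' status once, poll
# describe_hyper_parameter_tuning_job until the job settles in 'Stopped'.
# This helper is not part of the test suite; its name, poll interval, and
# timeout are illustrative assumptions.
def _wait_until_stopped(sagemaker_client, tuning_job_name, poll_seconds=10, max_attempts=30):
    for _ in range(max_attempts):
        desc = sagemaker_client.describe_hyper_parameter_tuning_job(
            HyperParameterTuningJobName=tuning_job_name
        )
        if desc["HyperParameterTuningJobStatus"] == "Stopped":
            return desc
        time.sleep(poll_seconds)
    raise TimeoutError("Tuning job {} did not reach 'Stopped'".format(tuning_job_name))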
def test_rcf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        # Generate a thousand 14-dimensional datapoints.
        feature_num = 14
        train_input = np.random.rand(1000, feature_num)

        rcf = RandomCutForest(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            num_trees=50,
            num_samples_per_tree=20,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )

        records = rcf.record_set(train_input)

        training_config = _build_airflow_workflow(
            estimator=rcf, instance_type=cpu_instance_type, inputs=records
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"],
        )
def test_randomcutforest(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("randomcutforest")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # Generate a thousand 14-dimensional datapoints.
        feature_num = 14
        train_input = np.random.rand(1000, feature_num)

        rcf = RandomCutForest(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_trees=50,
            num_samples_per_tree=20,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )

        rcf.fit(records=rcf.record_set(train_input), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = RandomCutForestModel(
            rcf.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["score"] is not None
            assert len(record.label["score"].float32_tensor.values) == 1
def test_randomcutforest(sagemaker_session):
    with timeout(minutes=15):
        # Generate a thousand 14-dimensional datapoints.
        feature_num = 14
        train_input = np.random.rand(1000, feature_num)

        rcf = RandomCutForest(role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge', num_trees=50,
                              num_samples_per_tree=20, sagemaker_session=sagemaker_session,
                              base_job_name='test-randomcutforest')

        rcf.fit(rcf.record_set(train_input))

    endpoint_name = name_from_base('randomcutforest')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        model = RandomCutForestModel(rcf.model_data, role='SageMakerRole',
                                     sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["score"] is not None
            assert len(record.label["score"].float32_tensor.values) == 1
def create_model(training_data, s3_bucket):
    session = sagemaker.Session()
    sagemaker_client = session.sagemaker_client  # boto3 SageMaker client for endpoint lookups

    # Reuse an existing randomcutforest endpoint if one is already deployed.
    endpoints = sagemaker_client.list_endpoints()["Endpoints"]
    endpoint_exists = False
    existing_endpoint_name = ""
    for e in endpoints:
        if e["EndpointName"].startswith("randomcutforest"):
            existing_endpoint_name = e["EndpointName"]
            endpoint_exists = True
            break

    rcf = RandomCutForest(
        role=ROLE_ARN,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        data_location="s3://{}/{}/".format(s3_bucket, INPUT_DATA_PREFIX),
        output_path="s3://{}/{}/output".format(s3_bucket, MODEL_OUTPUT_PREFIX),
        num_samples_per_tree=SAMPLES_PER_TREE,
        num_trees=NUM_OF_TREES,
    )

    numpy_data = training_data.to_numpy()
    record_set = rcf.record_set(numpy_data, channel="train", encrypt=False)
    rcf.fit(record_set)

    # Either repoint the existing endpoint at the new model or stand up a new one.
    if endpoint_exists:
        response = update_model(rcf, existing_endpoint_name)
    else:
        response = deploy_model(rcf)
    return response
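# A minimal sketch of the `deploy_model` and `update_model` helpers that
# create_model() calls; they are not defined in this snippet, so the bodies
# below are assumptions about one plausible implementation, not the original
# code. The exact endpoint-update flow varies across SDK versions.
from sagemaker.predictor import Predictor


def deploy_model(rcf):
    # Deploy the freshly trained estimator behind a new real-time endpoint.
    predictor = rcf.deploy(initial_instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE)
    return predictor.endpoint_name


def update_model(rcf, endpoint_name):
    # Register a SageMaker Model from the new training job, then repoint the
    # existing endpoint at it instead of creating a second endpoint.
    model = rcf.create_model()
    model.create(instance_type=INSTANCE_TYPE)
    predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=rcf.sagemaker_session)
    predictor.update_endpoint(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        model_name=model.name,
    )
    return endpoint_name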
def inference(self, **kwargs):
    self.bucket = kwargs.get('bucket')
    self.prefix = kwargs.get('prefix')
    self.execution_role = kwargs.get('execution_role')
    self.instance_type = kwargs.get('instance_type')
    self.aws_access_key_id = kwargs.get('aws_access_key_id')
    self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
    self.region_name = kwargs.get('region_name')

    print("//////boto3 session generating")
    boto_session = boto3.Session(
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key,
        region_name=self.region_name
    )

    # Check that the target S3 bucket exists and is accessible.
    print("\n//////check if the bucket exists")
    try:
        boto_session.client('s3').head_bucket(Bucket=self.bucket)
    except botocore.exceptions.ParamValidationError:
        print('Hey! You either forgot to specify your S3 bucket'
              ' or you gave your bucket an invalid name!')
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == '403':
            print("Hey! You don't have permission to access the bucket, {}.".format(self.bucket))
        elif e.response['Error']['Code'] == '404':
            print("Hey! Your bucket, {}, doesn't exist!".format(self.bucket))
        else:
            raise
    else:
        print('Training input/output will be stored in: s3://{}/{}'.format(self.bucket, self.prefix))

    print("\n//////define sagemaker session")
    sg_session = sagemaker.Session(boto_session)

    print("\n//////define rcf model")
    # Specify general training job information.
    rcf = RandomCutForest(role=self.execution_role,
                          train_instance_count=1,
                          train_instance_type=self.instance_type,
                          data_location='s3://{}/{}/'.format(self.bucket, self.prefix),
                          output_path='s3://{}/{}/output'.format(self.bucket, self.prefix),
                          num_samples_per_tree=512,
                          num_trees=50,
                          sagemaker_session=sg_session)

    print("\n//////fitting rcf model")
    # Automatically upload the training data to S3 and run the training job.
    rcf.fit(rcf.record_set(self.df.value.to_numpy().reshape(-1, 1)))

    print("\n//////infer the virtual data")
    rcf_inference = rcf.deploy(
        initial_instance_count=1,
        instance_type=self.instance_type,
    )

    print("\n//////serialize the output data")
    rcf_inference.content_type = 'text/csv'
    rcf_inference.serializer = csv_serializer
    rcf_inference.accept = 'application/json'
    rcf_inference.deserializer = json_deserializer

    df_numpy = self.df.value.to_numpy().reshape(-1, 1)
    self.results = rcf_inference.predict(df_numpy)
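# A teardown sketch, assuming the v1-era SDK used in inference() above:
# real-time endpoints bill while in service, so the endpoint should be deleted
# once self.results has been collected. `endpoint_name` here stands for the
# deployed predictor's endpoint (rcf_inference.endpoint in the method above),
# which the caller would need to keep around; this helper is an assumption,
# not part of the original class.
def cleanup(self, endpoint_name):
    sagemaker.Session().delete_endpoint(endpoint_name)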