コード例 #1
0
def test_stop_tuning_job(sagemaker_session):
    """Launch a RandomCutForest tuning job, stop it, and check its status.

    The test passes when the tuning job description returned by the
    SageMaker client reports ``'Stopping'`` right after the stop request.

    Args:
        sagemaker_session: session handed to the estimator.
    """
    n_features = 14
    samples = np.random.rand(1000, n_features)

    estimator = RandomCutForest(
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        num_trees=50,
        num_samples_per_tree=20,
        sagemaker_session=sagemaker_session,
        base_job_name='test-randomcutforest',
    )

    # The same random matrix backs both the default channel and 'test'.
    train_channel = estimator.record_set(samples)
    train_channel.distribution = 'FullyReplicated'

    test_channel = estimator.record_set(samples, channel='test')
    test_channel.distribution = 'FullyReplicated'

    search_space = {
        'num_trees': IntegerParameter(50, 100),
        'num_samples_per_tree': IntegerParameter(1, 2),
    }
    tuner = HyperparameterTuner(
        estimator=estimator,
        objective_metric_name='test:f1',
        hyperparameter_ranges=search_space,
        objective_type='Maximize',
        max_jobs=2,
        max_parallel_jobs=2,
    )
    tuner.fit([train_channel, test_channel])

    # Pause before requesting the stop (presumably to let the job get
    # going -- the reason is not stated in this snippet).
    time.sleep(15)

    job_name = tuner.latest_tuning_job.name
    print('Attempting to stop {}'.format(job_name))
    tuner.stop_tuning_job()

    client = tuner.latest_tuning_job.sagemaker_session.sagemaker_client
    description = client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=job_name
    )
    assert description['HyperParameterTuningJobStatus'] == 'Stopping'
コード例 #2
0
def test_stop_tuning_job(sagemaker_session):
    """Start a hyperparameter tuning job, stop it, and assert it is stopping.

    A RandomCutForest estimator is tuned over ``num_trees`` and
    ``num_samples_per_tree``; after the stop request the job description
    must carry the status ``'Stopping'``.

    Args:
        sagemaker_session: session handed to the estimator.
    """
    dims = 14
    matrix = np.random.rand(1000, dims)

    forest = RandomCutForest(
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        num_trees=50,
        num_samples_per_tree=20,
        sagemaker_session=sagemaker_session,
        base_job_name='test-randomcutforest',
    )

    # Build the two input channels from the same data.
    default_records = forest.record_set(matrix)
    default_records.distribution = 'FullyReplicated'
    held_out_records = forest.record_set(matrix, channel='test')
    held_out_records.distribution = 'FullyReplicated'

    ranges = {
        'num_trees': IntegerParameter(50, 100),
        'num_samples_per_tree': IntegerParameter(1, 2),
    }
    metric = 'test:f1'
    tuner = HyperparameterTuner(
        estimator=forest,
        objective_metric_name=metric,
        hyperparameter_ranges=ranges,
        objective_type='Maximize',
        max_jobs=2,
        max_parallel_jobs=2,
    )

    channels = [default_records, held_out_records]
    tuner.fit(channels)

    # Fixed delay between launching and stopping the job.
    time.sleep(15)

    tuning_job = tuner.latest_tuning_job.name
    print('Attempting to stop {}'.format(tuning_job))

    tuner.stop_tuning_job()

    sm_client = tuner.latest_tuning_job.sagemaker_session.sagemaker_client
    status = sm_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=tuning_job
    )['HyperParameterTuningJobStatus']
    assert status == 'Stopping'
コード例 #3
0
def test_stop_tuning_job(sagemaker_session, cpu_instance_type):
    """Launch a named tuning job with ``wait=False``, stop it, and check status.

    Args:
        sagemaker_session: session handed to the estimator.
        cpu_instance_type: instance type used for the estimator.
    """
    n_features = 14
    samples = np.random.rand(1000, n_features)

    estimator = RandomCutForest(
        role="SageMakerRole", instance_count=1, instance_type=cpu_instance_type,
        num_trees=50, num_samples_per_tree=20, sagemaker_session=sagemaker_session,
    )

    # Default channel and "test" channel share the same random data.
    train_channel = estimator.record_set(samples)
    train_channel.distribution = "FullyReplicated"
    test_channel = estimator.record_set(samples, channel="test")
    test_channel.distribution = "FullyReplicated"

    search_space = {"num_trees": IntegerParameter(50, 100),
                    "num_samples_per_tree": IntegerParameter(1, 2)}

    tuner = HyperparameterTuner(
        estimator=estimator, objective_metric_name="test:f1",
        hyperparameter_ranges=search_space, objective_type="Maximize",
        max_jobs=2, max_parallel_jobs=2,
    )

    # The job name is generated with a 32-character cap.
    requested_name = unique_name_from_base("test-randomcutforest", max_length=32)
    tuner.fit([train_channel, test_channel], requested_name, wait=False)

    # Fixed delay between launching and stopping the job.
    time.sleep(15)

    job_name = tuner.latest_tuning_job.name
    print("Attempting to stop {}".format(job_name))
    tuner.stop_tuning_job()

    client = tuner.latest_tuning_job.sagemaker_session.sagemaker_client
    description = client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=job_name
    )
    assert description["HyperParameterTuningJobStatus"] == "Stopping"
コード例 #4
0
def test_rcf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    """Build an Airflow training config for RandomCutForest and check its S3 input.

    The config comes from ``_build_airflow_workflow``; the ``S3Uri`` of its
    first input channel is handed to ``_assert_that_s3_url_contains_data``.

    Args:
        sagemaker_session: session handed to the estimator and the S3 check.
        cpu_instance_type: instance type for the estimator and the workflow.
    """
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        # 1000 random points with 14 features each.
        n_features = 14
        samples = np.random.rand(1000, n_features)

        estimator = RandomCutForest(
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            num_trees=50,
            num_samples_per_tree=20,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )

        record_set = estimator.record_set(samples)
        config = _build_airflow_workflow(
            estimator=estimator,
            instance_type=cpu_instance_type,
            inputs=record_set,
        )

        first_channel = config["InputDataConfig"][0]
        s3_uri = first_channel["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)
コード例 #5
0
def test_randomcutforest(sagemaker_session, cpu_instance_type):
    """Train RandomCutForest, deploy it, and check the shape of one prediction.

    The same generated name is used for the training job and the endpoint.

    Args:
        sagemaker_session: session handed to the estimator and the model.
        cpu_instance_type: instance type for training and hosting.
    """
    shared_name = unique_name_from_base("randomcutforest")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # 1000 random points with 14 features each.
        n_features = 14
        samples = np.random.rand(1000, n_features)

        estimator = RandomCutForest(
            role="SageMakerRole", instance_count=1, instance_type=cpu_instance_type,
            num_trees=50, num_samples_per_tree=20,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )

        training_records = estimator.record_set(samples)
        estimator.fit(records=training_records, job_name=shared_name)

    with timeout_and_delete_endpoint_by_name(shared_name, sagemaker_session):
        hosted_model = RandomCutForestModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = hosted_model.deploy(1, cpu_instance_type, endpoint_name=shared_name)

        # Score a single random point.
        query = np.random.rand(1, n_features)
        predictions = predictor.predict(query)

        assert len(predictions) == 1
        for prediction in predictions:
            score = prediction.label["score"]
            assert score is not None
            assert len(score.float32_tensor.values) == 1
コード例 #6
0
def test_randomcutforest(sagemaker_session):
    """Train RandomCutForest on random data, deploy it, and score one point.

    Args:
        sagemaker_session: session handed to the estimator and the model.
    """
    with timeout(minutes=15):
        # 1000 random points with 14 features each.
        n_features = 14
        samples = np.random.rand(1000, n_features)

        estimator = RandomCutForest(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_trees=50,
            num_samples_per_tree=20,
            sagemaker_session=sagemaker_session,
            base_job_name='test-randomcutforest',
        )

        training_records = estimator.record_set(samples)
        estimator.fit(training_records)

    # The endpoint name is generated only after training has finished.
    endpoint = name_from_base('randomcutforest')
    with timeout_and_delete_endpoint_by_name(endpoint, sagemaker_session, minutes=20):
        hosted_model = RandomCutForestModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = hosted_model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint)

        query = np.random.rand(1, n_features)
        predictions = predictor.predict(query)

        assert len(predictions) == 1
        for prediction in predictions:
            score = prediction.label["score"]
            assert score is not None
            assert len(score.float32_tensor.values) == 1
コード例 #7
0
def create_model(training_data, s3_bucket):
    """Train a RandomCutForest and deploy it, or update an existing endpoint.

    Args:
        training_data: object exposing ``to_numpy()`` (presumably a pandas
            DataFrame -- confirm against callers).
        s3_bucket: bucket name used to build the data and output S3 URIs.

    Returns:
        The result of ``update_model`` when an endpoint whose name starts
        with ``"randomcutforest"`` is already listed, otherwise the result
        of ``deploy_model``.
    """
    # NOTE(review): this session is created but never used below.
    session = sagemaker.Session()

    # Pick the first listed endpoint carrying the expected name prefix;
    # an empty string means none was found.
    listed = sagemaker_client.list_endpoints()["Endpoints"]
    existing_endpoint_name = next(
        (
            entry["EndpointName"]
            for entry in listed
            if entry["EndpointName"].startswith("randomcutforest")
        ),
        "",
    )

    input_location = "s3://{}/{}/".format(s3_bucket, INPUT_DATA_PREFIX)
    output_location = "s3://{}/{}/output".format(s3_bucket, MODEL_OUTPUT_PREFIX)
    rcf = RandomCutForest(
        role=ROLE_ARN,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        data_location=input_location,
        output_path=output_location,
        num_samples_per_tree=SAMPLES_PER_TREE,
        num_trees=NUM_OF_TREES,
    )

    features = training_data.to_numpy()
    rcf.fit(rcf.record_set(features, channel="train", encrypt=False))

    if existing_endpoint_name:
        return update_model(rcf, existing_endpoint_name)
    return deploy_model(rcf)
コード例 #8
0
def test_randomcutforest(sagemaker_session):
    """Fit RandomCutForest, host the model, and verify a single prediction.

    Args:
        sagemaker_session: session handed to the estimator and the model.
    """
    with timeout(minutes=15):
        # 1000 random points with 14 features each.
        dims = 14
        matrix = np.random.rand(1000, dims)

        forest = RandomCutForest(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_trees=50,
            num_samples_per_tree=20,
            sagemaker_session=sagemaker_session,
            base_job_name='test-randomcutforest',
        )
        forest.fit(forest.record_set(matrix))

    # The endpoint name is generated only after training has finished.
    hosted_name = name_from_base('randomcutforest')
    with timeout_and_delete_endpoint_by_name(hosted_name, sagemaker_session, minutes=20):
        forest_model = RandomCutForestModel(
            forest.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        endpoint = forest_model.deploy(
            1,
            'ml.c4.xlarge',
            endpoint_name=hosted_name,
        )

        single_point = np.random.rand(1, dims)
        scored = endpoint.predict(single_point)

        assert len(scored) == 1
        for item in scored:
            score_field = item.label["score"]
            assert score_field is not None
            assert len(score_field.float32_tensor.values) == 1
コード例 #9
0
ファイル: aws.py プロジェクト: luvforsamoyed/AI-API-optimizer
    def inference(self, **kwargs):
        """Train a Random Cut Forest on ``self.df.value``, deploy it, and score the same data.

        Keyword Args:
            bucket: S3 bucket used for the training input and model output.
            prefix: key prefix inside ``bucket``.
            execution_role: role passed to the ``RandomCutForest`` estimator.
            instance_type: instance type used for both training and hosting.
            aws_access_key_id: forwarded to ``boto3.Session``.
            aws_secret_access_key: forwarded to ``boto3.Session``.
            region_name: forwarded to ``boto3.Session``.

        Side effects:
            Stores every keyword argument above on ``self`` and writes the
            endpoint's response to ``self.results``.

        Raises:
            botocore.exceptions.ClientError: when the bucket check fails with
                an error code other than ``'403'`` or ``'404'``.
        """
        self.bucket = kwargs.get('bucket')
        self.prefix = kwargs.get('prefix')
        self.execution_role = kwargs.get('execution_role')
        self.instance_type = kwargs.get('instance_type')
        self.aws_access_key_id = kwargs.get('aws_access_key_id')
        self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
        self.region_name = kwargs.get('region_name')

        print("//////boto3 session generating")
        boto_session = boto3.Session(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name,
        )

        # Check if the bucket exists. Known failures are only reported; the
        # method then carries on, exactly as before.
        print("\n//////check if the bucket exists")
        try:
            boto_session.client('s3').head_bucket(Bucket=self.bucket)
        except botocore.exceptions.ParamValidationError:
            print('Hey! You either forgot to specify your S3 bucket'
                  ' or you gave your bucket an invalid name!')
        except botocore.exceptions.ClientError as e:
            error_code = e.response['Error']['Code']
            if error_code == '403':
                print("Hey! You don't have permission to access the bucket, {}.".format(self.bucket))
            elif error_code == '404':
                print("Hey! Your bucket, {}, doesn't exist!".format(self.bucket))
            else:
                raise
        else:
            print('Training input/output will be stored in: s3://{}/{}'.format(self.bucket, self.prefix))

        print("\n//////define sagemaker session")
        sg_session = sagemaker.Session(boto_session)

        print("\n//////define rcf model")
        # specify general training job information
        rcf = RandomCutForest(role=self.execution_role,
                              train_instance_count=1,
                              train_instance_type=self.instance_type,
                              data_location='s3://{}/{}/'.format(self.bucket, self.prefix),
                              output_path='s3://{}/{}/output'.format(self.bucket, self.prefix),
                              num_samples_per_tree=512,
                              num_trees=50,
                              sagemaker_session=sg_session)

        # Series.as_matrix() was removed in pandas 1.0; .values returns the
        # same array on old and new pandas. The single-column matrix is built
        # once and reused for both training and prediction.
        feature_matrix = self.df.value.values.reshape(-1, 1)

        print("\n//////fitting rcf model")
        # automatically upload the training data to S3 and run the training job
        rcf.fit(rcf.record_set(feature_matrix))

        print("\n//////infer the virtual data")
        rcf_inference = rcf.deploy(
            initial_instance_count=1,
            instance_type=self.instance_type,
        )

        print("\n//////serialize the output data")
        # Send CSV to the endpoint and read JSON back.
        rcf_inference.content_type = 'text/csv'
        rcf_inference.serializer = csv_serializer
        rcf_inference.accept = 'application/json'
        rcf_inference.deserializer = json_deserializer

        self.results = rcf_inference.predict(feature_matrix)