예제 #1
0
def xgboost_model():
    """Create an XGBoost Model custom resource and clean it up afterwards.

    Written as a generator: the code before ``yield`` is setup and the code
    after it is teardown (presumably registered as a pytest fixture; the
    decorator is not visible in this part of the file).

    Yields:
        tuple: ``(reference, resource)`` where ``reference`` is the
        ``k8s.CustomResourceReference`` that was created and ``resource`` is
        the value returned by ``k8s.wait_resource_consumed_by_controller``.

    Raises:
        AssertionError: if waiting for the controller returns ``None``.
    """
    resource_name = random_suffix_name("xgboost-model", 32)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = resource_name

    model = load_resource_file(SERVICE_NAME,
                               "xgboost_model",
                               additional_replacements=replacements)
    logging.debug(model)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(CRD_GROUP,
                                            CRD_VERSION,
                                            RESOURCE_PLURAL,
                                            resource_name,
                                            namespace="default")
    k8s.create_custom_resource(reference, model)
    resource = k8s.wait_resource_consumed_by_controller(reference)

    assert resource is not None

    yield (reference, resource)

    # Delete the k8s resource if not already deleted by tests.
    # Best effort: a failure here must not fail the test, but only ordinary
    # exceptions are swallowed so KeyboardInterrupt/SystemExit still propagate.
    try:
        k8s.delete_custom_resource(reference)
    except Exception:
        logging.debug("Ignoring error while deleting %s during teardown",
                      reference,
                      exc_info=True)
def kmeans_processing_job():
    """Create a k-means ProcessingJob custom resource and clean it up afterwards.

    Written as a generator: the code before ``yield`` is setup and the code
    after it is teardown (presumably registered as a pytest fixture; the
    decorator is not visible in this part of the file).

    Yields:
        tuple: ``(reference, resource)`` where ``reference`` is the
        ``k8s.CustomResourceReference`` that was created and ``resource`` is
        the value returned by ``k8s.wait_resource_consumed_by_controller``.

    Raises:
        AssertionError: if waiting for the controller returns ``None``.
    """
    resource_name = random_suffix_name("kmeans-processingjob", 32)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["PROCESSING_JOB_NAME"] = resource_name

    processing_job = load_resource_file(SERVICE_NAME,
                                        "kmeans_processingjob",
                                        additional_replacements=replacements)
    logging.debug(processing_job)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(CRD_GROUP,
                                            CRD_VERSION,
                                            RESOURCE_PLURAL,
                                            resource_name,
                                            namespace="default")
    k8s.create_custom_resource(reference, processing_job)
    resource = k8s.wait_resource_consumed_by_controller(reference)

    assert resource is not None

    yield (reference, resource)

    # Delete the k8s resource if not already deleted by tests.
    # Best effort: a failure here must not fail the test, but only ordinary
    # exceptions are swallowed so KeyboardInterrupt/SystemExit still propagate.
    try:
        k8s.delete_custom_resource(reference)
    except Exception:
        logging.debug("Ignoring error while deleting %s during teardown",
                      reference,
                      exc_info=True)
예제 #3
0
    def test_create_delete_non_public(self, amq_client):
        """Create a non-public RabbitMQ broker CR, wait for it to run, then delete it.

        Steps:
          1. Load the ``broker_rabbitmq_non_public`` manifest and create the
             custom resource.
          2. Read ``status.brokerID`` from the consumed resource and poll
             ``amq_client.describe_broker`` until ``BrokerState`` is
             ``RUNNING`` (bounded by ``CREATE_TIMEOUT_SECONDS``).
          3. Delete the custom resource, wait ``DELETE_WAIT_AFTER_SECONDS``
             and check that ``describe_broker`` raises ``NotFoundException``.

        Args:
            amq_client: client exposing ``describe_broker`` and
                ``exceptions.NotFoundException``.

        Raises:
            Exception: if the broker is not ``RUNNING`` before the timeout.
        """
        resource_name = "my-rabbit-broker-non-public"

        replacements = REPLACEMENT_VALUES.copy()
        replacements["BROKER_NAME"] = resource_name

        resource_data = load_resource_file(
            SERVICE_NAME,
            "broker_rabbitmq_non_public",
            additional_replacements=replacements,
        )
        # The rendered manifest is diagnostic output, not an error condition.
        logging.debug(resource_data)

        # Create the k8s resource
        ref = k8s.CustomResourceReference(
            CRD_GROUP,
            CRD_VERSION,
            RESOURCE_PLURAL,
            resource_name,
            namespace="default",
        )
        k8s.create_custom_resource(ref, resource_data)
        cr = k8s.wait_resource_consumed_by_controller(ref)

        assert cr is not None
        assert k8s.get_resource_exists(ref)

        broker_id = cr['status']['brokerID']

        # Let's check that the Broker appears in AmazonMQ
        aws_res = amq_client.describe_broker(BrokerId=broker_id)
        assert aws_res is not None

        now = datetime.datetime.now()
        timeout = now + datetime.timedelta(seconds=CREATE_TIMEOUT_SECONDS)

        # TODO(jaypipes): Move this into generic AWS-side waiter
        while aws_res['BrokerState'] != "RUNNING":
            if datetime.datetime.now() >= timeout:
                raise Exception("failed to find running Broker before timeout")
            time.sleep(CREATE_INTERVAL_SLEEP_SECONDS)
            aws_res = amq_client.describe_broker(BrokerId=broker_id)
            assert aws_res is not None

        # Delete the k8s resource as part of the test itself
        k8s.delete_custom_resource(ref)

        time.sleep(DELETE_WAIT_AFTER_SECONDS)

        # Broker should no longer appear in AmazonMQ
        res_found = False
        try:
            amq_client.describe_broker(BrokerId=broker_id)
            res_found = True
        except amq_client.exceptions.NotFoundException:
            pass

        assert res_found is False
예제 #4
0
def xgboost_hpojob():
    """Create an HPO job custom resource, yield it, and remove it afterwards.

    Yields:
        tuple: ``(reference, resource)`` where ``reference`` is the first
        value returned by ``_make_hpojob`` and ``resource`` is the value
        returned by ``k8s.wait_resource_consumed_by_controller`` for it.
    """
    reference, spec = _make_hpojob()
    k8s.create_custom_resource(reference, spec)
    consumed = k8s.wait_resource_consumed_by_controller(reference)

    yield (reference, consumed)

    # Teardown: nothing to do when the resource is already gone.
    if not k8s.get_resource_exists(reference):
        return
    k8s.delete_custom_resource(reference)
def xgboost_transformjob(sagemaker_client):
    """Create a SageMaker model plus a TransformJob custom resource that uses it.

    The model is created directly through ``sagemaker_client.create_model``;
    the TransformJob is created as a k8s custom resource that references the
    model by name. Written as a generator: code after ``yield`` is teardown
    (presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file).

    Args:
        sagemaker_client: client exposing ``create_model``,
            ``describe_model`` and ``delete_model``.

    Yields:
        tuple: ``(reference, resource)`` for the TransformJob custom resource.

    Raises:
        AssertionError: if the described model has no name, or if waiting
            for the controller returns ``None``.
    """
    # Create model using boto3 for TransformJob
    transform_model_file = "s3://{d}/sagemaker/batch/model.tar.gz".format(
        d=get_bootstrap_resources().DataBucketName)
    model_name = random_suffix_name("xgboost-model", 32)

    create_response = sagemaker_client.create_model(
        ModelName=model_name,
        PrimaryContainer={
            'Image': REPLACEMENT_VALUES["XGBOOST_IMAGE_URI"],
            'ModelDataUrl': transform_model_file,
            'Environment': {}
        },
        ExecutionRoleArn=REPLACEMENT_VALUES["SAGEMAKER_EXECUTION_ROLE_ARN"])
    logging.debug(create_response)

    # Check if the model is created successfully
    describe_model_response = sagemaker_client.describe_model(
        ModelName=model_name)
    assert describe_model_response["ModelName"] is not None

    resource_name = random_suffix_name("xgboost-transformjob", 32)

    # Use the model created above
    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = model_name
    replacements["TRANSFORM_JOB_NAME"] = resource_name

    transformjob = load_resource_file(SERVICE_NAME,
                                      "xgboost_transformjob",
                                      additional_replacements=replacements)
    logging.debug(transformjob)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(CRD_GROUP,
                                            CRD_VERSION,
                                            RESOURCE_PLURAL,
                                            resource_name,
                                            namespace="default")
    k8s.create_custom_resource(reference, transformjob)
    resource = k8s.wait_resource_consumed_by_controller(reference)

    assert resource is not None

    yield (reference, resource)

    # Both cleanup steps are best effort and independent of each other.
    # Only ordinary exceptions are swallowed so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        # Delete the k8s resource if not already deleted by tests
        k8s.delete_custom_resource(reference)
    except Exception:
        logging.debug("Ignoring error while deleting %s during teardown",
                      reference,
                      exc_info=True)

    try:
        # Delete the model created
        sagemaker_client.delete_model(ModelName=model_name)
    except Exception:
        logging.debug("Ignoring error while deleting model %s during teardown",
                      model_name,
                      exc_info=True)
예제 #6
0
def rg_auth_token(make_rg_name, make_replication_group, rg_deletion_waiter, first_secret, second_secret):
    """Create the auth-token replication group, yield it, then delete it.

    Args:
        make_rg_name: callable turning a prefix into a replication group id.
        make_replication_group: callable taking (manifest name, replacement
            dict, replication group id) and returning ``(reference, resource)``.
        rg_deletion_waiter: object whose ``wait`` is called with
            ``ReplicationGroupId`` after the resource is deleted.
        first_secret: not referenced in the body (presumably requested only
            for its setup side effect; confirm against the caller).
        second_secret: not referenced in the body (same assumption).

    Yields:
        tuple: ``(reference, resource)`` as returned by
        ``make_replication_group``.
    """
    rg_id = make_rg_name("rg-auth-token")
    input_dict = dict(RG_ID=rg_id, NAME="first", KEY="secret1")

    reference, resource = make_replication_group(
        "replicationgroup_authtoken", input_dict, rg_id)
    yield (reference, resource)

    # teardown; per the original note, the waiter raises if the wait fails
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=input_dict["RG_ID"])
예제 #7
0
def single_variant_config():
    """Create a Model and a single-variant EndpointConfig custom resource.

    Both manifests are rendered from the same replacement values, so the
    config manifest receives the model's name. Written as a generator: code
    after ``yield`` is teardown (presumably registered as a pytest fixture;
    the decorator is not visible in this part of the file).

    Yields:
        tuple: ``(config_reference, config_resource)`` for the EndpointConfig
        custom resource.

    Raises:
        AssertionError: if waiting for the controller returns ``None`` for
            either resource.
    """
    config_resource_name = random_suffix_name("single-variant-config", 32)
    model_resource_name = config_resource_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["CONFIG_NAME"] = config_resource_name
    replacements["MODEL_NAME"] = model_resource_name

    model = load_resource_file(SERVICE_NAME,
                               "xgboost_model",
                               additional_replacements=replacements)
    logging.debug(model)

    config = load_resource_file(
        SERVICE_NAME,
        "endpoint_config_single_variant",
        additional_replacements=replacements,
    )
    logging.debug(config)

    # Create the k8s resources
    model_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        MODEL_RESOURCE_PLURAL,
        model_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(model_reference, model)
    model_resource = k8s.wait_resource_consumed_by_controller(model_reference)
    assert model_resource is not None

    config_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(config_reference, config)
    config_resource = k8s.wait_resource_consumed_by_controller(
        config_reference)
    assert config_resource is not None

    yield (config_reference, config_resource)

    # Delete the k8s resources if not already deleted by tests.
    # Each delete gets its own try so that a failure on the model (for
    # example because a test already removed it) does not skip the config.
    # Only ordinary exceptions are swallowed so KeyboardInterrupt/SystemExit
    # still propagate.
    for cr in (model_reference, config_reference):
        try:
            k8s.delete_custom_resource(cr)
        except Exception:
            logging.debug("Ignoring error while deleting %s during teardown",
                          cr,
                          exc_info=True)
예제 #8
0
def rg_largecluster(rg_largecluster_input, make_replication_group,
                    rg_deletion_waiter):
    """Create the large-cluster replication group, yield it, then delete it.

    Args:
        rg_largecluster_input: mapping of replacement values; its ``RG_ID``
            entry is used as the replication group id.
        make_replication_group: callable taking (manifest name, replacement
            mapping, replication group id) and returning
            ``(reference, resource)``.
        rg_deletion_waiter: object whose ``wait`` is called with
            ``ReplicationGroupId`` after the resource is deleted.

    Yields:
        tuple: ``(reference, resource)`` as returned by
        ``make_replication_group``.
    """
    values = rg_largecluster_input
    reference, resource = make_replication_group(
        "replicationgroup_largecluster", values, values["RG_ID"])

    yield (reference, resource)

    # teardown: delete, pause, then block on the deletion waiter
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=values["RG_ID"])
예제 #9
0
def rg_cmd_fromsnapshot(bootstrap_resources, make_rg_name, make_replication_group, rg_deletion_waiter):
    """Create the from-snapshot replication group, yield it, then delete it.

    Args:
        bootstrap_resources: object whose ``SnapshotName`` attribute is used
            as the ``SNAPSHOT_NAME`` replacement value.
        make_rg_name: callable turning a prefix into a replication group id.
        make_replication_group: callable taking (manifest name, replacement
            dict, replication group id) and returning ``(reference, resource)``.
        rg_deletion_waiter: object whose ``wait`` is called with
            ``ReplicationGroupId`` after the resource is deleted.

    Yields:
        tuple: ``(reference, resource)`` as returned by
        ``make_replication_group``.
    """
    rg_id = make_rg_name("rg-cmd-fromsnapshot")
    input_dict = dict(RG_ID=rg_id,
                      SNAPSHOT_NAME=bootstrap_resources.SnapshotName)

    reference, resource = make_replication_group(
        "replicationgroup_cmd_fromsnapshot", input_dict, rg_id)
    yield (reference, resource)

    # teardown: delete, pause, then block on the deletion waiter
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=input_dict["RG_ID"])
예제 #10
0
def rg_input_coverage(bootstrap_resources, make_rg_name, make_replication_group, rg_deletion_waiter):
    """Create the input-coverage replication group, yield it, then delete it.

    Args:
        bootstrap_resources: object providing ``KmsKeyID``, ``SnsTopicARN``,
            ``SecurityGroupID`` and ``UserGroupID`` attributes, which are
            passed on as replacement values.
        make_rg_name: callable turning a prefix into a replication group id.
        make_replication_group: callable taking (manifest name, replacement
            dict, replication group id) and returning ``(reference, resource)``.
        rg_deletion_waiter: object whose ``wait`` is called with
            ``ReplicationGroupId`` after the resource is deleted.

    Yields:
        tuple: ``(reference, resource)`` as returned by
        ``make_replication_group``.
    """
    rg_id = make_rg_name("rg-input-coverage")
    input_dict = dict(
        RG_ID=rg_id,
        KMS_KEY_ID=bootstrap_resources.KmsKeyID,
        SNS_TOPIC_ARN=bootstrap_resources.SnsTopicARN,
        SG_ID=bootstrap_resources.SecurityGroupID,
        USERGROUP_ID=bootstrap_resources.UserGroupID,
    )

    reference, resource = make_replication_group(
        "replicationgroup_input_coverage", input_dict, rg_id)
    yield (reference, resource)

    # teardown; per the original note, the waiter raises if the wait fails
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=input_dict["RG_ID"])
    def test_trainingjob_has_stopped_status(self, sagemaker_client,
                                            xgboost_trainingjob):
        """Deleting the TrainingJob CR should leave the job in a stopped status.

        Reads ``spec.trainingJobName`` from the live resource, deletes the
        custom resource, pauses for a fixed five seconds, and then checks
        that the status reported by ``_get_sagemaker_trainingjob_status`` is
        one of ``_get_stopped_trainingjob_status_list()``.
        """
        reference, _unused = xgboost_trainingjob
        spec = k8s.get_resource(reference)['spec']
        job_name = spec.get('trainingJobName')

        assert job_name is not None

        # Remove the custom resource.
        k8s.delete_custom_resource(reference)
        # TODO: a wait loop could replace this fixed pause; good enough for now.
        time.sleep(5)

        observed_status = self._get_sagemaker_trainingjob_status(
            sagemaker_client, job_name)
        stopped_statuses = self._get_stopped_trainingjob_status_list()
        assert observed_status in stopped_statuses
예제 #12
0
    def test_config_is_deleted(self, sagemaker_client, single_variant_config):
        """Deleting the EndpointConfig CR should remove the config from SageMaker.

        Reads ``spec.endpointConfigName`` from the live resource, deletes the
        custom resource, and asserts that
        ``_get_sagemaker_endpoint_config_arn`` returns ``None`` for that name.
        """
        (reference, _) = single_variant_config
        resource = k8s.get_resource(reference)
        config_name = resource["spec"].get("endpointConfigName", None)

        # Without a name the final lookup would be checking ``None`` rather
        # than the config this test created.
        assert config_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert (self._get_sagemaker_endpoint_config_arn(
            sagemaker_client, config_name) is None)
예제 #13
0
    def test_model_is_deleted(self, sagemaker_client, xgboost_model):
        """Deleting the Model CR should remove the model from SageMaker.

        Reads ``spec.modelName`` from the live resource, deletes the custom
        resource, and asserts that ``_get_sagemaker_model_arn`` returns
        ``None`` for that name.
        """
        (reference, _) = xgboost_model
        resource = k8s.get_resource(reference)
        model_name = resource["spec"].get("modelName", None)

        # Without a name the final lookup would be checking ``None`` rather
        # than the model this test created.
        assert model_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert self._get_sagemaker_model_arn(sagemaker_client,
                                             model_name) is None
예제 #14
0
def snapshot_kms(ec_client, bootstrap_resources, make_snapshot):
    """Create a KMS-encrypted snapshot custom resource, yield it, then delete it.

    The cache cluster id is taken from the first entry returned by
    ``ec_client.describe_snapshots`` for the bootstrap snapshot name.

    Args:
        ec_client: client exposing ``describe_snapshots``.
        bootstrap_resources: object providing ``SnapshotName`` and
            ``KmsKeyID`` attributes.
        make_snapshot: callable taking (manifest name, replacement dict,
            snapshot name) and returning ``(reference, resource)``.

    Yields:
        tuple: ``(reference, resource)`` as returned by ``make_snapshot``.
    """
    described = ec_client.describe_snapshots(
        SnapshotName=bootstrap_resources.SnapshotName)
    first_snapshot = described['Snapshots'][0]
    cc_id = first_snapshot['CacheClusterId']

    snapshot_name = random_suffix_name("ack-snapshot-kms", 32)

    input_dict = dict(
        SNAPSHOT_NAME=snapshot_name,
        CC_ID=cc_id,
        KMS_KEY_ID=bootstrap_resources.KmsKeyID,
    )

    reference, resource = make_snapshot("snapshot_kms", input_dict,
                                        snapshot_name)
    yield (reference, resource)

    # teardown: delete the resource and require that the wait helper
    # returns a truthy value for this snapshot
    k8s.delete_custom_resource(reference)
    assert wait_snapshot_deleted(snapshot_name)
예제 #15
0
    def test_delete_endpoint(self, sagemaker_client, single_variant_xgboost_endpoint):
        """Deleting the Endpoint CR should remove the endpoint from SageMaker.

        Reads ``spec.endpointName`` from the live resource, deletes the
        custom resource, and asserts that ``_describe_sagemaker_endpoint``
        returns ``None`` for that name.
        """
        (reference, _, _, _) = single_variant_xgboost_endpoint
        resource = k8s.get_resource(reference)
        endpoint_name = resource["spec"].get("endpointName", None)

        # Without a name the final lookup would be checking ``None`` rather
        # than the endpoint this test created.
        assert endpoint_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert (
            self._describe_sagemaker_endpoint(sagemaker_client, endpoint_name) is None
        )
    def test_transformjob_has_stopped_status(self, sagemaker_client,
                                             xgboost_transformjob):
        """Deleting the TransformJob CR should leave the job in a stopped status.

        Uses ``spec.transformJobName`` from the yielded resource, deletes the
        custom resource, and checks that the status reported by
        ``_get_sagemaker_transformjob_status`` is one of
        ``_get_stopped_transformjob_status_list()``.
        """
        reference, created = xgboost_transformjob
        job_name = created['spec'].get('transformJobName')

        assert job_name is not None

        # Remove the custom resource and require that deletion was reported.
        _, was_deleted = k8s.delete_custom_resource(reference)
        assert was_deleted is True

        observed_status = self._get_sagemaker_transformjob_status(
            sagemaker_client, job_name)
        stopped_statuses = self._get_stopped_transformjob_status_list()
        assert observed_status in stopped_statuses
    def test_processing_job_has_stopped_status(self, sagemaker_client,
                                               kmeans_processing_job):
        """Deleting the ProcessingJob CR should leave the job in a stopped status.

        Reads ``spec.processingJobName`` from the live resource, deletes the
        custom resource, and checks that the status reported by
        ``_get_sagemaker_processing_job_status`` is one of
        ``_get_stopped_processing_job_status_list()``.
        """
        reference, _unused = kmeans_processing_job
        spec = k8s.get_resource(reference)["spec"]
        job_name = spec.get("processingJobName")

        assert job_name is not None

        # Remove the custom resource and require that deletion was reported.
        _, was_deleted = k8s.delete_custom_resource(reference)
        assert was_deleted is True

        observed_status = self._get_sagemaker_processing_job_status(
            sagemaker_client, job_name)
        stopped_statuses = self._get_stopped_processing_job_status_list()
        assert observed_status in stopped_statuses
    def test_trainingjob_has_stopped_status(
        self, sagemaker_client, xgboost_trainingjob
    ):
        """Deleting the TrainingJob CR should leave the job in a stopped status.

        Reads ``spec.trainingJobName`` from the live resource, deletes the
        custom resource, and checks that the status reported by
        ``_get_sagemaker_trainingjob_status`` is one of
        ``_get_stopped_trainingjob_status_list()``.
        """
        reference, _unused = xgboost_trainingjob
        spec = k8s.get_resource(reference)["spec"]
        job_name = spec.get("trainingJobName")

        assert job_name is not None

        # Remove the custom resource and require that deletion was reported.
        _, was_deleted = k8s.delete_custom_resource(reference)
        assert was_deleted is True

        observed_status = self._get_sagemaker_trainingjob_status(
            sagemaker_client, job_name
        )
        stopped_statuses = self._get_stopped_trainingjob_status_list()
        assert observed_status in stopped_statuses
예제 #19
0
    def test_create_hpo(self, xgboost_hpojob):
        """Check the HPO job after creation and again after the CR is deleted.

        After creation the resource ARN must equal the
        ``HyperParameterTuningJobArn`` reported by ``get_sagemaker_hpo_job``
        and the status must be in ``HPO_JOB_STATUS_CREATED``. After deleting
        the custom resource the status must be in ``HPO_JOB_STATUS_STOPPED``.
        """
        reference, created = xgboost_hpojob
        assert k8s.get_resource_exists(reference)

        job_name = created["spec"].get("hyperParameterTuningJobName")
        assert job_name is not None

        # State right after creation.
        description = get_sagemaker_hpo_job(job_name)
        resource_arn = k8s.get_resource_arn(created)
        assert resource_arn == description["HyperParameterTuningJobArn"]
        status_after_create = description["HyperParameterTuningJobStatus"]
        assert status_after_create in HPO_JOB_STATUS_CREATED

        # Remove the custom resource and require that deletion was reported.
        _, was_deleted = k8s.delete_custom_resource(reference)
        assert was_deleted is True

        # State after deletion, taken from a fresh description.
        description = get_sagemaker_hpo_job(job_name)
        status_after_delete = description["HyperParameterTuningJobStatus"]
        assert status_after_delete in HPO_JOB_STATUS_STOPPED
예제 #20
0
def single_variant_xgboost_endpoint():
    """Create a Model, two EndpointConfigs and an Endpoint custom resource.

    The second EndpointConfig reuses the first config's manifest with only
    its ``metadata.name`` and ``spec.endpointConfigName`` replaced; its name
    is yielded as the fourth tuple element. Written as a generator: code
    after ``yield`` is teardown (presumably registered as a pytest fixture;
    the decorator is not visible in this part of the file).

    Yields:
        tuple: ``(endpoint_reference, endpoint_resource, endpoint_spec,
        config2_resource_name)``.

    Raises:
        AssertionError: if waiting for the controller returns ``None`` for
            any of the four resources.
    """
    endpoint_resource_name = random_suffix_name("single-variant-endpoint", 32)
    config1_resource_name = endpoint_resource_name + "-config"
    model_resource_name = config1_resource_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["ENDPOINT_NAME"] = endpoint_resource_name
    replacements["CONFIG_NAME"] = config1_resource_name
    replacements["MODEL_NAME"] = model_resource_name

    model = load_resource_file(
        SERVICE_NAME, "xgboost_model", additional_replacements=replacements
    )
    logging.debug(model)

    config = load_resource_file(
        SERVICE_NAME,
        "endpoint_config_single_variant",
        additional_replacements=replacements,
    )
    logging.debug(config)

    endpoint_spec = load_resource_file(
        SERVICE_NAME, "endpoint_base", additional_replacements=replacements
    )
    logging.debug(endpoint_spec)

    # Create the k8s resources
    model_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        MODEL_RESOURCE_PLURAL,
        model_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(model_reference, model)
    model_resource = k8s.wait_resource_consumed_by_controller(model_reference)
    assert model_resource is not None

    config1_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config1_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(config1_reference, config)
    config1_resource = k8s.wait_resource_consumed_by_controller(config1_reference)
    assert config1_resource is not None

    # The second config is the same manifest under a different name; the
    # dict is mutated in place, which is safe because config1 already exists.
    config2_resource_name = random_suffix_name("2-single-variant-endpoint", 32)
    config["metadata"]["name"] = config["spec"][
        "endpointConfigName"
    ] = config2_resource_name
    logging.debug(config)
    config2_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config2_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(config2_reference, config)
    config2_resource = k8s.wait_resource_consumed_by_controller(config2_reference)
    assert config2_resource is not None

    endpoint_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        ENDPOINT_RESOURCE_PLURAL,
        endpoint_resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(endpoint_reference, endpoint_spec)
    endpoint_resource = k8s.wait_resource_consumed_by_controller(endpoint_reference)
    assert endpoint_resource is not None

    yield (endpoint_reference, endpoint_resource, endpoint_spec, config2_resource_name)

    # Delete the k8s resources if not already deleted by tests.
    # Each delete is best effort and independent. Only ordinary exceptions
    # are swallowed so KeyboardInterrupt/SystemExit still propagate.
    for cr in (model_reference, config1_reference, config2_reference, endpoint_reference):
        try:
            k8s.delete_custom_resource(cr)
        except Exception:
            logging.debug(
                "Ignoring error while deleting %s during teardown",
                cr,
                exc_info=True,
            )
    def test_create_delete_2az(self, rds_client):
        """Create a two-AZ DB subnet group CR, wait for it, then delete it.

        Steps:
          1. Render the ``db_subnet_group_2az`` manifest with the bootstrap
             subnets and create the custom resource.
          2. Poll ``rds_client.describe_db_subnet_groups`` until the single
             returned group has ``SubnetGroupStatus`` equal to ``Complete``
             (bounded by ``CREATE_TIMEOUT_SECONDS``).
          3. Delete the custom resource, wait ``DELETE_WAIT_AFTER_SECONDS``
             and check that describing the group raises
             ``DBSubnetGroupNotFoundFault``.

        Args:
            rds_client: client exposing ``describe_db_subnet_groups`` and
                ``exceptions.DBSubnetGroupNotFoundFault``.

        Raises:
            Exception: if the group is not ``Complete`` before the timeout.
        """
        resource_name = "my-subnet-group"
        resource_desc = "my-subnet-group description"

        br_resources = get_bootstrap_resources()

        replacements = REPLACEMENT_VALUES.copy()
        replacements["DB_SUBNET_GROUP_NAME"] = resource_name
        replacements["DB_SUBNET_GROUP_DESC"] = resource_desc
        replacements["SUBNET_AZ1"] = br_resources.SubnetAZ1
        replacements["SUBNET_AZ2"] = br_resources.SubnetAZ2

        resource_data = load_resource_file(
            SERVICE_NAME,
            "db_subnet_group_2az",
            additional_replacements=replacements,
        )
        logging.debug(resource_data)

        def describe_group():
            # Fetch the group and require exactly one match.
            found = rds_client.describe_db_subnet_groups(
                DBSubnetGroupName=resource_name)
            assert found is not None
            assert len(found['DBSubnetGroups']) == 1
            return found

        # Create the custom resource and wait for the controller to pick it up.
        ref = k8s.CustomResourceReference(
            CRD_GROUP,
            CRD_VERSION,
            RESOURCE_PLURAL,
            resource_name,
            namespace="default",
        )
        k8s.create_custom_resource(ref, resource_data)
        cr = k8s.wait_resource_consumed_by_controller(ref)

        assert cr is not None
        assert k8s.get_resource_exists(ref)

        # The group must be visible in RDS straight away.
        aws_res = describe_group()

        deadline = datetime.datetime.now() + datetime.timedelta(
            seconds=CREATE_TIMEOUT_SECONDS)

        # TODO(jaypipes): Move this into generic AWS-side waiter
        while aws_res['DBSubnetGroups'][0]['SubnetGroupStatus'] != "Complete":
            if datetime.datetime.now() >= deadline:
                raise Exception(
                    "failed to find DB subnet group in Complete status before timeout"
                )
            time.sleep(CREATE_INTERVAL_SLEEP_SECONDS)
            aws_res = describe_group()

        # Delete the custom resource and give the controller time to act.
        k8s.delete_custom_resource(ref)

        time.sleep(DELETE_WAIT_AFTER_SECONDS)

        # DB subnet group should no longer appear in RDS
        try:
            rds_client.describe_db_subnet_groups(
                DBSubnetGroupName=resource_name)
        except rds_client.exceptions.DBSubnetGroupNotFoundFault:
            pass
        else:
            assert False