Exemple #1
0
def test_create_executor_config(config_args, config_env,
                                mock_availability_zones):
    """Check that ``create_executor_config`` yields the expected ``ExecutorConfig``.

    The expected value is assembled by hand from the arguments that
    ``get_args`` parses out of the same fixtures, together with an
    ``EnvironmentInfo`` wrapping the ``mock_availability_zones`` fixture value.
    """
    actual_config = create_executor_config(config_args, config_env)

    parsed = get_args(config_args, config_env)

    descriptor_part = DescriptorConfig(
        valid_strategies=parsed.valid_strategies,
        valid_frameworks=parsed.valid_frameworks,
    )

    # Each of these BaiConfig fields is filled from the parsed argument that
    # carries the very same name.
    bai_field_names = (
        "puller_docker_image",
        "puller_mount_chmod",
        "metrics_pusher_docker_image",
        "metrics_extractor_docker_image",
        "job_status_trigger_docker_image",
        "cron_job_docker_image",
    )
    bai_part = BaiConfig(
        **{field: getattr(parsed, field) for field in bai_field_names}
    )

    environment_part = EnvironmentInfo(mock_availability_zones)

    expected_config = ExecutorConfig(
        descriptor_config=descriptor_part,
        bai_config=bai_part,
        environment_info=environment_part,
        kubectl=parsed.kubectl,
    )

    assert actual_config == expected_config
Exemple #2
0
def test_main(mock_create_executor, mock_availability_zones, mock_env):
    """Run ``main`` with a full command line and check the ``create_executor`` call.

    The mocked ``create_executor`` must have been called with a
    ``KafkaServiceConfig`` and an ``ExecutorConfig`` equal to the ones built
    here from the module-level test constants.
    """
    # NOTE(review): the import is kept inside the test body, presumably so the
    # fixtures are already active when the module is loaded - confirm.
    from executor.__main__ import main

    command_line = (
        f" --consumer-topic {CONSUMER_TOPIC} "
        f" --producer-topic {PRODUCER_TOPIC} "
        f" --cmd-submit-topic {CMD_SUBMIT_TOPIC}"
        f" --cmd-return-topic {CMD_RETURN_TOPIC}"
        f" --status-topic {STATUS_TOPIC} "
        f" --bootstrap-servers {BOOTSTRAP_SERVERS_ARG} "
        f" --logging-level {LOGGING_LEVEL} "
        f" --kubectl {KUBECTL} "
        f" --transpiler-valid-strategies {VALID_STRATEGIES} "
        f" --transpiler-valid-frameworks {VALID_FRAMEWORKS} "
        f" --valid-execution-engines {VALID_EXECUTION_ENGINES} "
        f" --transpiler-puller-mount-chmod {PULLER_MOUNT_CHMOD} "
        f" --transpiler-puller-docker-image {PULLER_DOCKER_IMAGE} "
        f" --transpiler-metrics-pusher-docker-image {METRICS_PUSHER_DOCKER_IMAGE} "
        f" --transpiler-metrics-extractor-docker-image {METRICS_EXTRACTOR_DOCKER_IMAGE} "
        f" --transpiler-cron-job-docker-image {CRON_JOB_DOCKER_IMAGE} "
        f" --transpiler-job-status-trigger-docker-image {JOB_STATUS_TRIGGER_DOCKER_IMAGE} "
        f" --suppress-job-affinity"
    )
    main(command_line)

    # Expected replication factor: the default, capped at the number of
    # bootstrap servers.
    wanted_replication = min(DEFAULT_REPLICATION_FACTOR, len(BOOTSTRAP_SERVERS))
    wanted_kafka_cfg = KafkaServiceConfig(
        consumer_topic=CONSUMER_TOPIC,
        producer_topic=PRODUCER_TOPIC,
        cmd_submit_topic=CMD_SUBMIT_TOPIC,
        cmd_return_topic=CMD_RETURN_TOPIC,
        bootstrap_servers=BOOTSTRAP_SERVERS,
        logging_level=LOGGING_LEVEL,
        status_topic=STATUS_TOPIC,
        replication_factor=wanted_replication,
        num_partitions=DEFAULT_NUM_PARTITIONS,
    )

    # The comma-separated command-line values are expected as lists.
    wanted_descriptor_cfg = DescriptorConfig(
        valid_strategies=VALID_STRATEGIES.split(","),
        valid_frameworks=VALID_FRAMEWORKS.split(","),
    )
    wanted_bai_cfg = BaiConfig(
        puller_mount_chmod=PULLER_MOUNT_CHMOD,
        puller_docker_image=PULLER_DOCKER_IMAGE,
        metrics_pusher_docker_image=METRICS_PUSHER_DOCKER_IMAGE,
        metrics_extractor_docker_image=METRICS_EXTRACTOR_DOCKER_IMAGE,
        cron_job_docker_image=CRON_JOB_DOCKER_IMAGE,
        job_status_trigger_docker_image=JOB_STATUS_TRIGGER_DOCKER_IMAGE,
        suppress_job_affinity=SUPPRESS_JOB_AFFINITY,
    )
    wanted_environment = EnvironmentInfo(
        availability_zones=mock_availability_zones,
    )
    wanted_executor_cfg = ExecutorConfig(
        kubectl=KUBECTL,
        valid_execution_engines=VALID_EXECUTION_ENGINES.split(","),
        descriptor_config=wanted_descriptor_cfg,
        bai_config=wanted_bai_cfg,
        environment_info=wanted_environment,
    )

    mock_create_executor.assert_called_with(wanted_kafka_cfg, wanted_executor_cfg)
Exemple #3
0
def descriptor_config():
    """Return a ``DescriptorConfig`` accepting every ``DistributedStrategy`` value.

    The accepted frameworks are the empty string, ``"mxnet"`` and
    ``"tensorflow"``.
    """
    strategies = [strategy.value for strategy in DistributedStrategy]
    frameworks = ["", "mxnet", "tensorflow"]
    return DescriptorConfig(valid_strategies=strategies, valid_frameworks=frameworks)
Exemple #4
0
def create_descriptor_config(args):
    """Build a ``DescriptorConfig`` from parsed arguments.

    Args:
        args: Object exposing the ``transpiler_valid_strategies`` and
            ``transpiler_valid_frameworks`` attributes.

    Returns:
        A ``DescriptorConfig`` whose ``valid_strategies`` and
        ``valid_frameworks`` are taken from those two attributes.
    """
    strategies = args.transpiler_valid_strategies
    frameworks = args.transpiler_valid_frameworks
    return DescriptorConfig(valid_strategies=strategies, valid_frameworks=frameworks)
Exemple #5
0
    "SAGEMAKER_NODATASET": MOCK_NODATA_SET,
    "TMP_SOURCES_DIR": MOCK_TMP_DIR,
    "VALID_EXECUTION_ENGINES": MOCK_VALID_ENGINES,
    "TRANSPILER_VALID_FRAMEWORKS": MOCK_VALID_FRAMEWORKS,
    "TRANSPILER_VALID_STRATEGIES": MOCK_VALID_STRATEGIES,
    "SAGEMAKER_SUBNETS": MOCK_SUBNETS,
    "SAGEMAKER_SECURITY_GROUP_IDS": MOCK_SECURITY_GROUP_IDS,
}

# Reference configuration that the tests in this module compare the result of
# ``create_executor_config`` against.
EXPECTED_SM_CONFIG = SageMakerExecutorConfig(
    # List-valued settings.
    descriptor_config=DescriptorConfig(
        valid_strategies=["s1", "s2"],
        valid_frameworks=["f1", "f2"],
    ),
    valid_execution_engines=["e1", "e2"],
    subnets=["n1", "n2"],
    security_group_ids=["sg1", "sg2"],
    # Settings taken straight from the mock constants.
    sm_role=MOCK_SAGEMAKER_ROLE,
    s3_output_bucket=MOCK_SAGEMAKER_OUTPUT_BUCKET,
    s3_nodata=MOCK_NODATA_SET,
    tmp_sources_dir=MOCK_TMP_DIR,
)


def test_config_cmdline():
    """``CMD_LINE`` with an empty environment must yield ``EXPECTED_SM_CONFIG``."""
    empty_env = {}
    assert create_executor_config(CMD_LINE, empty_env) == EXPECTED_SM_CONFIG


def test_config_env():
    """``MOCK_ENV`` with an empty command line must yield ``EXPECTED_SM_CONFIG``."""
    no_cmd_line = ""
    assert create_executor_config(no_cmd_line, MOCK_ENV) == EXPECTED_SM_CONFIG
Exemple #6
0
import sagemaker
from bai_kafka_utils.events import FetcherBenchmarkEvent, BenchmarkJob
from bai_kafka_utils.executors.descriptor import DescriptorConfig, BenchmarkDescriptor, DescriptorError
from bai_kafka_utils.executors.execution_callback import (
    ExecutionEngine,
    ExecutionEngineException,
    NoResourcesFoundException,
)
from bai_sagemaker_utils.utils import get_client_error_message, is_not_found_error
from sm_executor.args import SageMakerExecutorConfig
from sm_executor.estimator_factory import EstimatorFactory
from sm_executor.frameworks import MXNET_FRAMEWORK, TENSORFLOW_FRAMEWORK
from sm_executor.source_dir import ScriptSourceDirectory
from cloudwatch_exporter.cloudwatch_exporter import check_dashboard

# Module-level descriptor configuration. The two lists are passed positionally;
# NOTE(review): presumably (valid_strategies, valid_frameworks) - confirm
# against the DescriptorConfig signature.
CONFIG = DescriptorConfig(["single_node", "horovod"], [TENSORFLOW_FRAMEWORK, MXNET_FRAMEWORK])

# Type alias: a zero-argument callable that returns a sagemaker.Session.
SageMakerSessionFactory = Callable[[], sagemaker.Session]

# Logger named after this module.
logger = logging.getLogger(__name__)


class SageMakerExecutionEngine(ExecutionEngine):
    ENGINE_ID = "aws.sagemaker"

    SAFETY_FACTOR = 1.1

    def __init__(
        self,
        session_factory: SageMakerSessionFactory,
        estimator_factory: EstimatorFactory,
Exemple #7
0
def test_create_descriptor_config(config_args, config_env):
    """Check ``create_descriptor_config`` against a hand-built ``DescriptorConfig``.

    The expected value is built from the ``valid_strategies`` and
    ``valid_frameworks`` attributes of the arguments returned by ``get_args``.
    """
    parsed = get_args(config_args, config_env)
    wanted = DescriptorConfig(
        valid_strategies=parsed.valid_strategies,
        valid_frameworks=parsed.valid_frameworks,
    )
    assert create_descriptor_config(parsed) == wanted
Exemple #8
0
def descriptor_config() -> DescriptorConfig:
    """Return a ``DescriptorConfig`` for the two supported strategies and frameworks.

    Strategies are ``"single_node"`` and ``"horovod"``; frameworks are
    ``TENSORFLOW_FRAMEWORK`` and ``MXNET_FRAMEWORK``.
    """
    strategies = ["single_node", "horovod"]
    frameworks = [TENSORFLOW_FRAMEWORK, MXNET_FRAMEWORK]
    return DescriptorConfig(valid_strategies=strategies, valid_frameworks=frameworks)
Exemple #9
0
def descriptor_config():
    """Return a ``DescriptorConfig`` accepting every ``DistributedStrategy`` value.

    The accepted frameworks are the empty string, ``"mxnet"`` and
    ``"tensorflow"``.
    """
    return DescriptorConfig(
        valid_strategies=[strategy.value for strategy in DistributedStrategy],
        valid_frameworks=["", "mxnet", "tensorflow"],
    )