def benchmark_event():
    descriptor = {
        "spec_version": "0.1.0",
        "info": {
            "description": "some description"
        },
        "hardware": {
            "instance_type": "t3.small",
            "strategy": "single_node"
        },
        "env": {
            "docker_image": "some:image"
        },
    }
    return ExecutorBenchmarkEvent(
        action_id=ACTION_ID,
        message_id="MESSAGE_ID",
        client_id=CLIENT_ID,
        client_version="CLIENT_VERSION",
        client_username="******",
        authenticated=False,
        tstamp=42,
        visited=[],
        type="TYPE",
        payload=ExecutorPayload(toml=BenchmarkDoc(descriptor,
                                                  doc="var=val",
                                                  sha1="sha"),
                                datasets=[],
                                job=BenchmarkJob(id=ACTION_ID)),
    )
Example #2
def test_executor_event(base_event_as_dict):
    executor_event_as_dict = base_event_as_dict
    executor_event_as_dict["payload"]["datasets"] = [{
        "src": "http://foo.com",
        "md5": "None",
        "dst": "None",
        "path": "/mount/path"
    }]
    executor_event_as_dict["payload"]["models"] = [{
        "src": "http://foo.com",
        "md5": "None",
        "dst": "None",
        "path": "/mount/path"
    }]
    executor_event_as_dict["payload"]["job"] = {
        "id": "job_id",
        "extras": {
            "some_var": "some_val"
        }
    }

    event_as_json_string = json.dumps(executor_event_as_dict)

    event = ExecutorBenchmarkEvent.from_json(event_as_json_string)
    assert type(event.payload) == ExecutorPayload
    assert event.payload.job == BenchmarkJob(id="job_id",
                                             extras={"some_var": "some_val"})
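Since the event is parsed with from_json, a symmetric serializer presumably exists; a round-trip sketch, assuming the event dataclasses also expose to_json:

event = ExecutorBenchmarkEvent.from_json(event_as_json_string)
# Serialize back and re-parse; the two objects should compare equal
# (to_json alongside from_json is an assumption about these dataclasses).
assert ExecutorBenchmarkEvent.from_json(event.to_json()) == event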
    def schedule(self, event: FetcherBenchmarkEvent) -> BenchmarkJob:
        """
        Creates k8s resources for a scheduled benchmark
        :param event: The fetcher benchmark event initiating the benchmark
        :return: a Benchmark job
        """
        descriptor_contents = event.payload.toml.contents
        job_id = K8SExecutionEngine.JOB_ID_PREFIX + event.action_id

        try:
            yaml = create_scheduled_job_yaml_spec(descriptor_contents,
                                                  self.config,
                                                  job_id,
                                                  event=event)
            self._kubernetes_apply(yaml)
        except DescriptorError as e:
            logger.exception("Error parsing descriptor file")
            raise ExecutionEngineException(
                f"Error parsing descriptor file: {str(e)}") from e
        except CalledProcessError as e:
            logger.exception("Error creating scheduled benchmark")
            raise ExecutionEngineException(
                f"Error executing benchmark: {str(e)}") from e

        return BenchmarkJob(id=job_id,
                            extras={K8SExecutionEngine.EXTRA_K8S_YAML: yaml})
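schedule() delegates the actual kubectl invocation to self._kubernetes_apply, which is not shown in this snippet. The happy-path test below pins down its observable behavior; a minimal sketch consistent with that test (the KUBECTL constant and the encoding choice are assumptions):

import subprocess

def _kubernetes_apply(self, yaml: str):
    # Pipe the rendered YAML into `kubectl apply -f -`. check_output raises
    # CalledProcessError on a non-zero exit, which schedule() translates
    # into an ExecutionEngineException.
    subprocess.check_output([KUBECTL, "apply", "-f", "-"], input=yaml.encode())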
def test_k8s_engine_happy_path(
    k8s_execution_engine: K8SExecutionEngine,
    valid_fetcher_event: FetcherBenchmarkEvent,
    mock_subprocess_check_output,
    mock_create_job_yaml_spec,
):
    job = k8s_execution_engine.run(valid_fetcher_event)
    mock_subprocess_check_output.assert_called_once_with([KUBECTL, "apply", "-f", "-"], input=SOME_YAML_ENCODED)

    assert job == BenchmarkJob(
        id=K8SExecutionEngine.JOB_ID_PREFIX + valid_fetcher_event.action_id,
        extras={K8SExecutionEngine.EXTRA_K8S_YAML: SOME_YAML},
    )
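The two mock fixtures used by this test are defined elsewhere; a plausible sketch using pytest-mock (the patch targets and the SOME_YAML constant are assumptions for illustration):

@fixture
def mock_create_job_yaml_spec(mocker):
    # Patch the YAML factory where the engine imports it, so schedule()
    # receives SOME_YAML without rendering real templates (path assumed).
    return mocker.patch(
        "bai_kafka_utils.executors.k8s_execution_engine.create_scheduled_job_yaml_spec",
        return_value=SOME_YAML,
    )

@fixture
def mock_subprocess_check_output(mocker):
    # Patch check_output so no real kubectl binary is required; if the engine
    # imports check_output directly, patch it on the engine module instead.
    return mocker.patch("subprocess.check_output")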
def test_run(
    descriptor: BenchmarkDescriptor,
    sm_execution_engine_to_test: SageMakerExecutionEngine,
    fetcher_event: FetcherBenchmarkEvent,
    mock_create_source_dir,
    mock_estimator: Framework,
):
    job = sm_execution_engine_to_test.run(fetcher_event)

    mock_estimator.fit.assert_called_with({DATASET_ID: DATASET_S3_URI},
                                          job_name=ACTION_ID,
                                          wait=False,
                                          logs=False)
    mock_create_source_dir.assert_called_with(descriptor, TMP_DIR, SCRIPTS)

    assert job == BenchmarkJob(CREATED_JOB_ID)
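The mock_estimator fixture is defined elsewhere; a plausible sketch, given that the run() method in the next example reads latest_training_job.name after fit() and this test expects CREATED_JOB_ID back:

from unittest.mock import MagicMock

@fixture
def mock_estimator():
    # A bare MagicMock standing in for the SageMaker Framework estimator;
    # pre-set the training-job name that run() copies into the BenchmarkJob id.
    estimator = MagicMock()
    estimator.latest_training_job.name = CREATED_JOB_ID
    return estimator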
Example #6
    def run(self, event: FetcherBenchmarkEvent) -> BenchmarkJob:
        logger.info(f"Processing SageMaker benchmark {event.action_id}")
        try:
            descriptor = BenchmarkDescriptor.from_dict(event.payload.toml.contents, CONFIG)
        except DescriptorError as e:
            logger.exception("Could not parse descriptor %s", e)
            raise ExecutionEngineException("Cannot process the request") from e

        with tempfile.TemporaryDirectory(prefix=self.config.tmp_sources_dir) as tmpdirname:
            ScriptSourceDirectory.create(descriptor, tmpdirname, event.payload.scripts)

            session = self.session_factory()
            try:
                estimator = self.estimator_factory(session, descriptor, tmpdirname, self.config)
            except Exception as e:
                logger.exception("Could not create estimator %s", e)
                raise ExecutionEngineException("Cannot create estimator") from e

            # Estimate total dataset size and grow the training volume to fit it
            total_size_gb = self._estimate_total_gb(event)
            estimator.train_volume_size = max(estimator.train_volume_size, total_size_gb)

            data = self._get_estimator_data(event)

            try:
                job_name = SageMakerExecutionEngine._get_job_name(event.action_id)
                merge = False
                if descriptor.custom_params and descriptor.custom_params.sagemaker_job_name:
                    job_name = descriptor.custom_params.sagemaker_job_name
                if descriptor.custom_params and descriptor.custom_params.merge:
                    merge = descriptor.custom_params.merge
                logger.info(f"Attempting to start training job {job_name}")
                if merge:
                    estimator.fit(data, wait=True, logs=False, job_name=job_name)
                    self.merge_metrics(descriptor)
                else:
                    estimator.fit(data, wait=False, logs=False, job_name=job_name)
            except botocore.exceptions.ClientError as err:
                error_message = get_client_error_message(err, default="Unknown")
                raise ExecutionEngineException(
                    f"Benchmark creation failed. SageMaker returned error: {error_message}"
                ) from err
            except Exception as err:
                logger.exception("Caught unexpected exception: %s", err)
                raise
            return BenchmarkJob(id=estimator.latest_training_job.name)
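get_client_error_message is a small helper imported from elsewhere; one plausible implementation, based on the documented structure of botocore's ClientError:

def get_client_error_message(error: botocore.exceptions.ClientError, default: str) -> str:
    # botocore attaches the parsed service response to the exception; the
    # human-readable message lives under response["Error"]["Message"].
    return error.response.get("Error", {}).get("Message", default)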
Example #7
def test_producer(benchmark_event: BenchmarkEvent, kafka_service_config: KafkaServiceConfig):
    consumer, producer = create_kafka_consumer_producer(kafka_service_config, SERVICE_NAME)

    expected_job = BenchmarkJob(id=JOB_ID, k8s_yaml="")
    expected_payload = ExecutorPayload.create_from_fetcher_payload(benchmark_event.payload, job=expected_job)
    expected_event = create_from_object(ExecutorBenchmarkEvent, benchmark_event, payload=expected_payload)
    producer.send(kafka_service_config.producer_topic, benchmark_event, key=benchmark_event.client_id)

    filter_event = get_message_is_the_response(benchmark_event)
    is_expected_event = get_event_equals(expected_event)

    while True:
        records = consumer.poll(POLL_TIMEOUT_MS)
        for topic, recs in records.items():
            for msg in recs:
                if filter_event(msg.value):
                    assert is_expected_event(msg.value)
                    consumer.close()
                    return
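The predicate factories get_message_is_the_response and get_event_equals come from a shared test utility; a plausible sketch of their shape (matching responses on action_id is an assumption):

def get_message_is_the_response(source_event: BenchmarkEvent):
    # A response to our request should carry the action_id we produced.
    def filter_event(event: BenchmarkEvent) -> bool:
        return event is not None and event.action_id == source_event.action_id
    return filter_event

def get_event_equals(expected: BenchmarkEvent):
    def is_expected(actual: BenchmarkEvent) -> bool:
        return actual == expected
    return is_expected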
from unittest.mock import create_autospec

from pytest import fixture

from bai_kafka_utils.events import BenchmarkJob
from bai_kafka_utils.executors.execution_callback import ExecutorEventHandler, ExecutionEngine, ExecutionEngineException
from bai_kafka_utils.kafka_service import KafkaService, KafkaServiceCallbackException

JOB_ID = "ID"

ENGINE_2_ID = "ENG2"

ENGINE_1_ID = "ENG1"

REMOTE_ENGINE_ID = "REMOTE"

VALID_ENGINES = [ENGINE_1_ID, ENGINE_2_ID, REMOTE_ENGINE_ID]

PRODUCER_TOPIC = "TOPIC"

BENCHMARK_JOB = BenchmarkJob(JOB_ID)


@fixture
def engine1():
    return create_engine_mock()


def create_engine_mock():
    mock = create_autospec(ExecutionEngine)
    mock.run.return_value = BENCHMARK_JOB
    return mock


@fixture
def engine2():