Esempio n. 1
0
def dagster_instance_with_k8s_scheduler(
    helm_namespace_for_k8s_run_launcher, run_launcher, k8s_scheduler, schedule_tempdir
):
    with local_port_forward_postgres(
        namespace=helm_namespace_for_k8s_run_launcher
    ) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")
            ),
            scheduler=k8s_scheduler,
        )
        yield instance
def test_execute_display_command():
    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"object_manager": {"config": {"base_dir": temp_dir}}},
        }

        # write run config to temp file
        # file is temp because intermediate storage directory is temporary
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__, "../../core_tests/execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # execute the pipeline once so that addresses have been populated.

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output
Esempio n. 3
0
def get_instance(temp_dir, event_log_storage):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=event_log_storage,
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=SyncInMemoryRunLauncher(),
    )
Esempio n. 4
0
def get_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=SqliteRunStorage.from_local(temp_dir),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
 def _readonly_in_memory_instance():
     with tempfile.TemporaryDirectory() as temp_dir:
         yield DagsterInstance(
             instance_type=InstanceType.EPHEMERAL,
             local_artifact_storage=LocalArtifactStorage(temp_dir),
             run_storage=InMemoryRunStorage(),
             event_storage=InMemoryEventLogStorage(),
             compute_log_manager=LocalComputeLogManager(temp_dir),
             run_launcher=ExplodingRunLauncher(),
             run_coordinator=DefaultRunCoordinator(),
             schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
         )
Esempio n. 6
0
def define_scheduler_instance(tempdir):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(),
        schedule_storage=SqliteScheduleStorage.from_local(os.path.join(tempdir, "schedules")),
        scheduler=SystemCronScheduler(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=SyncInMemoryRunLauncher(),
    )
 def _sqlite_asset_instance():
     with tempfile.TemporaryDirectory() as temp_dir:
         instance = DagsterInstance(
             instance_type=InstanceType.EPHEMERAL,
             local_artifact_storage=LocalArtifactStorage(temp_dir),
             run_storage=InMemoryRunStorage(),
             event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
             compute_log_manager=LocalComputeLogManager(temp_dir),
             run_coordinator=DefaultRunCoordinator(),
             run_launcher=SyncInMemoryRunLauncher(),
         )
         yield instance
Esempio n. 8
0
def dagster_instance(helm_postgres_url, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(helm_postgres_url),
            event_storage=PostgresEventLogStorage(helm_postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
    ) as instance:
        yield instance
def get_ephemeral_instance(temp_dir):
    run_store = SqliteRunStorage.from_local(temp_dir)
    event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
    compute_log_manager = LocalComputeLogManager(temp_dir)
    instance = DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=run_store,
        event_storage=event_store,
        compute_log_manager=compute_log_manager,
        run_launcher=DefaultRunLauncher(),
        run_coordinator=DefaultRunCoordinator(),
    )
    return instance
Esempio n. 10
0
def broken_compute_log_manager_instance(fail_on_setup=False, fail_on_teardown=False):
    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            yield DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=SqliteRunStorage.from_local(temp_dir),
                event_storage=SqliteEventLogStorage(temp_dir),
                compute_log_manager=BrokenComputeLogManager(
                    fail_on_setup=fail_on_setup, fail_on_teardown=fail_on_teardown
                ),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
Esempio n. 11
0
def dagster_instance_with_k8s_scheduler(helm_postgres_url_for_k8s_run_launcher,
                                        run_launcher, k8s_scheduler,
                                        schedule_tempdir):  # pylint: disable=redefined-outer-name
    with DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(
                os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(
                helm_postgres_url_for_k8s_run_launcher),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")),
            scheduler=k8s_scheduler,
    ) as instance:
        yield instance
Esempio n. 12
0
def dagster_instance(helm_postgres_url):  # pylint: disable=redefined-outer-name

    with tempfile.TemporaryDirectory() as tempdir:
        with environ({"DAGSTER_HOME": tempdir}):

            with DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(tempdir),
                run_storage=PostgresRunStorage(helm_postgres_url),
                event_storage=PostgresEventLogStorage(helm_postgres_url),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=ExplodingRunLauncher(),  # use graphql to launch any runs
                ref=InstanceRef.from_dir(tempdir),
            ) as instance:
                yield instance

                check_export_runs(instance)
Esempio n. 13
0
def dagster_instance(helm_namespace, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(
            namespace=helm_namespace) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port)
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
        )
        yield instance
Esempio n. 14
0
def dagster_instance_for_user_deployments_subchart_disabled(
    helm_postgres_url_for_user_deployments_subchart_disabled,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_user_deployments_subchart_disabled),
        event_storage=PostgresEventLogStorage(
            helm_postgres_url_for_user_deployments_subchart_disabled
        ),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
    ) as instance:
        yield instance

        check_export_runs(instance)
Esempio n. 15
0
def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            compute_log_manager = LocalComputeLogManager(temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=compute_log_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
                settings={"telemetry": {
                    "enabled": False
                }},
            )

            result = execute_pipeline(simple, instance=instance)

            assert run_store.has_run(result.run_id)
            assert run_store.get_run_by_id(
                result.run_id).status == PipelineRunStatus.SUCCESS
            assert DagsterEventType.PIPELINE_SUCCESS in [
                event.dagster_event.event_type
                for event in event_store.get_logs_for_run(result.run_id)
                if event.is_dagster_event
            ]
            stats = event_store.get_stats_for_run(result.run_id)
            assert stats.steps_succeeded == 1
            assert stats.end_time is not None
Esempio n. 16
0
def dagster_instance_for_k8s_run_launcher(
    helm_postgres_url_for_k8s_run_launcher,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    instance_ref = InstanceRef.from_dir(tempdir)

    with DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_k8s_run_launcher),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_k8s_run_launcher),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_k8s_run_launcher),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
        ref=instance_ref,
    ) as instance:
        yield instance

        check_export_runs(instance)
Esempio n. 17
0
    def test_run_record_timestamps(self, storage):
        assert storage

        self._skip_in_memory(storage)

        @op
        def a():
            pass

        @job
        def my_job():
            a()

        with tempfile.TemporaryDirectory() as temp_dir:
            if storage._instance:  # pylint: disable=protected-access
                instance = storage._instance  # pylint: disable=protected-access
            else:
                instance = DagsterInstance(
                    instance_type=InstanceType.EPHEMERAL,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=storage,
                    event_storage=InMemoryEventLogStorage(),
                    compute_log_manager=NoOpComputeLogManager(),
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=SyncInMemoryRunLauncher(),
                )

            freeze_datetime = to_timezone(
                create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
            )

            with pendulum.test(freeze_datetime):
                result = my_job.execute_in_process(instance=instance)
                records = instance.get_run_records(
                    filters=PipelineRunsFilter(run_ids=[result.run_id])
                )
                assert len(records) == 1
                record = records[0]
                assert record.start_time == freeze_datetime.timestamp()
                assert record.end_time == freeze_datetime.timestamp()
Esempio n. 18
0
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix=PREFIX,
                                          skip_empty_files=True)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
Esempio n. 19
0
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix="my_prefix",
                                          local_dir=temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                            "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
Esempio n. 20
0
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=DefaultRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
        )

        run_config = {
            "solids": {
                "create_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
                "take_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
            },
            "resources": {
                "object_manager": {
                    "config": {
                        "base_dir": temp_dir
                    }
                }
            },
        }

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config,
                                            "only_mode")

        run_config["solids"]["take_string_1_asset"]["config"][
            "input_str"] = "banana"

        assert get_step_keys_to_execute(
            asset_pipeline, run_config,
            "only_mode") == ["take_string_1_asset"]
        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config,
                                            "only_mode")
Esempio n. 21
0
def test_compute_log_manager_with_envvar(gcs_bucket):
    @job
    def simple():
        @op
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with open(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")) as f:
        with tempfile.TemporaryDirectory() as temp_dir:
            with environ({"ENV_VAR": f.read(), "DAGSTER_HOME": temp_dir}):
                run_store = SqliteRunStorage.from_local(temp_dir)
                event_store = SqliteEventLogStorage(temp_dir)
                manager = GCSComputeLogManager(
                    bucket=gcs_bucket,
                    prefix="my_prefix",
                    local_dir=temp_dir,
                    json_credentials_envvar="ENV_VAR",
                )
                instance = DagsterInstance(
                    instance_type=InstanceType.PERSISTENT,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=run_store,
                    event_storage=event_store,
                    compute_log_manager=manager,
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=DefaultRunLauncher(),
                    ref=InstanceRef.from_dir(temp_dir),
                )
                result = simple.execute_in_process(instance=instance)
                compute_steps = [
                    event.step_key
                    for event in result.all_node_events
                    if event.event_type == DagsterEventType.STEP_START
                ]
                assert len(compute_steps) == 1
                step_key = compute_steps[0]

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

                # Check GCS directly
                stderr_gcs = (
                    storage.Client()
                    .bucket(gcs_bucket)
                    .blob(f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
                    .download_as_bytes()
                    .decode("utf-8")
                )

                for expected in EXPECTED_LOGS:
                    assert expected in stderr_gcs

                # Check download behavior by deleting locally cached logs
                compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
                for filename in os.listdir(compute_logs_dir):
                    os.unlink(os.path.join(compute_logs_dir, filename))

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
Esempio n. 23
0
def test_compute_log_manager(
    mock_create_blob_client, mock_generate_blob_sas, storage_account, container, credential
):
    mock_generate_blob_sas.return_value = "fake-url"
    fake_client = FakeBlobServiceClient(storage_account)
    mock_create_blob_client.return_value = fake_client

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = AzureBlobComputeLogManager(
            storage_account=storage_account,
            container=container,
            prefix="my_prefix",
            local_dir=temp_dir,
            secret_key=credential,
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check ADLS2 directly
        adls2_object = fake_client.get_blob_client(
            container=container,
            blob="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        adls2_stderr = adls2_object.download_blob().readall().decode("utf-8")
        for expected in EXPECTED_LOGS:
            assert expected in adls2_stderr

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data