def test_fs_stores():
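    # Persistent instance wired to SQLite run and event storage plus local compute
    # logs, all rooted in one temp dir; asserts run status and per-run event stats.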
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            return 'easy'

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
        )

        result = execute_pipeline(simple, instance=instance)

        assert run_store.has_run(result.run_id)
        assert run_store.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS
        assert DagsterEventType.PIPELINE_SUCCESS in [
            event.dagster_event.event_type
            for event in event_store.get_logs_for_run(result.run_id)
            if event.is_dagster_event
        ]
        stats = event_store.get_stats_for_run(result.run_id)
        assert stats.steps_succeeded == 1
        assert stats.end_time is not None
Example #2
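# Fixture: event log in a port-forwarded Postgres, runs and schedules in local
# SQLite, scheduled via the Kubernetes scheduler under test.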
def dagster_instance_with_k8s_scheduler(
    helm_namespace_for_k8s_run_launcher, run_launcher, k8s_scheduler, schedule_tempdir
):
    with local_port_forward_postgres(
        namespace=helm_namespace_for_k8s_run_launcher
    ) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")
            ),
            scheduler=k8s_scheduler,
        )
        yield instance
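# Runs the `list_versions` CLI command against a memoized asset pipeline both
# before and after executing it, asserting that each invocation produces output.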
def test_execute_display_command():
    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"object_manager": {"config": {"base_dir": temp_dir}}},
        }

        # write the run config to a temp file; it has to be temporary because it
        # points at the temporary intermediate storage directory
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__, "../../core_tests/execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # execute the pipeline once so that addresses have been populated.

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output
Example #4
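# Minimal ephemeral instance: SQLite run storage, in-memory event log, and a
# no-op compute log manager, all discarded with the temp dir.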
def get_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=SqliteRunStorage.from_local(temp_dir),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
Example #5
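# Verifies that wipe() clears every run from an in-memory SQLite run store.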
def test_nuke():
    storage = SqliteRunStorage.mem()

    assert storage
    run_id = str(uuid.uuid4())

    storage.create_run(run_id=run_id, pipeline_name='some_pipeline')

    assert len(storage.all_runs) == 1

    storage.wipe()

    assert list(storage.all_runs) == []
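# Helper that assembles a self-contained persistent instance from local storage
# rooted at temp_dir.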
def get_ephemeral_instance(temp_dir):
    run_store = SqliteRunStorage.from_local(temp_dir)
    event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
    compute_log_manager = LocalComputeLogManager(temp_dir)
    instance = DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=run_store,
        event_storage=event_store,
        compute_log_manager=compute_log_manager,
        run_launcher=DefaultRunLauncher(),
        run_coordinator=DefaultRunCoordinator(),
    )
    return instance
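# Yields an instance whose compute log manager is rigged to fail on setup and/or
# teardown, for exercising error handling around captured logs.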
def broken_compute_log_manager_instance(fail_on_setup=False, fail_on_teardown=False):
    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            yield DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=SqliteRunStorage.from_local(temp_dir),
                event_storage=SqliteEventLogStorage(temp_dir),
                compute_log_manager=BrokenComputeLogManager(
                    fail_on_setup=fail_on_setup, fail_on_teardown=fail_on_teardown
                ),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
Example #8
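# Variant of the k8s-scheduler fixture that takes the Postgres URL directly and
# opens the instance as a context manager so it is disposed on exit.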
def dagster_instance_with_k8s_scheduler(helm_postgres_url_for_k8s_run_launcher,
                                        run_launcher, k8s_scheduler,
                                        schedule_tempdir):  # pylint: disable=redefined-outer-name
    with DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(
                os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(
                helm_postgres_url_for_k8s_run_launcher),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")),
            scheduler=k8s_scheduler,
    ) as instance:
        yield instance
Example #9
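# Round-trips a run through an in-memory SQLite run store via all_runs and
# get_run_by_id.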
def test_sqlite_mem_storage():
    storage = SqliteRunStorage.mem()

    assert storage

    run_id = str(uuid.uuid4())

    storage.create_run(run_id=run_id, pipeline_name='some_pipeline')

    assert len(storage.all_runs) == 1

    run = storage.all_runs[0]

    assert run.run_id == run_id
    assert run.pipeline_name == 'some_pipeline'

    fetched_run = storage.get_run_by_id(run_id)
    assert fetched_run.run_id == run_id
    assert fetched_run.pipeline_name == 'some_pipeline'
Example #10
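# Same smoke test as the first example, but with DAGSTER_HOME set, an InstanceRef,
# and telemetry disabled through instance settings.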
def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            compute_log_manager = LocalComputeLogManager(temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=compute_log_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
                settings={"telemetry": {
                    "enabled": False
                }},
            )

            result = execute_pipeline(simple, instance=instance)

            assert run_store.has_run(result.run_id)
            assert run_store.get_run_by_id(
                result.run_id).status == PipelineRunStatus.SUCCESS
            assert DagsterEventType.PIPELINE_SUCCESS in [
                event.dagster_event.event_type
                for event in event_store.get_logs_for_run(result.run_id)
                if event.is_dagster_event
            ]
            stats = event_store.get_stats_for_run(result.run_id)
            assert stats.steps_succeeded == 1
            assert stats.end_time is not None
Example #11
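# With skip_empty_files=True the S3 compute log manager uploads stderr, which has
# log output, but skips the empty stdout file, so fetching it raises ClientError.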
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix=PREFIX,
                                          skip_empty_files=True)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
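# End-to-end S3 compute log round trip: read stdout/stderr through the manager,
# verify the uploaded object in S3, then delete the local cache and re-read.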
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
Example #13
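# Fixture yielding a SQLite run storage rooted in a temporary directory.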
def create_sqlite_run_storage():
    with seven.TemporaryDirectory() as tempdir:
        yield SqliteRunStorage.from_local(tempdir)
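# Older variant of the S3 compute log test that creates the mock bucket itself
# and uses the legacy "easy.compute.err" key layout.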
def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            print(HELLO_WORLD)
            return 'easy'

        easy()

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket,
                                      prefix='my_prefix',
                                      local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key='{prefix}/storage/{run_id}/compute_logs/easy.compute.err'.format(
                prefix='my_prefix', run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object['Body'].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                        'compute_logs')
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
Example #15
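# Memoization dev loop: after a successful run no steps remain to execute, and
# changing one solid's config marks only that step for re-execution.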
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=DefaultRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
        )

        run_config = {
            "solids": {
                "create_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
                "take_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
            },
            "resources": {
                "object_manager": {
                    "config": {
                        "base_dir": temp_dir
                    }
                }
            },
        }

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config,
                                            "only_mode")

        run_config["solids"]["take_string_1_asset"]["config"][
            "input_str"] = "banana"

        assert get_step_keys_to_execute(
            asset_pipeline, run_config,
            "only_mode") == ["take_string_1_asset"]
        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success
        assert not get_step_keys_to_execute(asset_pipeline, run_config,
                                            "only_mode")
Example #16
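# Same dev loop on a larger pipeline: changing take_string_1 invalidates it and
# its downstream take_string_two_inputs; changing the downstream solid alone
# invalidates only that one step.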
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=CliApiRunLauncher(),
        )

        run_config = {
            "solids": {
                "create_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "create_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_two_inputs": {"config": {"input_str": "apple", "base_dir": temp_dir}},
            },
            "intermediate_storage": {"filesystem": {"config": {"base_dir": temp_dir}}},
        }

        result = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_1"]["config"]["input_str"] = "banana"

        assert set(
            get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
        ) == set(["take_string_1.compute", "take_string_two_inputs.compute"])

        result2 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result2.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_two_inputs"]["config"]["input_str"] = "banana"

        assert get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode") == [
            "take_string_two_inputs.compute"
        ]

        result3 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result3.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
Example #17
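# GCS compute log manager configured from JSON credentials passed through an
# environment variable instead of a credentials file on disk.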
def test_compute_log_manager_with_envvar(gcs_bucket):
    @job
    def simple():
        @op
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with open(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")) as f:
        with tempfile.TemporaryDirectory() as temp_dir:
            with environ({"ENV_VAR": f.read(), "DAGSTER_HOME": temp_dir}):
                run_store = SqliteRunStorage.from_local(temp_dir)
                event_store = SqliteEventLogStorage(temp_dir)
                manager = GCSComputeLogManager(
                    bucket=gcs_bucket,
                    prefix="my_prefix",
                    local_dir=temp_dir,
                    json_credentials_envvar="ENV_VAR",
                )
                instance = DagsterInstance(
                    instance_type=InstanceType.PERSISTENT,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=run_store,
                    event_storage=event_store,
                    compute_log_manager=manager,
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=DefaultRunLauncher(),
                    ref=InstanceRef.from_dir(temp_dir),
                )
                result = simple.execute_in_process(instance=instance)
                compute_steps = [
                    event.step_key
                    for event in result.all_node_events
                    if event.event_type == DagsterEventType.STEP_START
                ]
                assert len(compute_steps) == 1
                step_key = compute_steps[0]

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

                # Check GCS directly
                stderr_gcs = (
                    storage.Client()
                    .bucket(gcs_bucket)
                    .blob(f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
                    .download_as_bytes()
                    .decode("utf-8")
                )

                for expected in EXPECTED_LOGS:
                    assert expected in stderr_gcs

                # Check download behavior by deleting locally cached logs
                compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
                for filename in os.listdir(compute_logs_dir):
                    os.unlink(os.path.join(compute_logs_dir, filename))

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data
Example #18
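# @op/@job (post-solid API) version of the S3 compute log round-trip test.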
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix="my_prefix",
                                          local_dir=temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                            "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
Example #19
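# Azure Blob compute log test: a fake blob service client and patched SAS
# generation stand in for real Azure credentials.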
def test_compute_log_manager(
    mock_create_blob_client, mock_generate_blob_sas, storage_account, container, credential
):
    mock_generate_blob_sas.return_value = "fake-url"
    fake_client = FakeBlobServiceClient(storage_account)
    mock_create_blob_client.return_value = fake_client

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = AzureBlobComputeLogManager(
            storage_account=storage_account,
            container=container,
            prefix="my_prefix",
            local_dir=temp_dir,
            secret_key=credential,
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check ADLS2 directly
        adls2_object = fake_client.get_blob_client(
            container=container,
            blob="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        adls2_stderr = adls2_object.download_blob().readall().decode("utf-8")
        for expected in EXPECTED_LOGS:
            assert expected in adls2_stderr

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data