Example #1
def test_blank_compute_logs(mock_s3_bucket):
    with tempfile.TemporaryDirectory() as temp_dir:
        manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                      prefix="my_prefix",
                                      local_dir=temp_dir)

        # simulate subscription to an in-progress run, where there is no key in the bucket
        stdout = manager.read_logs_file("my_run_id", "my_step_key",
                                        ComputeIOType.STDOUT)
        stderr = manager.read_logs_file("my_run_id", "my_step_key",
                                        ComputeIOType.STDERR)

        assert not stdout.data
        assert not stderr.data
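
All five examples exercise dagster_aws's S3ComputeLogManager against a fake S3. They rely on a mock_s3_bucket pytest fixture defined elsewhere in the source test suite; a minimal sketch of such a fixture, assuming the moto library (the fixture body and the bucket name are assumptions, not copied from the source):

import boto3
import pytest
from moto import mock_s3  # moto's in-memory S3 stand-in; no AWS credentials needed


@pytest.fixture
def mock_s3_bucket():
    # Every boto3 call inside this context hits moto's fake S3, so the
    # tests never touch real AWS.
    with mock_s3():
        s3 = boto3.resource("s3")
        yield s3.create_bucket(Bucket="test-bucket")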
Example #2
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix=PREFIX,
                                          skip_empty_files=True)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
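
Example #2 exercises the skip_empty_files option: nothing is printed to stdout, so no .out object is uploaded, and fetching it straight from the bucket raises botocore's ClientError. For reference, the snippet needs roughly the following imports; the paths below are as in pre-1.0 Dagster releases (later versions moved these modules under dagster._core.*), so treat them as assumptions to check against your installed version:

import tempfile

import pytest
from botocore.exceptions import ClientError
from dagster import job, op
from dagster.core.instance import DagsterInstance, InstanceRef, InstanceType
from dagster.core.launcher import DefaultRunLauncher
from dagster.core.run_coordinator import DefaultRunCoordinator
from dagster.core.storage.event_log import SqliteEventLogStorage
from dagster.core.storage.root import LocalArtifactStorage
from dagster.core.storage.runs import SqliteRunStorage
from dagster.core.test_utils import environ
from dagster_aws.s3 import S3ComputeLogManager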
Example #3
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
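
Examples #3 through #5 additionally compare captured output against module-level constants from the source test file. Plausible definitions are sketched below; the exact strings, in particular the event wording in EXPECTED_LOGS, are assumptions:

import os

HELLO_WORLD = "Hello World"  # the line each op/solid prints to stdout
SEPARATOR = os.linesep       # trailing separator appended by the log capture

# Substrings expected somewhere in the captured stderr stream.
EXPECTED_LOGS = [
    'STEP_START - Started execution of step "easy"',
    'STEP_SUCCESS - Finished execution of step "easy"',
]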
Example #4
def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            print(HELLO_WORLD)
            return 'easy'

        easy()

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket,
                                      prefix='my_prefix',
                                      local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key='{prefix}/storage/{run_id}/compute_logs/easy.compute.err'.format(
                prefix='my_prefix', run_id=result.run_id),
        )
        stderr_s3 = six.ensure_str(s3_object['Body'].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                        'compute_logs')
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
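
Example #4 is an older variant of the same test: it uses the legacy @pipeline/@solid APIs, CliApiRunLauncher, the seven/six compatibility shims, the easy.compute.err key naming, and a plain s3_bucket fixture that yields a bucket name instead of a boto3 Bucket object. A sketch of that fixture, again assuming moto (the bucket name is an assumption):

import pytest
from moto import mock_s3


@pytest.fixture
def s3_bucket():
    # Yield only the bucket name; the test creates the bucket itself via
    # boto3.client("s3").create_bucket(...) inside moto's context.
    with mock_s3():
        yield "test-bucket"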
Example #5
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix="my_prefix",
                                          local_dir=temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                            "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
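
Outside of tests, an S3ComputeLogManager is normally enabled through the instance's dagster.yaml rather than constructed in code. A sketch of the equivalent configuration, written out from Python purely for illustration and following dagster_aws's documented schema (verify the keys against your Dagster version):

# Contents of an equivalent dagster.yaml entry.
DAGSTER_YAML = """\
compute_logs:
  module: dagster_aws.s3.compute_log_manager
  class: S3ComputeLogManager
  config:
    bucket: my-bucket          # target bucket (required)
    prefix: my_prefix          # same key prefix the examples use
    skip_empty_files: true     # the option Example #2 exercises
"""

with open("dagster.yaml", "w") as f:
    f.write(DAGSTER_YAML)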