Example #1
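# Caches a file from S3 into a local temp directory using a boto3 client that is presumably backed by a fake S3 (see the moto note in Example #3), then verifies the cached file on disk and snapshots the bucket contents.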
def test_cache_file_from_s3_step_three_fake(snapshot):
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')
    s3.put_object(Bucket='some-bucket', Key='some-key', Body=b'foo')

    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3),
            input_values={
                's3_coord': {
                    'bucket': 'some-bucket',
                    'key': 'some-key'
                }
            },
        )

        target_file = os.path.join(temp_dir, 'some-key')
        assert os.path.exists(target_file)

        with open(target_file, 'rb') as ff:
            assert ff.read() == b'foo'

    snapshot.assert_match({
        'some-bucket': {
            k: s3.get_object(Bucket='some-bucket', Key=k)['Body'].read()
            for k in [
                obj['Key']
                for obj in s3.list_objects(Bucket='some-bucket')['Contents']
            ]
        }
    })
Example #2
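# The solid declares an Int output but returns a str, so execute_solid fails the output type check and raises DagsterTypeCheckError in this API vintage.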
def test_single_solid_type_checking_output_error():
    @lambda_solid(output_def=OutputDefinition(Int))
    def return_string():
        return 'ksjdfkjd'

    with pytest.raises(DagsterTypeCheckError):
        execute_solid(return_string)
Example #3
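# Essentially Example #1 with region_name pinned to "us-east-1", per the linked moto issue.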
def test_cache_file_from_s3_step_three_fake(snapshot):
    # https://github.com/spulec/moto/issues/3292
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="some-bucket")
    s3.put_object(Bucket="some-bucket", Key="some-key", Body=b"foo")

    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3),
            input_values={
                "s3_coord": {
                    "bucket": "some-bucket",
                    "key": "some-key"
                }
            },
        )

        target_file = os.path.join(temp_dir, "some-key")
        assert os.path.exists(target_file)

        with open(target_file, "rb") as ff:
            assert ff.read() == b"foo"

    snapshot.assert_match({
        "some-bucket": {
            k: s3.get_object(Bucket="some-bucket", Key=k)["Body"].read()
            for k in [
                obj["Key"]
                for obj in s3.list_objects(Bucket="some-bucket")["Contents"]
            ]
        }
    })
Example #4
# A solid that requires a "num" resource is executed twice: once with explicit run_config and once relying on the resource's default_value of 2.
def test_single_solid_with_context_config():
    @resource(config_schema=Field(Int, is_required=False, default_value=2))
    def num_resource(init_context):
        return init_context.resource_config

    ran = {"count": 0}

    @solid(required_resource_keys={"num"})
    def check_context_config_for_two(context):
        assert context.resources.num == 2
        ran["count"] += 1

    result = execute_solid(
        check_context_config_for_two,
        run_config={"resources": {
            "num": {
                "config": 2
            }
        }},
        mode_def=ModeDefinition(resource_defs={"num": num_resource}),
    )

    assert result.success
    assert ran["count"] == 1

    result = execute_solid(
        check_context_config_for_two,
        mode_def=ModeDefinition(resource_defs={"num": num_resource}))

    assert result.success
    assert ran["count"] == 2
Example #5
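# Executes a lambda solid and a composite solid that wraps it, then exercises the result API (output_value, output_values, per-handle lookups, nested solid results) and the error raised when asking for a missing inner solid.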
def test_composites():
    @lambda_solid
    def hello():
        return 'hello'

    @composite_solid
    def hello_composite():
        return hello()

    result = execute_solid(hello)
    assert result.success
    assert result.output_value() == 'hello'
    assert result.output_values == {'result': 'hello'}

    result = execute_solid(hello_composite)
    assert result.success
    assert result.output_value() == 'hello'
    assert result.output_values == {'result': 'hello'}
    assert result.output_values_for_solid('hello') == {'result': 'hello'}
    assert result.output_value_for_handle('hello') == 'hello'

    nested_result = result.result_for_solid('hello')
    assert nested_result.success
    assert nested_result.output_value() == 'hello'
    assert len(result.solid_result_list) == 1
    assert nested_result.output_values == {'result': 'hello'}

    with pytest.raises(
        DagsterInvariantViolationError,
        match=re.escape(
            'Tried to get result for solid \'goodbye\' in \'hello_composite\'. No such top level '
            'solid'
        ),
    ):
        _ = result.result_for_solid('goodbye')
Example #6
# Mocks boto3.client with a MagicMock and supplies the file_cache resource's target_folder via environment_dict (the pre-run_config name for run configuration).
def test_cache_file_from_s3_step_two_use_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
            'boto3.client', new=lambda *_args, **_kwargs: boto_s3):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(resource_defs={'file_cache': fs_file_cache}),
            environment_dict={
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            's3_coord': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
            },
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #7
# An older API vintage: the solid function takes an `info` argument and configuration flows through PipelineContextDefinition rather than resources on a ModeDefinition.
def test_single_solid_with_context_config():
    ran = {'check_context_config_for_two': 0}

    @solid
    def check_context_config_for_two(info):
        assert info.resources == 2
        ran['check_context_config_for_two'] += 1

    pipeline_def = PipelineDefinition(
        solids=[check_context_config_for_two],
        context_definitions={
            'test_context': PipelineContextDefinition(
                config_field=Field(Int, is_optional=True, default_value=2),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            )
        },
    )

    result = execute_solid(
        pipeline_def,
        'check_context_config_for_two',
        environment={'context': {'test_context': {'config': 2}}},
    )

    assert result.success
    assert ran['check_context_config_for_two'] == 1

    result = execute_solid(pipeline_def, 'check_context_config_for_two')

    assert result.success
    assert ran['check_context_config_for_two'] == 2
Example #8
# A double-quoted revision of the composite-solid walkthrough in Example #5.
def test_composites():
    @lambda_solid
    def hello():
        return "hello"

    @composite_solid
    def hello_composite():
        return hello()

    result = execute_solid(hello)
    assert result.success
    assert result.output_value() == "hello"
    assert result.output_values == {"result": "hello"}

    result = execute_solid(hello_composite)
    assert result.success
    assert result.output_value() == "hello"
    assert result.output_values == {"result": "hello"}
    assert result.output_values_for_solid("hello") == {"result": "hello"}
    assert result.output_value_for_handle("hello") == "hello"

    nested_result = result.result_for_solid("hello")
    assert nested_result.success
    assert nested_result.output_value() == "hello"
    assert len(result.solid_result_list) == 1
    assert nested_result.output_values == {"result": "hello"}

    with pytest.raises(
            DagsterInvariantViolationError,
            match=re.escape(
                "Tried to get result for solid 'goodbye' in 'hello_composite'. No such top level "
                "solid"),
    ):
        _ = result.result_for_solid("goodbye")
Example #9
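# An earlier revision of the resource-config test in Example #4: config_field, is_optional, and environment_dict rather than config_schema, is_required, and run_config.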
def test_single_solid_with_context_config():
    @resource(config_field=Field(Int, is_optional=True, default_value=2))
    def num_resource(init_context):
        return init_context.resource_config

    ran = {'count': 0}

    @solid
    def check_context_config_for_two(context):
        assert context.resources.num == 2
        ran['count'] += 1

    result = execute_solid(
        check_context_config_for_two,
        environment_dict={'resources': {'num': {'config': 2}}},
        mode_def=ModeDefinition(resource_defs={'num': num_resource}),
    )

    assert result.success
    assert ran['count'] == 1

    result = execute_solid(
        check_context_config_for_two, mode_def=ModeDefinition(resource_defs={'num': num_resource})
    )

    assert result.success
    assert ran['count'] == 2
Example #10
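# An older variant of Example #2: the solid is executed by name from a PipelineDefinition and the failed output type check surfaces as DagsterInvariantViolationError.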
def test_single_solid_type_checking_output_error():
    @lambda_solid(output=OutputDefinition(types.Int))
    def return_string():
        return 'ksjdfkjd'

    pipeline_def = PipelineDefinition(solids=[return_string])

    with pytest.raises(DagsterInvariantViolationError):
        execute_solid(pipeline_def, 'return_string')
Example #11
# In this variant a user exception raised inside the solid propagates directly out of execute_solid.
def test_failing_solid_in_isolation():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException("nope")

    with pytest.raises(ThisException) as e_info:
        execute_solid(throw_an_error)

    assert isinstance(e_info.value, ThisException)
Example #12
# Same direct-propagation pattern as Example #11 with a different exception class.
def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    with pytest.raises(SomeError) as e_info:
        execute_solid(throw_error)

    assert isinstance(e_info.value, SomeError)
Example #13
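# Executes a failing solid by name from a PipelineDefinition; the user-defined exception propagates out of execute_solid unwrapped.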
def test_failing_solid_execute_solid():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException('nope')

    pipeline_def = PipelineDefinition(solids=[throw_an_error])

    with pytest.raises(ThisException):
        execute_solid(pipeline_def, 'throw_an_error')
Example #14
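# Same pattern as Example #13 with a different exception class.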
def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    pipeline_def = PipelineDefinition(solids=[throw_error])

    with pytest.raises(SomeError):
        execute_solid(pipeline_def, 'throw_error')
Example #15
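# Runs a pre-configured solid with solid config passed positionally (presumably the run-config argument) and checks the captured stderr with capsys.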
def test_another_new_solid(capsys):
    assert another_configured_example.name == "another_configured_example"
    execute_solid(
        another_configured_example,
        None,
        None,
        None,
        {"solids": {"another_configured_example": {"config": 6}}},
    )

    captured = capsys.readouterr()
    assert captured.err.count("wheaties") == 6
Example #16
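# Here the user exception is wrapped: execute_solid raises DagsterExecutionStepExecutionError with the original error available as __cause__ (compare Example #11, where it propagates directly).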
def test_failing_solid_in_isolation():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException('nope')

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid(throw_an_error)

    assert isinstance(e_info.value.__cause__, ThisException)
Example #17
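# Same wrapped-exception pattern as Example #16 with a different exception class.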
def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid(throw_error)

    assert isinstance(e_info.value.__cause__, SomeError)
Example #18
# Substitutes a MagicMock for the S3 session via the local mode definition, then asserts the download call and the cached file on disk.
def test_cache_file_from_s3_step_three_mock():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3_session),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert s3_session.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #19
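# As Example #17, but executed by name from a PipelineDefinition (note solid_defs= rather than the older solids= keyword).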
def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    pipeline_def = PipelineDefinition(solid_defs=[throw_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid(pipeline_def, 'throw_error')

    assert isinstance(e_info.value.__cause__, SomeError)
Example #20
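# As Example #19, with a different exception class.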
def test_failing_solid_execute_solid():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException('nope')

    pipeline_def = PipelineDefinition(solid_defs=[throw_an_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid(pipeline_def, 'throw_an_error')

    assert isinstance(e_info.value.__cause__, ThisException)
Example #21
# Builds the mode with ModeDefinition.from_resources(FSFileCache(temp_dir)), so no resource config is needed; boto3.client is monkeypatched with a MagicMock.
def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        "boto3.client", new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({"file_cache": FSFileCache(temp_dir)}),
            input_values={"s3_coord": {"bucket": "some-bucket", "key": "some-key"}},
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #22
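# Monkeypatches the module's file_cache_folder helper and boto3.client with a MagicMock, then asserts the download call and the cached file on disk.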
def test_cache_file_from_s3_step_one_two():
    boto_s3 = mock.MagicMock()
    # mock.patch is difficult to get right and requires monkeypatching of global artifacts
    with get_temp_dir() as temp_dir, mock.patch(
        file_cache_folder.__module__ + ".file_cache_folder", new=lambda: temp_dir
    ), mock.patch("boto3.client", new=lambda *_args, **_kwargs: boto_s3):
        execute_solid(
            cache_file_from_s3,
            input_values=dict(s3_coord={"bucket": "some-bucket", "key": "some-key"}),
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #23
# A single-quoted revision of Example #21.
def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        'boto3.client', new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({'file_cache': FSFileCache(temp_dir)}),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #24
# Feeds a badly typed input with raise_on_error=False and inspects failure_data for the DagsterTypeCheckDidNotPass error message.
def test_single_solid_with_bad_inputs():
    @lambda_solid(input_defs=[
        InputDefinition("num_one", int),
        InputDefinition("num_two", int)
    ])
    def add_solid(num_one, num_two):
        return num_one + num_two

    result = execute_solid(
        add_solid,
        input_values={
            "num_one": 2,
            "num_two": "three"
        },
        run_config={
            "loggers": {
                "console": {
                    "config": {
                        "log_level": "DEBUG"
                    }
                }
            }
        },
        raise_on_error=False,
    )

    assert not result.success
    assert result.failure_data.error.cls_name == "DagsterTypeCheckDidNotPass"
    assert ('Type check failed for step input "num_two" - expected type "Int"'
            in result.failure_data.error.message)
Example #25
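# A composite solid with no output mappings executes successfully but exposes no output values; requesting one raises DagsterInvariantViolationError with an explanatory message.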
def test_composite_with_no_output_mappings():
    a_source = define_stub_solid('A_source', [input_set('A_input')])
    node_a = create_root_solid('A')
    node_b = create_solid_with_deps('B', node_a)
    node_c = create_solid_with_deps('C', node_a)
    node_d = create_solid_with_deps('D', node_b, node_c)

    @composite_solid
    def diamond_composite():
        a = node_a(a_source())
        node_d(B=node_b(a), C=node_c(a))

    res = execute_solid(diamond_composite)

    assert res.success

    assert res.output_values == {}

    with pytest.raises(
        DagsterInvariantViolationError,
        match=re.escape(
            'Output \'result\' not defined in composite solid \'diamond_composite\': no output '
            'mappings were defined. If you were expecting this output to be present, you may be '
            'missing an output_mapping from an inner solid to its enclosing composite solid.'
        ),
    ):
        _ = res.output_value()

    assert len(res.solid_result_list) == 5
Example #26
# Copies a file from a source bucket into an S3-backed file cache and snapshots the cache bucket's contents (the boto3 client again appears to be backed by a fake S3).
def test_cache_file_from_s3_step_four(snapshot):
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='source-bucket')
    s3.create_bucket(Bucket='file-cache-bucket')
    s3.put_object(Bucket='source-bucket', Key='source-file', Body=b'foo')

    solid_result = execute_solid(
        cache_file_from_s3,
        unittest_for_aws_mode_def(s3),
        input_values={
            's3_coord': {
                'bucket': 'source-bucket',
                'key': 'source-file'
            }
        },
    )

    assert (
        solid_result.output_value().path_desc
        == 's3://file-cache-bucket/file-cache/source-file'
    )

    file_cache_obj = s3.get_object(Bucket='file-cache-bucket',
                                   Key='file-cache/source-file')

    assert file_cache_obj['Body'].read() == b'foo'

    snapshot.assert_match({
        'file-cache-bucket': {
            k: s3.get_object(Bucket='file-cache-bucket', Key=k)['Body'].read()
            for k in [
                obj['Key'] for obj in s3.list_objects(
                    Bucket='file-cache-bucket')['Contents']
            ]
        }
    })
Example #27
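# Executes a two-input solid with literal input_values and DEBUG console logging configured via environment_dict.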
def test_single_solid_with_multiple_inputs():
    @lambda_solid(input_defs=[
        InputDefinition(name='num_one'),
        InputDefinition('num_two')
    ])
    def add_solid(num_one, num_two):
        return num_one + num_two

    result = execute_solid(
        add_solid,
        input_values={
            'num_one': 2,
            'num_two': 3
        },
        environment_dict={
            'loggers': {
                'console': {
                    'config': {
                        'log_level': 'DEBUG'
                    }
                }
            }
        },
    )

    assert result.success
    assert result.output_value() == 5
Example #28
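# A double-quoted revision of Example #26.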
def test_cache_file_from_s3_step_four(snapshot):
    s3 = boto3.client("s3")
    s3.create_bucket(Bucket="source-bucket")
    s3.create_bucket(Bucket="file-cache-bucket")
    s3.put_object(Bucket="source-bucket", Key="source-file", Body=b"foo")

    solid_result = execute_solid(
        cache_file_from_s3,
        unittest_for_aws_mode_def(s3),
        input_values={
            "s3_coord": {
                "bucket": "source-bucket",
                "key": "source-file"
            }
        },
    )

    assert (
        solid_result.output_value().path_desc
        == "s3://file-cache-bucket/file-cache/source-file"
    )

    file_cache_obj = s3.get_object(Bucket="file-cache-bucket",
                                   Key="file-cache/source-file")

    assert file_cache_obj["Body"].read() == b"foo"

    snapshot.assert_match({
        "file-cache-bucket": {
            k: s3.get_object(Bucket="file-cache-bucket", Key=k)["Body"].read()
            for k in [
                obj["Key"] for obj in s3.list_objects(
                    Bucket="file-cache-bucket")["Contents"]
            ]
        }
    })
Example #29
# The run_config revision of Example #27.
def test_single_solid_with_multiple_inputs():
    @lambda_solid(input_defs=[
        InputDefinition(name="num_one"),
        InputDefinition("num_two")
    ])
    def add_solid(num_one, num_two):
        return num_one + num_two

    result = execute_solid(
        add_solid,
        input_values={
            "num_one": 2,
            "num_two": 3
        },
        run_config={
            "loggers": {
                "console": {
                    "config": {
                        "log_level": "DEBUG"
                    }
                }
            }
        },
    )

    assert result.success
    assert result.output_value() == 5
Example #30
# Uses an S3FakeSession stub in place of a boto client and snapshots the fake session's buckets directly.
def test_cache_file_from_s3_step_four(snapshot):
    s3_session = S3FakeSession({'source-bucket': {'source-file': b'foo'}})
    s3_file_cache_session = S3FakeSession()

    solid_result = execute_solid(
        cache_file_from_s3,
        unittest_for_aws_mode_def(s3_file_cache_session, s3_session),
        input_values={
            's3_coord': {
                'bucket': 'source-bucket',
                'key': 'source-file'
            }
        },
    )

    assert (
        solid_result.output_value().path_desc
        == 's3://file-cache-bucket/file-cache/source-file'
    )

    file_cache_obj = s3_file_cache_session.get_object(
        Bucket='file-cache-bucket', Key='file-cache/source-file')

    assert file_cache_obj['Body'].read() == b'foo'

    # just perform a snapshot of the bucket structure as well
    snapshot.assert_match(s3_file_cache_session.buckets)