Example #1
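# The Slack resource is a hardcoded MagicMock, so the slack_on_success hook can complete without a real Slack client.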
def test_slack_on_success():
    @solid
    def passing_solid(_):
        pass

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                "slack": ResourceDefinition.hardcoded_resource(MagicMock()),
                "base_url": ResourceDefinition.hardcoded_resource("foo"),
            })
    ])
    def basic_pipeline():
        passing_solid.with_hooks(hook_defs={slack_on_success})()

    result = execute_pipeline(basic_pipeline)

    assert result.success

    assert not any([
        event.event_type == DagsterEventType.HOOK_ERRORED
        for event in result.event_list
    ])
    assert any([
        event.event_type == DagsterEventType.HOOK_COMPLETED
        for event in result.event_list
    ])
Example #2
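# Solids only see the resources named in required_resource_keys; anything else is absent from context.resources.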
def test_filter_out_resources():
    @solid(required_resource_keys={"a"})
    def requires_resource_a(context):
        assert context.resources.a
        assert not hasattr(context.resources, "b")

    @solid(required_resource_keys={"b"})
    def requires_resource_b(context):
        assert not hasattr(context.resources, "a")
        assert context.resources.b

    @solid
    def not_resources(context):
        assert not hasattr(context.resources, "a")
        assert not hasattr(context.resources, "b")

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": ResourceDefinition.hardcoded_resource("foo"),
                    "b": ResourceDefinition.hardcoded_resource("bar"),
                })
        ], )
    def room_of_requirement():
        requires_resource_a()
        requires_resource_b()
        not_resources()

    execute_pipeline(room_of_requirement)
Example #3
def test_filter_out_resources():
    @solid(required_resource_keys={'a'})
    def requires_resource_a(context):
        assert context.resources.a
        assert not hasattr(context.resources, 'b')

    @solid(required_resource_keys={'b'})
    def requires_resource_b(context):
        assert not hasattr(context.resources, 'a')
        assert context.resources.b

    @solid
    def not_resources(context):
        assert not hasattr(context.resources, 'a')
        assert not hasattr(context.resources, 'b')

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'a': ResourceDefinition.hardcoded_resource('foo'),
                    'b': ResourceDefinition.hardcoded_resource('bar'),
                })
        ], )
    def room_of_requirement():
        requires_resource_a()
        requires_resource_b()
        not_resources()

    execute_pipeline(room_of_requirement)
Example #4
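# Each run injects a fresh MagicMock S3 session; download_file call counts show whether the cache was hit.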
def test_cache_file_from_s3_overwrite():
    with get_temp_dir() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_one)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download did not occur because the file is already there
        assert s3_session_two.download_file.call_count == 0
Example #5
def test_cache_file_from_s3_overwrite():
    with tempfile.TemporaryDirectory() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_one),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download did not occur because the file is already there
        assert s3_session_two.download_file.call_count == 0
Example #6
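# Runs against mocked S3: the boto3 client and the S3FileManager built on it are injected as hardcoded resources.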
def test_unzip_file_handle_on_fake_s3():
    foo_bytes = b"foo"

    @solid(required_resource_keys={"file_manager"}, output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes)
            with open(zip_file_name, "rb") as ff:
                s3_file_handle = context.resources.file_manager.write_data(ff.read())
                return s3_file_handle

    # Uses mock S3
    # https://github.com/spulec/moto/issues/3292
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="some-bucket")
    file_manager = S3FileManager(s3_session=s3, s3_bucket="some-bucket", s3_base_key="dagster")

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "s3": ResourceDefinition.hardcoded_resource(s3),
                    "file_manager": ResourceDefinition.hardcoded_resource(file_manager),
                    "io_manager": s3_pickle_io_manager,
                },
            )
        ]
    )
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    result = execute_pipeline(
        do_test_unzip_file_handle_s3,
        run_config={
            "resources": {"io_manager": {"config": {"s3_bucket": "some-bucket"}}},
            "solids": {
                "unzip_file_handle": {"inputs": {"archive_member": {"value": "an_archive_member"}}}
            },
        },
    )

    assert result.success

    zipped_s3_file = result.result_for_solid("write_zipped_file_to_s3_store").output_value()
    unzipped_s3_file = result.result_for_solid("unzip_file_handle").output_value()
    bucket_keys = [obj["Key"] for obj in s3.list_objects(Bucket="some-bucket")["Contents"]]

    assert zipped_s3_file.s3_key in bucket_keys
    assert unzipped_s3_file.s3_key in bucket_keys
Example #7
    def test_check_data_ingest_job_retries_on_5xx(self):
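        # retrieve_job_result raises a 502 ApiException first and then succeeds, so the solid must retry within the configured poll window.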
        data_repo = Mock(spec=RepositoryApi)
        api_responses = [ApiException(status=502), {'failedFiles': 0}]
        data_repo.retrieve_job_result = Mock(side_effect=api_responses)
        mode_def = ModeDefinition(
            name='test',
            resource_defs={
                "data_repo_client":
                ResourceDefinition.hardcoded_resource(data_repo)
            })

        result: SolidExecutionResult = execute_solid(
            base_check_data_ingest_job_result,
            mode_def=mode_def,
            input_values={'job_id': JobId('fake_job_id')},
            run_config={
                'solids': {
                    'base_check_data_ingest_job_result': {
                        'config': {
                            'max_wait_time_seconds': 3,
                            'poll_interval_seconds': 1
                        }
                    }
                }
            })

        self.assertTrue(result.success,
                        "Poll ingest should not raise after a single 5xx")
Example #8
def test_depends_on_s3_resource_intermediates():
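    # Pipeline intermediates are written to mocked S3; the bucket keys are compared to the expected step-output keys.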
    @solid(
        input_defs=[
            InputDefinition('num_one', Int),
            InputDefinition('num_two', Int)
        ],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3)},
        )
    ])
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {
                    'inputs': {
                        'num_one': {
                            'value': 2
                        },
                        'num_two': {
                            'value': 4
                        }
                    }
                }
            },
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            },
        },
    )

    keys_in_bucket = [
        obj['Key'] for obj in s3.list_objects(Bucket='some-bucket')['Contents']
    ]
    assert result.success
    assert result.result_for_solid('add_numbers').output_value() == 6

    keys = set()
    for step_key, output_name in [('add_numbers.compute', 'result')]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(keys_in_bucket) == keys
Example #9
def test_depends_on_s3_resource_file_manager():
    bar_bytes = 'bar'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def emit_file(context):
        return context.file_manager.write_data(bar_bytes)

    @solid(input_defs=[InputDefinition('file_handle', S3FileHandle)])
    def accept_file(context, file_handle):
        local_path = context.file_manager.copy_handle_to_local_temp(
            file_handle)
        assert isinstance(local_path, str)
        assert open(local_path, 'rb').read() == bar_bytes

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_fake_resource)
            },
        )
    ])
    def s3_file_manager_test():
        accept_file(emit_file())

    result = execute_pipeline(
        s3_file_manager_test,
        environment_dict={
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            }
        },
    )

    assert result.success

    keys_in_bucket = set(s3_fake_resource.buckets['some-bucket'].keys())

    for step_key, output_name in [
        ('emit_file.compute', 'result'),
        ('accept_file.compute', 'result'),
    ]:
        keys_in_bucket.remove(
            create_s3_key(result.run_id, step_key, output_name))

    assert len(keys_in_bucket) == 1

    file_key = list(keys_in_bucket)[0]
    comps = file_key.split('/')

    assert '/'.join(comps[:-1]) == 'dagster/storage/{run_id}/files'.format(
        run_id=result.run_id)

    assert uuid.UUID(comps[-1])
Example #10
def test_cache_file_from_s3_specify_target_key():
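    # The solid config's "file_key" overrides the cached file's key; the mocked S3 session should see exactly one download.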
    s3_session = mock.MagicMock()
    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}},
                        "config": {"file_key": "specified-file-key"},
                    }
                },
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "specified-file-key" in solid_result.output_value().path_desc
Example #11
def test_runtime_metadata_fn():
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as f:
        manifest_json = json.load(f)

    def runtime_metadata_fn(context, node_info):
        return {
            "op_name": context.solid_def.name,
            "dbt_model": node_info["name"]
        }

    assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn)
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)})
    result = assets_job.execute_in_process()
    assert result.success

    for asset in assets:
        materializations = [
            event.event_specific_data.materialization
            for event in result.events_for_node(asset.op.name)
            if event.event_type_value == "ASSET_MATERIALIZATION"
        ]
        assert len(materializations) == 1
        assert materializations[0].metadata_entries == [
            EventMetadataEntry.text(asset.op.name, label="op_name"),
            EventMetadataEntry.text(asset.op.name, label="dbt_model"),
        ]
Example #12
def test_hardcoded_resource():
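    # Minimal usage: the hardcoded resource is a MagicMock whose calls can be asserted on after the run.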
    called = {}

    mock_obj = seven.mock.MagicMock()

    @solid(required_resource_keys={"hardcoded"})
    def solid_hardcoded(context):
        assert context.resources.hardcoded("called")
        called["yup"] = True

    pipeline = PipelineDefinition(
        name="hardcoded_resource",
        solid_defs=[solid_hardcoded],
        mode_defs=[
            ModeDefinition(
                resource_defs={"hardcoded": ResourceDefinition.hardcoded_resource(mock_obj)}
            )
        ],
    )

    result = execute_pipeline(pipeline)

    assert result.success
    assert called["yup"]
    mock_obj.assert_called_with("called")
Example #13
def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[
            InputDefinition("num_one", Int),
            InputDefinition("num_two", Int)
        ],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    # Uses mock S3
    s3 = boto3.client("s3")
    s3.create_bucket(Bucket="some-bucket")

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={"s3": ResourceDefinition.hardcoded_resource(s3)},
        )
    ])
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        run_config={
            "solids": {
                "add_numbers": {
                    "inputs": {
                        "num_one": {
                            "value": 2
                        },
                        "num_two": {
                            "value": 4
                        }
                    }
                }
            },
            "storage": {
                "s3": {
                    "config": {
                        "s3_bucket": "some-bucket"
                    }
                }
            },
        },
    )

    keys_in_bucket = [
        obj["Key"] for obj in s3.list_objects(Bucket="some-bucket")["Contents"]
    ]
    assert result.success
    assert result.result_for_solid("add_numbers").output_value() == 6

    keys = set()
    for step_key, output_name in [("add_numbers.compute", "result")]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(keys_in_bucket) == keys
Example #14
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}},
                        'config': {'file_key': 'specified-file-key'},
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'specified-file-key' in solid_result.output_value().path_desc
Example #15
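# A FakeGCSClient backs the GCS pickle asset IO manager; the expected blob path is checked for each asset and partition.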
def test_asset_io_manager(gcs_bucket):
    @asset
    def upstream():
        return 2

    @asset
    def downstream(upstream):
        return 1 + upstream

    @asset(partitions_def=StaticPartitionsDefinition(["apple", "orange"]))
    def partitioned():
        return 8

    fake_gcs_client = FakeGCSClient()
    asset_group = AssetGroup(
        [upstream, downstream, partitioned],
        resource_defs={
            "io_manager":
            gcs_pickle_asset_io_manager.configured({
                "gcs_bucket": gcs_bucket,
                "gcs_prefix": "assets"
            }),
            "gcs":
            ResourceDefinition.hardcoded_resource(fake_gcs_client),
        },
    )
    asset_job = asset_group.build_job(name="my_asset_job")

    result = asset_job.execute_in_process(partition_key="apple")
    assert result.success
    assert fake_gcs_client.get_all_blob_paths() == {
        f"{gcs_bucket}/assets/upstream",
        f"{gcs_bucket}/assets/downstream",
        f"{gcs_bucket}/assets/partitioned/apple",
    }
Example #16
def test_check_has_data_false():
    # need a bucket with a blob of size 0, i.e. empty content
    this_test_bucket = FakeGoogleBucket(
        {"gs://my-fake-bucket/fake-prefix": HexBlobInfo(hex_md5="b2d6ec45472467c836f253bd170182c7",
                                                        content="")}
    )

    this_test_mode = ModeDefinition(
        "test_check_has_data_mode",
        resource_defs={**load_table_test_mode.resource_defs}
    )
    this_test_mode.resource_defs["gcs"] = ResourceDefinition.hardcoded_resource(
        FakeGCSClient(
            buckets={test_bucket_name: this_test_bucket}
        )
    )

    result: SolidExecutionResult = execute_solid(
        check_has_data,
        mode_def=this_test_mode,
        input_values={
            "metadata_fanout_result": metadata_fanout_result
        },
        run_config=run_config
    )

    assert result.success
    assert not result.output_value("no_data")
Example #17
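# The lakehouse instance is built over a temp dir and injected directly; Spark reads the parquet output back to verify it.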
def test_execute_byfeature_parquet_lakehouse():
    with get_temp_dir() as temp_dir:
        lakehouse = ByFeatureParquetLakehouse(temp_dir)
        pipeline_def = construct_lakehouse_pipeline(
            name='test',
            lakehouse_tables=[TableOne, TableTwo, TableThree],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        'spark':
                        spark_session_resource,
                        'lakehouse':
                        ResourceDefinition.hardcoded_resource(lakehouse),
                    })
            ],
        )

        pipeline_result = execute_pipeline(pipeline_def)
        assert pipeline_result.success

        def get_table(table_def):
            spark = spark_session_from_config()
            return spark.read.parquet(
                os.path.join(temp_dir, table_def.metadata[FEATURE_AREA],
                             table_def.name)).collect()

        assert get_table(TableOne) == [Row(num=1)]
        assert get_table(TableTwo) == [Row(num=2)]
        assert set(get_table(TableThree)) == set([Row(num=1), Row(num=2)])
Example #18
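# The integer 9 is hardcoded as the "subresource" resource and read by the IO manager through required_resource_keys.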
def test_source_asset():
    @asset
    def asset1(source1):
        assert source1 == 5
        return 1

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            assert context.resource_config["a"] == 7
            assert context.resources.subresource == 9
            assert context.upstream_output.resources.subresource == 9
            return 5

    @io_manager(config_schema={"a": int},
                required_resource_keys={"subresource"})
    def my_io_manager(_):
        return MyIOManager()

    job = build_assets_job(
        "a",
        [asset1],
        source_assets=[
            SourceAsset(AssetKey("source1"),
                        io_manager_key="special_io_manager")
        ],
        resource_defs={
            "special_io_manager": my_io_manager.configured({"a": 7}),
            "subresource": ResourceDefinition.hardcoded_resource(9),
        },
    )
    assert job.graph.node_defs == [asset1.op]
    assert job.execute_in_process().success
Example #19
def test_airline_demo_load_df():
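    # DbInfo and the Spark session are both mocked, so only config handling and the emitted materializations are exercised.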
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                'db_info': ResourceDefinition.hardcoded_resource(db_info_mock),
                'spark': ResourceDefinition.hardcoded_resource(
                    mock.MagicMock()),
            })
    ])
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        environment_dict={
            'solids': {
                'load_data_to_database_from_spark': {
                    'config': {
                        'table_name': 'foo'
                    }
                }
            }
        },
    ).result_for_solid('load_data_to_database_from_spark')

    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'
Example #20
def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[
            InputDefinition('num_one', Int),
            InputDefinition('num_two', Int)
        ],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={
                's3': ResourceDefinition.hardcoded_resource(s3_fake_resource)
            },
        )
    ])
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {
                    'inputs': {
                        'num_one': {
                            'value': 2
                        },
                        'num_two': {
                            'value': 4
                        }
                    }
                }
            },
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            },
        },
    )

    assert result.success
    assert result.result_for_solid('add_numbers').output_value() == 6

    assert 'some-bucket' in s3_fake_resource.session.buckets

    keys = set()
    for step_key, output_name in [('add_numbers.compute', 'result')]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(s3_fake_resource.session.buckets['some-bucket'].keys()) == keys
Example #21
def test_unzip_file_handle_on_fake_s3():
    foo_bytes = 'foo'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, 'an_archive_member',
                                   foo_bytes)
            with open(zip_file_name, 'rb') as ff:
                s3_file_handle = context.file_manager.write_data(ff.read())
                return s3_file_handle

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3)},
            system_storage_defs=[s3_system_storage],
        )
    ])
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    result = execute_pipeline(
        do_test_unzip_file_handle_s3,
        environment_dict={
            'storage': {
                's3': {
                    'config': {
                        's3_bucket': 'some-bucket'
                    }
                }
            },
            'solids': {
                'unzip_file_handle': {
                    'inputs': {
                        'archive_member': {
                            'value': 'an_archive_member'
                        }
                    }
                }
            },
        },
    )

    assert result.success

    zipped_s3_file = result.result_for_solid(
        'write_zipped_file_to_s3_store').output_value()
    unzipped_s3_file = result.result_for_solid(
        'unzip_file_handle').output_value()
    bucket_keys = [
        obj['Key'] for obj in s3.list_objects(Bucket='some-bucket')['Contents']
    ]

    assert zipped_s3_file.s3_key in bucket_keys
    assert unzipped_s3_file.s3_key in bucket_keys
Example #22
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid(
        required_resource_keys={"pyspark"},
        output_defs=[OutputDefinition(io_manager_key="pyspark_io_manager")],
    )
    def emit_mock(context):
        return context.resources.pyspark.spark_session.read.csv(
            file_relative_path(__file__, "../data/test.csv")
        )

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                    "pyspark": pyspark_resource,
                    "pyspark_step_launcher": no_step_launcher,
                    "pyspark_io_manager": local_parquet_io_manager,
                    "io_manager": fs_io_manager,
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_pipeline(
            load_df_test,
            run_config={
                "solids": {"load_data_to_database_from_spark": {"config": {"table_name": "foo"}}},
                "resources": {
                    "io_manager": {"config": {"base_dir": temp_dir}},
                    "pyspark_io_manager": {"config": {"base_dir": temp_dir}},
                },
            },
        ).result_for_solid("load_data_to_database_from_spark")

        assert solid_result.success
        mats = solid_result.materializations_during_compute
        assert len(mats) == 1
        mat = mats[0]
        assert len(mat.metadata_entries) == 2
        entries = {me.label: me for me in mat.metadata_entries}
        assert entries["Host"].entry_data.text == "host"
        assert entries["Db"].entry_data.text == "db_name"
Example #23
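# Hooks resolve their "slack" resource from the job's resource_defs; the MagicMock records the chat.post_message call.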
def test_hook_resource():
    slack_mock = mock.MagicMock()

    @job(
        resource_defs={"slack": ResourceDefinition.hardcoded_resource(slack_mock)},
    )
    def foo():
        a.with_hooks({slack_message_on_success, slack_message_on_failure})()

    foo.execute_in_process()
    assert slack_mock.chat.post_message.call_count == 1
Example #24
def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        pipeline_result = execute_solid_with_resources(
            cache_file_from_s3,
            resources={
                'file_cache':
                fs_file_cache,
                's3':
                ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
            },
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            'bucket_data': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid('cache_file_from_s3')

        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == 'file_handle_exists'
        path_in_metadata = expectation_result.metadata_entries[
            0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.result_value(), LocalFileHandle)
        assert 'some-key' in solid_result.result_value().path_desc
Example #25
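# The dbt resource is a MagicMock, so the generated assets job can run without invoking dbt.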
def test_load_from_manifest_json():
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as f:
        manifest_json = json.load(f)

    assets = load_assets_from_dbt_manifest(manifest_json=manifest_json)
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)})
    assert assets_job.execute_in_process().success
Example #26
def create_file_handle_pipeline(temp_file_handle, s3_resource):
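    # Test helper: builds a single-solid pipeline wired to whatever S3 resource the caller supplies.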
    @solid
    def emit_temp_handle(_):
        return temp_file_handle

    @pipeline(mode_defs=[
        ModeDefinition(resource_defs={
            "s3": ResourceDefinition.hardcoded_resource(s3_resource)
        })
    ])
    def test():
        return file_handle_to_s3(emit_temp_handle())

    return test
Example #27
def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {
                            "s3_coordinate": {
                                "bucket": "some-bucket",
                                "key": "some-key"
                            }
                        }
                    }
                },
                "resources": {
                    "file_cache": {
                        "config": {
                            "target_folder": temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == "file_handle_exists"
        path_in_metadata = expectation_result.metadata_entries[
            0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "some-key" in solid_result.output_value().path_desc
Example #28
def test_hook_resource():
    slack_mock = mock.MagicMock()

    @pipeline(mode_defs=[
        ModeDefinition(
            "unittest",
            resource_defs={
                "slack": ResourceDefinition.hardcoded_resource(slack_mock)
            },
        ),
    ])
    def foo():
        a.with_hooks({slack_on_success, slack_on_failure})()

    execute_pipeline(foo)
    assert slack_mock.chat.post_message.call_count == 1
Example #29
def create_file_handle_pipeline(temp_file_handle, s3_resource):
    # pylint: disable=no-value-for-parameter

    @solid
    def emit_temp_handle(_):
        return temp_file_handle

    @pipeline(mode_defs=[
        ModeDefinition(resource_defs={
            's3': ResourceDefinition.hardcoded_resource(s3_resource)
        })
    ])
    def test():
        return file_handle_to_s3(emit_temp_handle())

    return test
Example #30
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                "pyspark": pyspark_resource,
                "pyspark_step_launcher": no_step_launcher,
            })
    ])
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            "solids": {
                "load_data_to_database_from_spark": {
                    "config": {
                        "table_name": "foo"
                    }
                }
            }
        },
    ).result_for_solid("load_data_to_database_from_spark")

    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries["Host"].entry_data.text == "host"
    assert entries["Db"].entry_data.text == "db_name"