Example #1
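All of the examples below are excerpted from Dagster test suites and omit their import preambles. A representative preamble for the legacy (pre-job/op) API they exercise might look like the following sketch; the module paths are assumptions and have moved between Dagster releases:

import os

import boto3
import mock  # on Python 3 this can be `from unittest import mock`
import pytest

from dagster import ModeDefinition, ResourceDefinition, execute_pipeline, pipeline, solid
from dagster.utils.temp_file import get_temp_dir  # assumed path; varies by release
from dagster.utils.test import execute_solid  # assumed path; varies by release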
def test_cache_file_from_s3_step_three_fake(snapshot):
    # https://github.com/spulec/moto/issues/3292
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="some-bucket")
    s3.put_object(Bucket="some-bucket", Key="some-key", Body=b"foo")

    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3),
            input_values={
                "s3_coord": {
                    "bucket": "some-bucket",
                    "key": "some-key"
                }
            },
        )

        target_file = os.path.join(temp_dir, "some-key")
        assert os.path.exists(target_file)

        with open(target_file, "rb") as ff:
            assert ff.read() == b"foo"

    snapshot.assert_match({
        "some-bucket": {
            k: s3.get_object(Bucket="some-bucket", Key=k)["Body"].read()
            for k in [
                obj["Key"]
                for obj in s3.list_objects(Bucket="some-bucket")["Contents"]
            ]
        }
    })
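Example #1 runs against a fake S3 backend (see the moto issue linked in the test). A self-contained sketch of the moto pattern such tests depend on, assuming moto's `mock_s3` decorator (renamed `mock_aws` in moto 5):

import boto3
from moto import mock_s3


@mock_s3
def test_fake_s3_roundtrip():
    # Inside the decorator, boto3 calls hit moto's in-memory backend, not AWS.
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="some-bucket")
    s3.put_object(Bucket="some-bucket", Key="some-key", Body=b"foo")
    assert s3.get_object(Bucket="some-bucket", Key="some-key")["Body"].read() == b"foo"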
Example #2
def get_external_repository_from_image(image):
    check.str_param(image, 'image')

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        command = 'dagster api snapshot repository {output_file}'.format(
            output_file=os.path.join(DEFAULT_INTERNAL_VOLUME, output_file_name)
        )
        output = run_serialized_container_command(
            image=image,
            command=command,
            volumes={tmp_dir: {'bind': DEFAULT_INTERNAL_VOLUME, 'mode': DEFAULT_MODE}},
        )

        if len(output) != 1:
            print(output)
            raise DagsterInvariantViolationError(
                "Running command {command} in container {image} resulted in output of length "
                "{actual} lines, expected {expected} lines".format(
                    command=command, image=image, actual=len(output), expected=1
                )
            )

        serialized_external_repo_data = output[0]
        external_repo_data = deserialize_json_to_dagster_namedtuple(serialized_external_repo_data)

        if not isinstance(external_repo_data, ExternalRepositoryData):
            raise DagsterInvariantViolationError(
                "Deserialized snapshot is of type {received}; must be an ExternalRepositoryData".format(
                    received=type(external_repo_data)
                )
            )
        return ExternalRepository(external_repo_data)
Example #3
def test_execute_byfeature_parquet_lakehouse():
    with get_temp_dir() as temp_dir:
        lakehouse = ByFeatureParquetLakehouse(temp_dir)
        pipeline_def = construct_lakehouse_pipeline(
            name='test',
            lakehouse_tables=[TableOne, TableTwo, TableThree],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        'spark': spark_session_resource,
                        'lakehouse': ResourceDefinition.hardcoded_resource(lakehouse),
                    })
            ],
        )

        pipeline_result = execute_pipeline(pipeline_def)
        assert pipeline_result.success

        def get_table(table_def):
            spark = spark_session_from_config()
            return spark.read.parquet(
                os.path.join(temp_dir, table_def.metadata[FEATURE_AREA],
                             table_def.name)).collect()

        assert get_table(TableOne) == [Row(num=1)]
        assert get_table(TableTwo) == [Row(num=2)]
        assert set(get_table(TableThree)) == set([Row(num=1), Row(num=2)])
Example #4
def test_execute_file_system_lakehouse(local_on_disk_spark_lakehouse,
                                       execute_spark_lakehouse_build):
    with get_temp_dir() as temp_dir:
        pipeline_result = execute_spark_lakehouse_build(
            tables=[TableOne, TableTwo, TableThree],
            lakehouse=local_on_disk_spark_lakehouse(temp_dir),
            environment_dict={
                'solids': {
                    'TableOne': {
                        'inputs': {
                            'num': {
                                'value': 1
                            }
                        }
                    }
                }
            },
        )

        assert pipeline_result.success

        def get_table(name):
            spark = spark_session_from_config()
            return spark.read.csv(os.path.join(temp_dir, name),
                                  header=True,
                                  inferSchema=True).collect()

        assert get_table('TableOne') == [Row(num=1)]
        assert get_table('TableTwo') == [Row(num=2)]
        assert set(get_table('TableThree')) == set([Row(num=1), Row(num=2)])
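Both lakehouse tests verify results by reading the written tables back with a fresh Spark session (`spark_session_from_config` is the suite's own helper). A minimal sketch of that write-then-read round trip with plain PySpark, using a hypothetical `<dir>/<feature_area>/<table>` layout like the parquet test above:

import os
import tempfile

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.master("local[1]").appName("readback").getOrCreate()
# Hypothetical layout mirroring the test: <temp_dir>/<feature_area>/<table_name>
path = os.path.join(tempfile.mkdtemp(), "feature_area", "TableOne")
spark.createDataFrame([Row(num=1)]).write.parquet(path)
assert spark.read.parquet(path).collect() == [Row(num=1)]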
Example #5
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}},
                        'config': {'file_key': 'specified-file-key'},
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'specified-file-key' in solid_result.output_value().path_desc
Example #6
def test_cache_file_from_s3_step_two_use_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
            'boto3.client', new=lambda *_args, **_kwargs: boto_s3):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(resource_defs={'file_cache': fs_file_cache}),
            environment_dict={
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            's3_coord': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
            },
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
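Example #6 swaps boto3's client factory for a MagicMock so no network traffic occurs; the lambda ignores whatever arguments `boto3.client` receives. The pattern in isolation, as a sketch:

import boto3
import mock  # or: from unittest import mock

boto_s3 = mock.MagicMock()
with mock.patch("boto3.client", new=lambda *_args, **_kwargs: boto_s3):
    client = boto3.client("s3")  # returns the MagicMock, not a real client
    client.download_file("some-bucket", "some-key", "/tmp/some-key")
assert boto_s3.download_file.call_count == 1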
Example #7
def test_missing_resources():
    with pytest.raises(DagsterInvalidDefinitionError):
        with get_temp_dir() as temp_dir:
            execute_solid(
                cache_file_from_s3,
                ModeDefinition(resource_defs={"file_cache": fs_file_cache}),
                run_config={
                    "solids": {
                        "cache_file_from_s3": {
                            "inputs": {
                                "s3_coordinate": {
                                    "bucket": "some-bucket",
                                    "key": "some-key"
                                }
                            }
                        }
                    },
                    "resources": {
                        "file_cache": {
                            "config": {
                                "target_folder": temp_dir
                            }
                        }
                    },
                },
            )
Example #8
def test_missing_resources():
    with pytest.raises(DagsterInvalidDefinitionError):
        with get_temp_dir() as temp_dir:
            execute_solid(
                cache_file_from_s3,
                ModeDefinition(resource_defs={'file_cache': fs_file_cache}),
                environment_dict={
                    'solids': {
                        'cache_file_from_s3': {
                            'inputs': {
                                's3_coordinate': {
                                    'bucket': 'some-bucket',
                                    'key': 'some-key'
                                }
                            }
                        }
                    },
                    'resources': {
                        'file_cache': {
                            'config': {
                                'target_folder': temp_dir
                            }
                        }
                    },
                },
            )
Example #9
def get_container_snapshot(image):
    check.str_param(image, 'image')
    # Done to avoid memory leaks
    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        # TODO: Add better error handling when we move towards integrating with dagit.
        output_file_name = "{}.json".format(uuid4())
        run_serialized_container_command(
            image=image,
            command='dagster repository snapshot {output_file}'.format(
                output_file=os.path.join(DEFAULT_INTERNAL_VOLUME,
                                         output_file_name)),
            volumes={
                tmp_dir: {
                    'bind': DEFAULT_INTERNAL_VOLUME,
                    'mode': DEFAULT_MODE,
                }
            },
        )

        with open(os.path.join(tmp_dir, output_file_name), 'r') as fp:
            snapshot = deserialize_json_to_dagster_namedtuple(fp.read())
        if not isinstance(snapshot, RepositorySnapshot):
            raise DagsterInvariantViolationError(
                "Deserialized snapshot is of type {received}; must be a RepositorySnapshot"
                .format(received=type(snapshot)))
    return snapshot
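`run_serialized_container_command` is a project helper; with the plain Docker SDK, the volume binding it performs would look roughly like this sketch (image name and host path are hypothetical):

import docker

client = docker.from_env()
logs = client.containers.run(
    "my-dagster-image",  # hypothetical image
    "dagster repository snapshot /data/snapshot.json",
    volumes={"/tmp/snapshots": {"bind": "/data", "mode": "rw"}},
    remove=True,
)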
Example #10
def _execute_pipeline_command(
    repository_file, pipeline_name, environment_dict, mode=None, solid_subset=None
):
    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:

        output_file_name = "{}.json".format(uuid4())
        output_file = os.path.join(tmp_dir, output_file_name)

        command = (
            "dagster api execute_pipeline -y {repository_file} {pipeline_name} "
            "{output_file} --environment-dict='{environment_dict}' --mode={mode}".format(
                repository_file=repository_file,
                pipeline_name=pipeline_name,
                output_file=output_file,
                environment_dict=json.dumps(environment_dict),
                mode=mode,
            )
        )

        if solid_subset:
            command += " --solid_subset={solid_subset}".format(solid_subset=",".join(solid_subset))

        # fire-and-forget: launch the CLI process, then stream its events from the output file below
        os.popen(command)

        for message in ipc_read_event_stream(output_file):
            yield message
Example #11
def test_cache_file_from_s3_step_three_fake(snapshot):
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')
    s3.put_object(Bucket='some-bucket', Key='some-key', Body=b'foo')

    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3),
            input_values={
                's3_coord': {
                    'bucket': 'some-bucket',
                    'key': 'some-key'
                }
            },
        )

        target_file = os.path.join(temp_dir, 'some-key')
        assert os.path.exists(target_file)

        with open(target_file, 'rb') as ff:
            assert ff.read() == b'foo'

    snapshot.assert_match({
        'some-bucket': {
            k: s3.get_object(Bucket='some-bucket', Key=k)['Body'].read()
            for k in [
                obj['Key']
                for obj in s3.list_objects(Bucket='some-bucket')['Contents']
            ]
        }
    })
Example #12
def test_pyspark_assets_pipeline():
    with get_temp_dir() as temp_dir:
        run_config = {
            "solids": {
                "get_max_temp_per_station": {
                    "config": {"temperature_file": "temperature.csv", "version_salt": "foo"}
                },
                "get_consolidated_location": {
                    "config": {"station_file": "stations.csv", "version_salt": "foo"}
                },
                "combine_dfs": {"config": {"version_salt": "foo"}},
                "pretty_output": {"config": {"version_salt": "foo"}},
            },
            "resources": {
                "source_data_dir": {
                    "config": {
                        "dir": "python_modules/dagster-test/dagster_test/toys/pyspark_assets/asset_pipeline_files"
                    }
                },
                "savedir": {"config": {"dir": temp_dir}},
            },
        }

        result = execute_pipeline(pyspark_assets_pipeline, run_config=run_config)
        assert result.success
Example #13
def get_active_repository_data_from_image(image):
    check.str_param(image, 'image')

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        run_serialized_container_command(
            image=image,
            command='dagster repository snapshot {output_file}'.format(
                output_file=os.path.join(DEFAULT_INTERNAL_VOLUME,
                                         output_file_name)),
            volumes={
                tmp_dir: {
                    'bind': DEFAULT_INTERNAL_VOLUME,
                    'mode': DEFAULT_MODE
                }
            },
        )

        active_repo_data = _get_active_repo_data(
            os.path.join(tmp_dir, output_file_name))
        if not isinstance(active_repo_data, ActiveRepositoryData):
            raise DagsterInvariantViolationError(
                "Deserialized snapshot is of type {received}; must be an ActiveRepositoryData"
                .format(received=type(active_repo_data)))
        return active_repo_data
Example #14
def test_fs_file_cache_write_binary_data():
    with get_temp_dir() as temp_dir:
        file_store = FSFileCache(temp_dir)
        assert not file_store.has_file_object("foo")
        assert file_store.write_binary_data("foo", b"bar")
        file_handle = file_store.get_file_handle("foo")
        assert isinstance(file_handle, LocalFileHandle)
        assert file_handle.path_desc == os.path.join(temp_dir, "foo")
Example #15
def test_fs_file_cache_write_binary_data():
    with get_temp_dir() as temp_dir:
        file_store = FSFileCache(temp_dir)
        assert not file_store.has_file_object('foo')
        assert file_store.write_binary_data('foo', 'bar'.encode())
        file_handle = file_store.get_file_handle('foo')
        assert isinstance(file_handle, LocalFileHandle)
        assert file_handle.path_desc == os.path.join(temp_dir, 'foo')
Example #16
def test_basic_file_manager_copy_handle_to_local_temp():
    foo_data = 'foo'.encode()
    with get_temp_dir() as temp_dir:
        with get_temp_file_handle_with_data(foo_data) as foo_handle:
            with local_file_manager(temp_dir) as manager:
                local_temp = manager.copy_handle_to_local_temp(foo_handle)
                assert local_temp != foo_handle.path
                with open(local_temp, 'rb') as ff:
                    assert ff.read() == foo_data
Example #17
def test_fs_file_cache_write_data():
    bytes_object = io.BytesIO(b"bar")
    with get_temp_dir() as temp_dir:
        file_cache = FSFileCache(temp_dir)
        assert not file_cache.has_file_object("foo")
        assert file_cache.write_file_object("foo", bytes_object)
        file_handle = file_cache.get_file_handle("foo")
        assert isinstance(file_handle, LocalFileHandle)
        assert file_handle.path_desc == os.path.join(temp_dir, "foo")
Example #18
def test_cache_file_from_s3_step_three_mock():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3_session),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert s3_session.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #19
def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            's3_coordinate': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == 'file_handle_exists'
        path_in_metadata = expectation_result.metadata_entries[0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'some-key' in solid_result.output_value().path_desc
Example #20
def test_cache_file_from_s3_step_one_two():
    boto_s3 = mock.MagicMock()
    # mock.patch is difficult to get right and requires monkeypatching of global artifacts
    with get_temp_dir() as temp_dir, mock.patch(
        file_cache_folder.__module__ + ".file_cache_folder", new=lambda: temp_dir
    ), mock.patch("boto3.client", new=lambda *_args, **_kwargs: boto_s3):
        execute_solid(
            cache_file_from_s3,
            input_values=dict(s3_coord={"bucket": "some-bucket", "key": "some-key"}),
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #21
def test_pyspark_assets_job(executor_def):
    with get_temp_dir() as temp_dir:
        run_config = {
            "solids": {
                "get_max_temp_per_station": {
                    "config": {
                        "temperature_file": "temperature.csv",
                        "version_salt": "foo",
                    }
                },
                "get_consolidated_location": {
                    "config": {
                        "station_file": "stations.csv",
                        "version_salt": "foo",
                    }
                },
                "combine_dfs": {
                    "config": {
                        "version_salt": "foo",
                    }
                },
                "pretty_output": {
                    "config": {
                        "version_salt": "foo",
                    }
                },
            },
            "resources": {
                "source_data_dir": {
                    "config": {
                        "dir":
                        file_relative_path(
                            __file__,
                            "../dagster_test/graph_job_op_toys/pyspark_assets/asset_job_files",
                        ),
                    }
                },
                "savedir": {
                    "config": {
                        "dir": temp_dir
                    }
                },
            },
        }

        result = pyspark_assets.to_job(
            config=run_config,
            resource_defs=dir_resources,
            executor_def=executor_def).execute_in_process()
        assert result.success
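Example #21 is parametrized over `executor_def`; the fixture supplying it is not shown in the source, but a plausible version, assuming Dagster's built-in executors, is:

import pytest

from dagster import in_process_executor, multiprocess_executor


@pytest.fixture(params=[in_process_executor, multiprocess_executor])
def executor_def(request):
    return request.param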
Example #22
def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        "boto3.client", new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({"file_cache": FSFileCache(temp_dir)}),
            input_values={"s3_coord": {"bucket": "some-bucket", "key": "some-key"}},
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #23
def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        'boto3.client', new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({'file_cache': FSFileCache(temp_dir)}),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #24
def test_cache_file_from_s3_overwrite():
    with get_temp_dir() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_one)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_two)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download did not occur because the file is already there
        assert s3_session_two.download_file.call_count == 0
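The second half of Example #24 exercises the cache hit: the file is already in the target folder, so `download_file` is never called. The core skip logic reduces to something like this sketch (names hypothetical):

import os


def cache_from_s3(s3, bucket, key, target_folder):
    # Hypothetical core of the caching solid: download only on a cache miss.
    target = os.path.join(target_folder, key)
    if not os.path.exists(target):
        s3.download_file(bucket, key, target)
    return target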
Example #25
def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {
                            "s3_coordinate": {
                                "bucket": "some-bucket",
                                "key": "some-key"
                            }
                        }
                    }
                },
                "resources": {
                    "file_cache": {
                        "config": {
                            "target_folder": temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == "file_handle_exists"
        path_in_metadata = expectation_result.metadata_entries[0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "some-key" in solid_result.output_value().path_desc
Example #26
def test_cache_file_from_s3_step_three_fake(snapshot):
    s3_session = S3FakeSession({'some-bucket': {'some-key': b'foo'}})

    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3_session),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        target_file = os.path.join(temp_dir, 'some-key')
        assert os.path.exists(target_file)

        with open(target_file, 'rb') as ff:
            assert ff.read() == b'foo'

        snapshot.assert_match(s3_session.buckets)
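`S3FakeSession` keeps bucket contents in memory, which is what makes `snapshot.assert_match(s3_session.buckets)` possible. A minimal sketch of such a fake (the real class ships with dagster-aws; this version is an assumption):

class FakeS3Session:
    def __init__(self, buckets):
        # buckets: {bucket_name: {key: bytes}}
        self.buckets = buckets

    def download_file(self, bucket, key, filename):
        # Mimic boto3's client.download_file by copying bytes to disk.
        with open(filename, "wb") as f:
            f.write(self.buckets[bucket][key])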
Example #27
def test_cache_file_from_s3_step_two_use_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        "boto3.client", new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(resource_defs={"file_cache": fs_file_cache}),
            run_config={
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coord": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
            },
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #28
def test_cache_file_from_s3_step_one_one():
    boto_s3 = mock.MagicMock()
    # mock.patch is difficult to get right and requires monkeypatching of global artifacts
    with get_temp_dir() as temp_dir, mock.patch(
        file_cache_folder.__module__ + ".file_cache_folder", new=lambda: temp_dir
    ), mock.patch("boto3.client", new=lambda *_args, **_kwargs: boto_s3):

        @solid
        def emit_value():
            return {"bucket": "some-bucket", "key": "some-key"}

        @pipeline
        def pipe():
            return cache_file_from_s3(emit_value())

        execute_pipeline(pipe)

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #29
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {
                            "s3_coordinate": {
                                "bucket": "some-bucket",
                                "key": "some-key"
                            }
                        },
                        "config": {
                            "file_key": "specified-file-key"
                        },
                    }
                },
                "resources": {
                    "file_cache": {
                        "config": {
                            "target_folder": temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "specified-file-key" in solid_result.output_value().path_desc
Example #30
def test_cache_file_from_s3_step_one_one():
    boto_s3 = mock.MagicMock()
    # mock.patch is difficult to get right and requires monkeypatching of global artifacts
    with get_temp_dir() as temp_dir, mock.patch(
            file_cache_folder.__module__ + '.file_cache_folder',
            new=lambda: temp_dir), mock.patch(
                'boto3.client', new=lambda *_args, **_kwargs: boto_s3):

        @solid
        def emit_value(_):
            return {'bucket': 'some-bucket', 'key': 'some-key'}

        @pipeline
        def pipe():
            # pylint: disable=no-value-for-parameter
            return cache_file_from_s3(emit_value())

        execute_pipeline(pipe)

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))