def test_download():
    with tempfile.TemporaryDirectory() as temp_dir:
        test_job = build_assets_job(
            "test_job",
            assets=ASSETS,
            resource_defs={
                "io_manager": fs_io_manager,
                "partition_start": ResourceDefinition.string_resource(),
                "partition_end": ResourceDefinition.string_resource(),
                "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                    {"base_path": temp_dir}
                ),
                "warehouse_io_manager": mem_io_manager,
                "pyspark": pyspark_resource,
                "hn_client": hn_snapshot_client,
            },
        )

        result = test_job.execute_in_process(partition_key="2020-12-30-00:00")
        assert result.success

def test_download():
    with tempfile.TemporaryDirectory() as temp_dir:
        result = download_comments_and_stories_dev.graph.execute_in_process(
            run_config={
                "resources": {
                    "partition_start": {"config": "2020-12-30 00:00:00"},
                    "partition_end": {"config": "2020-12-30 01:00:00"},
                    "parquet_io_manager": {"config": {"base_path": temp_dir}},
                }
            },
            resources={
                "io_manager": fs_io_manager,
                "partition_start": ResourceDefinition.string_resource(),
                "partition_end": ResourceDefinition.string_resource(),
                "parquet_io_manager": partitioned_parquet_io_manager,
                "warehouse_io_manager": mem_io_manager,
                "pyspark": pyspark_resource,
                "hn_client": hn_snapshot_client,
            },
        )
        assert result.success

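# --- Illustrative aside (not part of the snippets above) ---
# A minimal, self-contained sketch of the pattern both test_download variants
# rely on: ResourceDefinition.string_resource() resolves to whatever string is
# supplied under its "config" key in run_config. The names echo_job and
# echo_partition_start are hypothetical, chosen only for this sketch; it
# assumes the job/op APIs available alongside the tests above.
from dagster import ResourceDefinition, job, op

@op(required_resource_keys={"partition_start"})
def echo_partition_start(context):
    # The resource value is the configured string, passed through unchanged.
    return context.resources.partition_start

@job(resource_defs={"partition_start": ResourceDefinition.string_resource()})
def echo_job():
    echo_partition_start()

result = echo_job.execute_in_process(
    run_config={"resources": {"partition_start": {"config": "2020-12-30 00:00:00"}}}
)
assert result.output_for_node("echo_partition_start") == "2020-12-30 00:00:00"
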
def test_string_resource():
    called = {}

    @solid(required_resource_keys={'test_string'})
    def solid_test_string(context):
        assert context.resources.test_string == 'foo'
        called['yup'] = True

    pipeline = PipelineDefinition(
        name='test_string_resource',
        solid_defs=[solid_test_string],
        mode_defs=[
            ModeDefinition(
                resource_defs={'test_string': ResourceDefinition.string_resource()}
            )
        ],
    )

    result = execute_pipeline(
        pipeline, {'resources': {'test_string': {'config': 'foo'}}}
    )

    assert result.success
    assert called['yup']

def test_string_resource():
    called = {}

    @solid(required_resource_keys={"test_string"})
    def solid_test_string(context):
        assert context.resources.test_string == "foo"
        called["yup"] = True

    the_pipeline = PipelineDefinition(
        name="test_string_resource",
        solid_defs=[solid_test_string],
        mode_defs=[
            ModeDefinition(
                resource_defs={"test_string": ResourceDefinition.string_resource()}
            )
        ],
    )

    result = execute_pipeline(
        the_pipeline, {"resources": {"test_string": {"config": "foo"}}}
    )

    assert result.success
    assert called["yup"]

def test_string_resource():
    called = {}

    @solid
    def solid_test_string(info):
        assert info.context.resources.test_string == 'foo'
        called['yup'] = True

    pipeline = PipelineDefinition(
        name='test_string_resource',
        solids=[solid_test_string],
        context_definitions={
            'default': PipelineContextDefinition(
                resources={'test_string': ResourceDefinition.string_resource()}
            )
        },
    )

    result = execute_pipeline(
        pipeline,
        {'context': {'default': {'resources': {'test_string': {'config': 'foo'}}}}},
    )

    assert result.success
    assert called['yup']

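# --- Illustrative aside (not part of the snippets above) ---
# What string_resource() amounts to, sketched with the @resource decorator:
# a resource whose config schema is a bare string and whose value is that
# string passed through unchanged. `passthrough_string_resource` is a
# hypothetical name, not something defined in any snippet above.
from dagster import resource

@resource(config_schema=str)
def passthrough_string_resource(init_context):
    # e.g. the config value 'foo' in the tests above resolves to the string 'foo'
    return init_context.resource_config
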
def test_download():
    with tempfile.TemporaryDirectory() as temp_dir:
        test_job = AssetGroup.from_package_name(
            "hacker_news_assets.assets",
            resource_defs={
                "io_manager": fs_io_manager,
                "partition_start": ResourceDefinition.string_resource(),
                "partition_end": ResourceDefinition.string_resource(),
                "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                    {"base_path": temp_dir}
                ),
                "warehouse_io_manager": mem_io_manager,
                "pyspark": pyspark_resource,
                "hn_client": hn_snapshot_client,
                "dbt": ResourceDefinition.none_resource(),
            },
        ).build_job(
            "test_job",
            selection=["*comments", "*stories"],
        )

        result = test_job.execute_in_process(partition_key="2020-12-30-00:00")
        assert result.success

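# --- Illustrative aside (not part of the snippets above) ---
# The "*comments" / "*stories" strings above use Dagster's asset-selection
# syntax, where a leading "*" selects an asset together with all of its
# upstream dependencies. A hypothetical two-asset group to make that concrete;
# `items`, `comments`, and `comments_job` exist only in this sketch.
from dagster import AssetGroup, asset

@asset
def items():
    return ["a comment", "a story"]

@asset
def comments(items):
    # Depends on `items` via the argument name.
    return [i for i in items if "comment" in i]

# "*comments" selects `comments` plus its upstream asset `items`.
comments_job = AssetGroup([items, comments]).build_job(
    "comments_job", selection=["*comments"]
)
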
"net.snowflake:snowflake-jdbc:3.8.0", "net.snowflake:spark-snowflake_2.12:2.8.2-spark_3.0", "com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.7", ] ), "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem", "spark.hadoop.fs.s3.awsAccessKeyId": os.getenv("AWS_ACCESS_KEY_ID", ""), "spark.hadoop.fs.s3.awsSecretAccessKey": os.getenv("AWS_SECRET_ACCESS_KEY", ""), "spark.hadoop.fs.s3.buffer.dir": "/tmp", } } DEV_RESOURCES = { "io_manager": fs_io_manager, "partition_start": ResourceDefinition.string_resource(), "partition_end": ResourceDefinition.string_resource(), "parquet_io_manager": partitioned_parquet_io_manager.configured( {"base_path": get_system_temp_directory()} ), "warehouse_io_manager": fs_io_manager, "pyspark": pyspark_resource, "hn_client": hn_api_subsample_client.configured({"sample_rate": 10}), } PROD_RESOURCES = { "io_manager": s3_pickle_io_manager.configured({"s3_bucket": "hackernews-elementl-prod"}), "s3": s3_resource, "partition_start": ResourceDefinition.string_resource(), "partition_end": ResourceDefinition.string_resource(),
"spark.hadoop.fs.s3.awsSecretAccessKey": os.getenv("AWS_SECRET_ACCESS_KEY", ""), "spark.hadoop.fs.s3.buffer.dir": "/tmp", } } MODE_TEST = ModeDefinition( name="test_local_data", description= "This mode queries snapshotted HN data and does all writes locally.", resource_defs={ "io_manager": fs_io_manager, "partition_start": ResourceDefinition.string_resource(), "partition_end": ResourceDefinition.string_resource(), "parquet_io_manager": partitioned_parquet_io_manager, "db_io_manager": mem_io_manager, "pyspark": pyspark_resource, "hn_client": hn_snapshot_client, "slack": ResourceDefinition.mock_resource(), "base_url": ResourceDefinition.hardcoded_resource("http://localhost:3000", "Dagit URL"),