def test_configuring_solids_without_specifying_name():
    """configured() without a ``name`` must raise for both solids and composites."""

    @solid(config_schema=int)
    def return_int(context):
        return context.solid_config

    @composite_solid(
        config_schema={"num": int},
        config_fn=lambda cfg: {"return_int": {"config": cfg["num"]}},
    )
    def return_int_composite():
        return_int()

    # Plain solid: configuring anonymously is rejected.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match='Missing string param "name" while attempting to configure the node "return_int',
    ):
        configured(return_int)(2)

    # Composite solid: same failure mode.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match='Missing string param "name" while attempting to configure the node "return_int_composite"',
    ):
        configured(return_int_composite)({"num": 5})
def test_configured_composite_solid_with_inputs():
    """A configured composite solid still accepts runtime inputs via run config."""

    @solid(config_schema=str, input_defs=[InputDefinition("x", int)])
    def return_int(context, x):
        assert context.solid_config == "inner config sentinel"
        return x

    # Bake the inner config in before the composite ever sees the solid.
    return_int_x = configured(return_int, name="return_int_x")("inner config sentinel")

    @solid(config_schema=str)
    def add(context, lhs, rhs):
        assert context.solid_config == "outer config sentinel"
        return lhs + rhs

    @composite_solid(
        input_defs=[InputDefinition("x", int), InputDefinition("y", int)],
        config_schema={"outer": str},
        config_fn=lambda cfg: {"add": {"config": cfg["outer"]}},
    )
    def return_int_composite(x, y):
        return add(return_int_x(x), return_int_x.alias("return_int_again")(y))

    return_int_composite_x = configured(return_int_composite, name="return_int_composite")(
        {"outer": "outer config sentinel"}
    )

    @pipeline
    def test_pipeline():
        return_int_composite_x()

    result = execute_pipeline(
        test_pipeline,
        {"solids": {"return_int_composite": {"inputs": {"x": 6, "y": 4}}}},
    )
    assert result.success
    # 6 + 4 passed straight through the two inner return_int invocations.
    assert result.result_for_solid("return_int_composite").output_value() == 10
def test_configuring_composite_solid_with_no_config_mapping():
    """Pre-configuring a composite solid lacking a config mapping must raise."""

    @solid
    def return_run_id(context):
        return context.run_id

    @composite_solid
    def composite_without_config_fn():
        return return_run_id()

    with pytest.raises(
        DagsterInvalidDefinitionError,
        match='Only composite solids utilizing config mapping can be pre-configured. The solid "composite_without_config_fn"',
    ):
        configured(composite_without_config_fn, name="configured_composite")({})
def test_dbt_cli_run(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt_cli_run yields one CLI-output materialization plus one per model."""
    test_solid = configured(dbt_cli_run, name="test_solid")(
        {"project-dir": test_project_dir, "profiles-dir": dbt_config_dir}
    )
    result = execute_solid(test_solid)
    assert result.success

    # Test asset materializations
    asset_materializations = [
        event
        for event in result.step_events
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]

    # Exactly one materialization keyed on the CLI output itself...
    solid_materializations = [
        mat for mat in asset_materializations if mat.asset_key.path[0] == "dbt_run_cli_output"
    ]
    assert len(solid_materializations) == 1

    # ...and one per model table in the test project (4 models).
    table_materializations = [
        mat for mat in asset_materializations if mat.asset_key.path[0] == "model"
    ]
    assert len(table_materializations) == 4
def test_adls_file_manager_resource(MockADLS2FileManager, MockADLS2Resource):
    """The configured adls2_file_manager resource wires config into ADLS2FileManager."""
    did_it_run = dict(it_ran=False)

    resource_config = {
        "storage_account": "some-storage-account",
        "credential": {
            "key": "some-key",
        },
        "adls2_file_system": "some-file-system",
        "adls2_prefix": "some-prefix",
    }

    @op(required_resource_keys={"file_manager"})
    def test_solid(context):
        # test that we got back a ADLS2FileManager
        assert context.resources.file_manager == MockADLS2FileManager.return_value

        # make sure the file manager was initialized with the config we are supplying
        MockADLS2FileManager.assert_called_once_with(
            adls2_client=MockADLS2Resource.return_value.adls2_client,
            file_system=resource_config["adls2_file_system"],
            prefix=resource_config["adls2_prefix"],
        )
        MockADLS2Resource.assert_called_once_with(
            resource_config["storage_account"], resource_config["credential"]["key"]
        )
        did_it_run["it_ran"] = True

    context = build_op_context(
        resources={"file_manager": configured(adls2_file_manager)(resource_config)}
    )
    test_solid(context)
    assert did_it_run["it_ran"]
def test_intermediate_storage_dict_config_configured():
    """A configured() intermediate storage receives its baked-in config value."""
    it = {}

    @intermediate_storage(required_resource_keys=set(), config_schema={"value": str})
    def test_intermediate_storage(init_context):
        assert init_context.intermediate_storage_config["value"] == "secret testing value!!"
        it["ran"] = True
        return create_mem_system_intermediate_store(init_context)

    test_intermediate_storage_configured = configured(test_intermediate_storage)(
        {"value": "secret testing value!!"}
    )

    # The configured storage accepts an empty config dict...
    assert_pipeline_runs_with_intermediate_storage(
        test_intermediate_storage_configured, {"test_intermediate_storage": {}}
    )
    assert it["ran"]

    # ...and also an explicit None (closure sees the rebound dict).
    it = {}
    assert_pipeline_runs_with_intermediate_storage(
        test_intermediate_storage_configured, {"test_intermediate_storage": None}
    )
    assert it["ran"]
def test_load_tag_no_suffix(self):
    """With append_timestamp=False the tag is exactly the configured prefix."""
    configured_tag = configured(load_tag)(
        {"load_tag_prefix": "fake_prefix", "append_timestamp": False}
    )
    with initialize_resource(configured_tag) as tag:
        self.assertEqual(tag, "fake_prefix")
def test_configured_solid_with_inputs():
    """A configured solid still takes runtime inputs from run config."""

    @solid(config_schema=str, input_defs=[InputDefinition("x", int)])
    def return_int(context, x):
        assert context.solid_config == "config sentinel"
        return x

    return_int_configured = configured(return_int, name="return_int_configured")(
        "config sentinel"
    )

    @pipeline
    def return_int_pipeline():
        return_int_configured()

    run_config = {"solids": {"return_int_configured": {"inputs": {"x": 6}}}}
    result = execute_pipeline(return_int_pipeline, run_config)
    assert result.success
    # The input value passes through unchanged.
    assert result.result_for_solid("return_int_configured").output_value() == 6
def test_single_level_pipeline_with_configured_composite_solid():
    """Config mapping fans one outer value out to both aliased inner solids."""

    @solid(config_schema={"inner": int})
    def multiply_by_two(context):
        return context.solid_config["inner"] * 2

    @solid
    def add(_context, lhs, rhs):
        return lhs + rhs

    @composite_solid(
        config_schema={"outer": int},
        config_fn=lambda c: {
            "multiply_by_two": {"config": {"inner": c["outer"]}},
            "multiply_by_two_again": {"config": {"inner": c["outer"]}},
        },
    )
    def multiply_by_four():
        return add(multiply_by_two(), multiply_by_two.alias("multiply_by_two_again")())

    multiply_three_by_four = configured(multiply_by_four, name="multiply_three_by_four")(
        {"outer": 3}
    )

    @pipeline
    def test_pipeline():
        multiply_three_by_four()

    result = execute_pipeline(test_pipeline)
    assert result.success
    # (3 * 2) + (3 * 2) == 12
    assert result.result_for_solid("multiply_three_by_four").output_value() == 12
def test_s3_file_manager_resource_with_profile():
    """An unknown AWS profile surfaces as ProfileNotFound inside DagsterResourceFunctionError."""
    resource_config = {
        "use_unsigned_session": True,
        "region_name": "us-west-1",
        "endpoint_url": "http://alternate-s3-host.io",
        "s3_bucket": "some-bucket",
        "s3_prefix": "some-prefix",
        "profile_name": "some-profile",
    }

    @op(required_resource_keys={"file_manager"})
    def test_op(context):
        # placeholder function to test resource initialization
        return context.log.info("return from test_solid")

    with pytest.raises(DagsterResourceFunctionError) as e:
        context = build_op_context(
            resources={"file_manager": configured(s3_file_manager)(resource_config)}
        )
        test_op(context)

    # The boto3 ProfileNotFound is preserved as the user exception.
    assert isinstance(e.value.user_exception, exceptions.ProfileNotFound)
    assert str(e.value.user_exception) == "The config profile (some-profile) could not be found"
def test_load_tag_with_suffix(self):
    """With append_timestamp=True the tag still begins with the configured prefix."""
    configured_tag = configured(load_tag)(
        {"load_tag_prefix": "fake_prefix", "append_timestamp": True}
    )
    with initialize_resource(configured_tag) as tag:
        self.assertTrue(tag.startswith("fake_prefix"))
def test_gcs_file_manger_resource(MockGCSFileManager, mock_storage_client_Client):
    """The configured gcs_file_manager resource wires config into GCSFileManager."""
    # NOTE(review): "manger" in the test name looks like a typo, kept so pytest
    # selection by name stays stable.
    did_it_run = dict(it_ran=False)

    resource_config = {
        "project": "some-project",
        "gcs_bucket": "some-bucket",
        "gcs_prefix": "some-prefix",
    }

    @op(required_resource_keys={"file_manager"})
    def test_op(context):
        # test that we got back a GCSFileManager
        assert context.resources.file_manager == MockGCSFileManager.return_value

        # make sure the file manager was initialized with the config we are supplying
        MockGCSFileManager.assert_called_once_with(
            client=mock_storage_client_Client.return_value,
            gcs_bucket=resource_config["gcs_bucket"],
            gcs_base_key=resource_config["gcs_prefix"],
        )
        mock_storage_client_Client.assert_called_once_with(project=resource_config["project"])
        did_it_run["it_ran"] = True

    @job(resource_defs={"file_manager": configured(gcs_file_manager)(resource_config)})
    def test_job():
        test_op()

    test_job.execute_in_process()
    assert did_it_run["it_ran"]
def test_dbt_cli_test(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt_cli_test succeeds against the seeded test project."""
    test_solid = configured(dbt_cli_test, name="test_solid")(
        {"project-dir": test_project_dir, "profiles-dir": dbt_config_dir}
    )
    result = execute_solid(test_solid)
    assert result.success
def test_dbt_cli_snapshot_freshness(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """This command will is a no-op without more arguments, but this test shows that it
    can invoked successfully."""
    test_solid = configured(dbt_cli_snapshot_freshness, name="test_solid")(
        {"project-dir": test_project_dir, "profiles-dir": dbt_config_dir}
    )
    result = execute_solid(test_solid)
    assert result.success
def test_s3_file_manager_resource(MockS3FileManager, mock_boto3_resource):
    """The configured s3_file_manager resource wires config into S3FileManager and boto3."""
    did_it_run = dict(it_ran=False)

    resource_config = {
        "use_unsigned_session": True,
        "region_name": "us-west-1",
        "endpoint_url": "http://alternate-s3-host.io",
        "s3_bucket": "some-bucket",
        "s3_prefix": "some-prefix",
    }

    mock_s3_session = mock_boto3_resource.return_value.meta.client

    @solid(required_resource_keys={"file_manager"})
    def test_solid(context):
        # test that we got back a S3FileManager
        assert context.resources.file_manager == MockS3FileManager.return_value

        # make sure the file manager was initialized with the config we are supplying
        MockS3FileManager.assert_called_once_with(
            s3_session=mock_s3_session,
            s3_bucket=resource_config["s3_bucket"],
            s3_base_key=resource_config["s3_prefix"],
        )

        # The botocore Config object is opaque; capture it from the call and
        # assert on its retry settings separately.
        _, call_kwargs = mock_boto3_resource.call_args
        mock_boto3_resource.assert_called_once_with(
            "s3",
            region_name=resource_config["region_name"],
            endpoint_url=resource_config["endpoint_url"],
            use_ssl=True,
            config=call_kwargs["config"],
        )
        assert call_kwargs["config"].retries["max_attempts"] == 5

        did_it_run["it_ran"] = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={"file_manager": configured(s3_file_manager)(resource_config)},
            )
        ]
    )
    def test_pipeline():
        test_solid()

    execute_pipeline(test_pipeline)
    assert did_it_run["it_ran"]
def test_single_level_pipeline_with_complex_configured_solid():
    """configured() with a dict config schema bakes multiple fields at once."""

    @solid(config_schema={"age": int, "name": str})
    def introduce(context):
        return "{name} is {age} years old".format(**context.solid_config)

    introduce_aj = configured(introduce, name="introduce_aj")({"age": 20, "name": "AJ"})

    @pipeline
    def introduce_pipeline():
        introduce_aj()

    result = execute_pipeline(introduce_pipeline)
    assert result.success
    assert result.result_for_solid("introduce_aj").output_value() == "AJ is 20 years old"
def test_single_level_pipeline_with_configured_solid():
    """configured() with a scalar config schema bakes the value into the solid."""

    @solid(config_schema=int)
    def return_int(context):
        return context.solid_config

    return_int_5 = configured(return_int, name="return_int_5")(5)

    @pipeline
    def return_int_pipeline():
        return_int_5()

    result = execute_pipeline(return_int_pipeline)
    assert result.success
    assert result.result_for_solid("return_int_5").output_value() == 5
def test_logger_using_configured():
    """A configured() logger definition receives its baked-in config value."""
    it = {'ran': False}

    @logger(config_schema=Field(str))
    def test_logger(init_context):
        assert init_context.logger_config == 'secret testing value!!'
        it['ran'] = True
        return logging.Logger('test', level=coerce_valid_log_level('INFO'))

    test_logger_configured = configured(test_logger)('secret testing value!!')

    assert_pipeline_runs_with_logger(test_logger_configured, {})
    assert it['ran']
def test_logger_using_configured():
    """A configured() logger definition receives its baked-in config value."""
    it = {"ran": False}

    @logger(config_schema=Field(str))
    def test_logger(init_context):
        assert init_context.logger_config == "secret testing value!!"
        it["ran"] = True
        return logging.Logger("test", level=coerce_valid_log_level("INFO"))

    test_logger_configured = configured(test_logger)("secret testing value!!")

    assert_pipeline_runs_with_logger(test_logger_configured, {})
    assert it["ran"]
def test_dbt_cli_run_with_extra_config(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt_cli_run accepts optional flags (threads, models, fail-fast) in its config."""
    test_solid = configured(dbt_cli_run, name="test_solid")(
        {
            "project-dir": test_project_dir,
            "profiles-dir": dbt_config_dir,
            "threads": 1,
            "models": ["least_caloric"],
            "fail-fast": True,
        }
    )
    result = execute_solid(test_solid)
    assert result.success
def test_in_process_executor_dict_config_configured():
    """A configured() executor definition receives its baked-in config value."""

    @executor(name='test_executor', config_schema={'value': str})
    def test_executor(init_context):
        from dagster.core.executor.in_process import InProcessExecutor

        assert init_context.executor_config['value'] == 'secret testing value!!'
        return InProcessExecutor(
            # shouldn't need to .get() here - issue with defaults in config setup
            retries=Retries.from_config({'enabled': {}}),
            marker_to_close=None,
        )

    test_executor_configured = configured(test_executor)({'value': 'secret testing value!!'})
    assert_pipeline_runs_with_executor([test_executor_configured], {'test_executor': None})
def test_dbt_cli_snapshot_with_extra_config(
    self,
    dbt_seed,
    test_project_dir,
    dbt_config_dir,
):  # pylint: disable=unused-argument
    """dbt_cli_snapshot accepts optional flags (threads, select, exclude) in its config."""
    test_solid = configured(dbt_cli_snapshot, name="test_solid")(
        {
            "project-dir": test_project_dir,
            "profiles-dir": dbt_config_dir,
            "threads": 1,
            "select": ["sort_by_calories+"],
            "exclude": ["least_caloric"],
        },
    )
    result = execute_solid(test_solid)
    assert result.success
def test_dbt_cli_run(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt_cli_run succeeds and reports all four results as passing."""
    test_solid = configured(dbt_cli_run, name="test_solid")(
        {"project-dir": test_project_dir, "profiles-dir": dbt_config_dir}
    )
    result = execute_solid(test_solid)
    assert result.success

    # All four results pass; nothing warned, errored, or skipped.
    output = result.output_value()
    assert output.n_pass == 4
    assert output.n_warn == 0
    assert output.n_error == 0
    assert output.n_skip == 0
    assert output.n_total == 4
def test_dbt_rpc_single_op(self, op: str):
    """Each single-op RPC solid forwards its config to the client method and
    yields the request token from the RPC response."""
    op_solid, op_config = SINGLE_OP_CONFIGS[op]

    mocked_rpc_client = MagicMock(spec=DbtRpcClient)
    mocked_client_op_method = getattr(mocked_rpc_client, op)

    @resource
    def mock_dbt_rpc_resource(_init_context):
        return mocked_rpc_client

    # Unique sentinels so we can assert they flow through untouched.
    response_sentinel_value = "<rpc response: {}>".format(uuid.uuid4())
    request_token_sentinel_value = "<request token: {}>".format(uuid.uuid4())

    mock_response = MagicMock()
    mock_response.text = response_sentinel_value
    mock_response.json.return_value = {
        "result": {"request_token": request_token_sentinel_value}
    }
    mocked_client_op_method.return_value = DbtRpcOutput(response=mock_response)

    configured_solid = configured(op_solid, name="configured_solid")(op_config)

    instance = DagsterInstance.ephemeral()
    result = execute_pipeline(
        PipelineDefinition(
            [configured_solid],
            name="test",
            mode_defs=[ModeDefinition(resource_defs={"dbt_rpc": mock_dbt_rpc_resource})],
        ),
        instance=instance,
    )

    # The solid's baked-in config is passed verbatim to the client method.
    mocked_client_op_method.assert_called_once_with(**op_config)
    assert (
        result.output_for_solid(configured_solid.name, "request_token")
        == request_token_sentinel_value
    )
    # The raw response text is surfaced somewhere in the run's logs.
    assert any(
        response_sentinel_value in event.message
        for event in instance.all_logs(result.run_id)
    )
def test_run_all(self, dbt_rpc_server):  # pylint: disable=unused-argument
    """dbt_rpc_run_and_wait executes every model and materializes each one."""
    run_all_fast_poll = configured(dbt_rpc_run_and_wait, name="run_all_fast_poll")(
        {"interval": 2}
    )
    dagster_result, dbt_output = output_for_solid_executed_with_rpc_resource(
        run_all_fast_poll
    )

    # Every model in the project was executed by dbt...
    executed_model_from_result = {
        res.node["unique_id"] for res in dbt_output.result.results
    }
    assert executed_model_from_result == TestDBTRunAndWaitSolid.ALL_MODELS_KEY_SET

    # ...and each produced a matching asset materialization.
    materialization_asset_keys = {
        mat.asset_key.to_string()
        for mat in dagster_result.materializations_during_compute
    }
    assert materialization_asset_keys == TestDBTRunAndWaitSolid.ALL_MODELS_KEY_SET
def test_dbt_cli_test_with_extra_confg(
    self, dbt_seed, test_project_dir, dbt_config_dir, dbt_target_dir, monkeypatch
):  # pylint: disable=unused-argument
    """dbt_cli_test honors a custom target path.

    NOTE(review): "confg" in the test name looks like a typo, kept so pytest
    selection by name stays stable.
    """
    # Specify dbt target path
    monkeypatch.setenv("DBT_TARGET_PATH", dbt_target_dir)

    test_solid = configured(dbt_cli_test, name="test_solid")(
        {
            "project-dir": test_project_dir,
            "profiles-dir": dbt_config_dir,
            "target-path": dbt_target_dir,
        }
    )
    result = execute_solid(test_solid)
    assert result.success
def test_dbt_cli_with_unset_env_var_in_profile(
    self, dbt_seed, test_project_dir, dbt_config_dir, monkeypatch
):  # pylint: disable=unused-argument
    """A profile referencing an unset env var fails fast with a clear message."""
    monkeypatch.delenv("POSTGRES_TEST_DB_DBT_HOST")

    test_solid = configured(dbt_cli_run, name="test_solid")(
        {"project-dir": test_project_dir, "profiles-dir": dbt_config_dir}
    )
    with pytest.raises(DagsterDbtCliFatalRuntimeError) as exc:
        execute_solid(test_solid)

    failure: DagsterDbtCliFatalRuntimeError = exc.value
    # The dbt error text is carried in the second metadata entry.
    assert "Env var required but not provided:" in failure.metadata_entries[1].entry_data.text
def test_dbt_cli_run_operation(self, dbt_seed, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt_cli_run_operation invokes a macro and its log output is captured."""
    # NOTE(review): the "succeded" sentinel is misspelled but used consistently
    # on both sides of the assertion; preserved byte-for-byte.
    test_solid = configured(dbt_cli_run_operation, name="test_solid")(
        {
            "project-dir": test_project_dir,
            "profiles-dir": dbt_config_dir,
            "macro": "log_macro",
            "args": {"msg": "<<test succeded!>>"},
        }
    )
    result = execute_solid(test_solid)
    assert result.success
    assert any(
        "Log macro: <<test succeded!>>" in log["message"]
        for log in result.output_value()["logs"]
    )
def test_s3_file_manger_resource(MockS3FileManager, mock_boto3_resource):
    """The configured s3_file_manager resource wires config into S3FileManager and boto3."""
    # NOTE(review): "manger" in the test name looks like a typo, kept so pytest
    # selection by name stays stable.
    did_it_run = dict(it_ran=False)

    resource_config = {
        'use_unsigned_session': True,
        'region_name': 'us-west-1',
        'endpoint_url': 'http://alternate-s3-host.io',
        's3_bucket': 'some-bucket',
        's3_prefix': 'some-prefix',
    }

    mock_s3_session = mock_boto3_resource.return_value.meta.client

    @solid(required_resource_keys={'file_manager'})
    def test_solid(context):
        # test that we got back a S3FileManager
        assert context.resources.file_manager == MockS3FileManager.return_value

        # make sure the file manager was initialized with the config we are supplying
        MockS3FileManager.assert_called_once_with(
            s3_session=mock_s3_session,
            s3_bucket=resource_config['s3_bucket'],
            s3_base_key=resource_config['s3_prefix'],
        )
        mock_boto3_resource.assert_called_once_with(
            's3',
            region_name=resource_config['region_name'],
            endpoint_url=resource_config['endpoint_url'],
            use_ssl=True,
        )
        did_it_run['it_ran'] = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={'file_manager': configured(s3_file_manager)(resource_config)},
            )
        ]
    )
    def test_pipeline():
        test_solid()

    execute_pipeline(test_pipeline)
    assert did_it_run['it_ran']
def test_adls_file_manager_resource(MockADLS2FileManager, MockADLS2Resource):
    """The configured adls2_file_manager resource wires config into ADLS2FileManager."""
    did_it_run = dict(it_ran=False)

    resource_config = {
        'storage_account': 'some-storage-account',
        'credential': {
            'key': 'some-key',
        },
        'adls2_file_system': 'some-file-system',
        'adls2_prefix': 'some-prefix',
    }

    @solid(required_resource_keys={'file_manager'})
    def test_solid(context):
        # test that we got back a ADLS2FileManager
        assert context.resources.file_manager == MockADLS2FileManager.return_value

        # make sure the file manager was initialized with the config we are supplying
        MockADLS2FileManager.assert_called_once_with(
            adls2_client=MockADLS2Resource.return_value.adls2_client,
            file_system=resource_config['adls2_file_system'],
            prefix=resource_config['adls2_prefix'],
        )
        MockADLS2Resource.assert_called_once_with(
            resource_config['storage_account'], resource_config['credential']['key']
        )
        did_it_run['it_ran'] = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={'file_manager': configured(adls2_file_manager)(resource_config)},
            )
        ]
    )
    def test_pipeline():
        test_solid()

    execute_pipeline(test_pipeline)
    assert did_it_run['it_ran']