def test_multi_python_environment_workspace():
    workspace = load_workspace_from_config(
        yaml.safe_load(_get_multi_location_workspace_yaml()),
        # fake out as if it were loaded by a yaml file in this directory
        file_relative_path(__file__, 'not_a_real.yaml'),
        UserProcessApi.CLI,
    )

    assert isinstance(workspace, Workspace)
    assert len(workspace.repository_location_handles) == 6
    assert workspace.has_repository_location_handle('loaded_from_file')
    assert workspace.has_repository_location_handle('loaded_from_module')
    assert workspace.has_repository_location_handle('named_loaded_from_file')
    assert workspace.has_repository_location_handle('named_loaded_from_module')

    loaded_from_file_handle = workspace.get_repository_location_handle('loaded_from_file')
    assert set(loaded_from_file_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository'
    }
    assert isinstance(loaded_from_file_handle, PythonEnvRepositoryLocationHandle)

    loaded_from_module_handle = workspace.get_repository_location_handle('loaded_from_module')
    assert set(loaded_from_module_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository'
    }
    assert isinstance(loaded_from_module_handle, PythonEnvRepositoryLocationHandle)

    named_loaded_from_file_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file'
    )
    assert set(named_loaded_from_file_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_file_handle, PythonEnvRepositoryLocationHandle)

    named_loaded_from_module_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module'
    )
    assert set(named_loaded_from_module_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_module_handle, PythonEnvRepositoryLocationHandle)

    named_loaded_from_module_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module_attribute'
    )
    assert set(named_loaded_from_module_attribute_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository_name'
    }
    assert isinstance(
        named_loaded_from_module_attribute_handle, PythonEnvRepositoryLocationHandle
    )

    named_loaded_from_file_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file_attribute'
    )
    assert set(named_loaded_from_file_attribute_handle.repository_code_pointer_dict.keys()) == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_file_attribute_handle, PythonEnvRepositoryLocationHandle)
def test_safe_isfile():
    assert safe_isfile(file_relative_path(__file__, "test_file_utils.py"))
    assert not safe_isfile(file_relative_path(__file__, "not_a_file.py"))
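# Hedged sketch of the two helpers exercised above, for orientation only (the real
# dagster.utils implementations may differ). file_relative_path presumably joins a
# relative path against the directory of the calling file, so results do not depend
# on the process's current working directory; safe_isfile presumably wraps
# os.path.isfile so that odd paths return False instead of raising.
import os


def file_relative_path_sketch(dunderfile, relative_path):
    # resolve relative_path against the directory containing `dunderfile`
    return os.path.join(os.path.dirname(dunderfile), relative_path)


def safe_isfile_sketch(path):
    # treat paths that isfile cannot evaluate (e.g. embedded null bytes) as "not a file"
    try:
        return os.path.isfile(path)
    except (OSError, ValueError):
        return False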
# pylint: disable=unused-argument, no-value-for-parameter, no-member

# start_marker
import os
from typing import List

from dagster import DynamicOut, DynamicOutput, Field, job, op
from dagster.utils import file_relative_path


@op(
    config_schema={
        "path": Field(str, default_value=file_relative_path(__file__, "sample"))
    },
    out=DynamicOut(str),
)
def files_in_directory(context):
    path = context.op_config["path"]
    dirname, _, filenames = next(os.walk(path))
    for file in filenames:
        yield DynamicOutput(
            value=os.path.join(dirname, file),
            # create a mapping key from the file name
            mapping_key=file.replace(".", "_").replace("-", "_"),
        )


@op
def process_file(path: str) -> int:
    # simple example of calculating size
    return os.path.getsize(path)
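# Hedged sketch of how the dynamic output above is typically consumed; the original
# example is truncated before this point, so the job name below is made up. Inside a
# @job composition, the DynamicOut op is invoked and each dynamic output is mapped
# over the downstream op.
@job
def process_directory_sketch():
    files = files_in_directory()
    files.map(process_file)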
"""Compile the proto definitions into Python. This tooling should be invoked to regenerate the Python grpc artifacts by running: python -m dagster.grpc.compile """ import os import shutil import subprocess import sys from dagster.utils import file_relative_path, safe_tempfile_path PROTOS_DIR = file_relative_path(__file__, "protos") PROTOS_PATH = os.path.join(PROTOS_DIR, "api.proto") GENERATED_DIR = file_relative_path(__file__, "__generated__") GENERATED_PB2_PATH = os.path.join(GENERATED_DIR, "api_pb2.py") GENERATED_GRPC_PATH = os.path.join(GENERATED_DIR, "api_pb2_grpc.py") ISORT_SETTINGS_PATH = file_relative_path(__file__, "../../../../") GENERATED_HEADER = [ ("# @" + "generated\n"), # This is to avoid matching the phab rule "\n", "# This file was generated by running `python -m dagster.grpc.compile`\n", "# Do not edit this file directly, and do not attempt to recompile it using\n", "# grpc_tools.protoc directly, as several changes must be made to the raw output\n",
import sys
import time

from dagster.serdes.ipc import (
    interrupt_ipc_subprocess,
    open_ipc_subprocess,
    setup_interrupt_support,
)
from dagster.utils import file_relative_path

if __name__ == '__main__':
    setup_interrupt_support()
    (
        child_opened_sentinel,
        parent_interrupt_sentinel,
        child_started_sentinel,
        child_interrupt_sentinel,
    ) = (sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
    child_process = open_ipc_subprocess(
        [
            sys.executable,
            file_relative_path(__file__, 'subprocess_with_interrupt_support.py'),
            child_started_sentinel,
            child_interrupt_sentinel,
        ]
    )
    with open(child_opened_sentinel, 'w') as fd:
        fd.write('opened_ipc_subprocess')
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        interrupt_ipc_subprocess(child_process)
        with open(parent_interrupt_sentinel, 'w') as fd:
            fd.write('parent_received_keyboard_interrupt')
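# Hedged sketch of the mechanism relied on above, not the dagster.serdes.ipc source:
# interrupt_ipc_subprocess presumably delivers a "polite" interrupt to the child
# process, so that the child's KeyboardInterrupt handling (set up via
# setup_interrupt_support) gets a chance to run before any hard kill.
import signal
import subprocess
import sys


def interrupt_subprocess_sketch(proc: "subprocess.Popen") -> None:
    if sys.platform == "win32":
        # requires the child to have been spawned with CREATE_NEW_PROCESS_GROUP
        proc.send_signal(signal.CTRL_BREAK_EVENT)
    else:
        proc.send_signal(signal.SIGINT)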
def test_import_module_from_path():
    foo_module = seven.import_module_from_path(
        "foo_module", file_relative_path(__file__, "foo_module.py")
    )
    assert foo_module.FOO == 7
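# Hedged sketch of what an import_module_from_path helper typically does (the real
# dagster.seven implementation may differ): load a module object from an arbitrary
# file path via importlib, without the file needing to be on sys.path.
import importlib.util


def import_module_from_path_sketch(module_name, path):
    spec = importlib.util.spec_from_file_location(module_name, path)
    if spec is None:
        raise ImportError(f"Could not load a module spec from {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module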
import pandas as pd
from dagster import EventMetadata
from dagster.core.asset_defs import build_assets_job
from dagster.utils import file_relative_path
from dagster_dbt import dbt_cli_resource
from dagster_dbt.asset_defs import load_assets_from_dbt_manifest
from dagster_pyspark import pyspark_resource
from hacker_news_assets.pipelines.download_pipeline import S3_SPARK_CONF
from hacker_news_assets.resources.snowflake_io_manager import (
    SHARED_SNOWFLAKE_CONF,
    connect_snowflake,
    snowflake_io_manager_dev,
    snowflake_io_manager_prod,
)

DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt")
DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config"

# We define two sets of resources: one for the prod mode, which writes to production schemas,
# and one for dev mode, which writes to alternate schemas
PROD_RESOURCES = {
    "dbt": dbt_cli_resource.configured(
        {"profiles_dir": DBT_PROFILES_DIR, "project_dir": DBT_PROJECT_DIR, "target": "prod"}
    ),
    "warehouse_io_manager": snowflake_io_manager_prod,
    # "parquet_io_manager": parquet_io_manager.configured({"base_path": get_system_temp_directory()}),
    "pyspark":
def test_start_mock_worker_config_from_yaml(worker_patch):
    args = ["-y", file_relative_path(__file__, "engine_config.yaml")]
    start_worker("dagster_test_worker", args=args)
    assert_called(worker_patch)
"field_six_nullable_int_list": Field([Noneable(int)], is_required=False), }, }, ) def a_solid_with_multilayered_config(_): return None return a_solid_with_multilayered_config() @pipeline(preset_defs=[ PresetDefinition.from_files( name="prod", config_files=[ file_relative_path(__file__, "../environments/csv_hello_world_prod.yaml") ], ), PresetDefinition.from_files( name="test", config_files=[ file_relative_path(__file__, "../environments/csv_hello_world_test.yaml") ], ), PresetDefinition( name="test_inline", run_config={ "solids": { "sum_solid": { "inputs": {
def test_list_command_cli():
    with instance_for_test():
        runner = CliRunner()

        result = runner.invoke(
            pipeline_list_command,
            ["-f", file_relative_path(__file__, "test_cli_commands.py"), "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                file_relative_path(__file__, "test_cli_commands.py"),
                "-a",
                "bar",
                "-d",
                os.path.dirname(__file__),
            ],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands", "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-w", file_relative_path(__file__, "workspace.yaml")]
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-w",
                file_relative_path(__file__, "workspace.yaml"),
                "-w",
                file_relative_path(__file__, "override.yaml"),
            ],
        )
        assert_correct_extra_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                "foo.py",
                "-m",
                "dagster_tests.cli_tests.command_tests.test_cli_commands",
                "-a",
                "bar",
            ],
        )
        assert result.exit_code == 2

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-f", file_relative_path(__file__, "test_cli_commands.py")]
        )
        assert_correct_bar_repository_output(result)
def test_start_worker_config_from_yaml(rabbitmq):
    args = ["-y", file_relative_path(__file__, "engine_config.yaml")]

    with cleanup_worker("dagster_test_worker", args=args):
        start_worker("dagster_test_worker", args=args)
        assert check_for_worker("dagster_test_worker", args=args)
def test_interrupt_compute_log_tail_grandchild(
    windows_legacy_stdio_env,  # pylint: disable=redefined-outer-name, unused-argument
):
    with ExitStack() as context_stack:
        (
            child_opened_sentinel,
            parent_interrupt_sentinel,
            child_started_sentinel,
            stdout_pids_file,
            stderr_pids_file,
            child_interrupt_sentinel,
        ) = [context_stack.enter_context(safe_tempfile_path()) for _ in range(6)]

        parent_process = open_ipc_subprocess(
            [
                sys.executable,
                file_relative_path(__file__, "parent_compute_log_subprocess.py"),
                child_opened_sentinel,
                parent_interrupt_sentinel,
                child_started_sentinel,
                stdout_pids_file,
                stderr_pids_file,
                child_interrupt_sentinel,
            ]
        )

        wait_for_file(child_opened_sentinel)
        wait_for_file(child_started_sentinel)

        wait_for_file(stdout_pids_file)
        with open(stdout_pids_file, "r") as stdout_pids_fd:
            stdout_pids_str = stdout_pids_fd.read()
        assert stdout_pids_str.startswith("stdout pids:")
        stdout_pids = list(
            map(
                lambda x: int(x) if x != "None" else None,
                [x.strip("(),") for x in stdout_pids_str.split(" ")[2:]],
            )
        )

        wait_for_file(stderr_pids_file)
        with open(stderr_pids_file, "r") as stderr_pids_fd:
            stderr_pids_str = stderr_pids_fd.read()
        assert stderr_pids_str.startswith("stderr pids:")
        stderr_pids = list(
            map(
                lambda x: int(x) if x != "None" else None,
                [x.strip("(),") for x in stderr_pids_str.split(" ")[2:]],
            )
        )

        interrupt_ipc_subprocess(parent_process)

        wait_for_file(child_interrupt_sentinel)
        with open(child_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("compute_log_subprocess_interrupt")

        wait_for_file(parent_interrupt_sentinel)
        with open(parent_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("parent_received_keyboard_interrupt")

        for stdout_pid in stdout_pids:
            if stdout_pid is not None:
                wait_for_process(stdout_pid)

        for stderr_pid in stderr_pids:
            if stderr_pid is not None:
                wait_for_process(stderr_pid)
def test_create_app_with_workspace():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "./workspace.yaml")],
    ) as workspace:
        assert create_app_from_workspace(workspace, DagsterInstance.ephemeral())
def test_grpc_multi_location_workspace():
    workspace = load_workspace_from_config(
        yaml.safe_load(_get_multi_location_workspace_yaml()),
        # fake out as if it were loaded by a yaml file in this directory
        file_relative_path(__file__, 'not_a_real.yaml'),
        UserProcessApi.GRPC,
    )

    assert isinstance(workspace, Workspace)
    assert len(workspace.repository_location_handles) == 6
    assert workspace.has_repository_location_handle('loaded_from_file')
    assert workspace.has_repository_location_handle('loaded_from_module')

    loaded_from_file_handle = workspace.get_repository_location_handle('loaded_from_file')
    assert isinstance(loaded_from_file_handle, ManagedGrpcPythonEnvRepositoryLocationHandle)
    assert loaded_from_file_handle.repository_names == {'hello_world_repository'}

    loaded_from_module_handle = workspace.get_repository_location_handle('loaded_from_module')
    assert isinstance(loaded_from_module_handle, ManagedGrpcPythonEnvRepositoryLocationHandle)
    assert loaded_from_module_handle.repository_names == {'hello_world_repository'}

    named_loaded_from_file_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file'
    )
    assert named_loaded_from_file_handle.repository_names == {'hello_world_repository_name'}
    assert isinstance(named_loaded_from_file_handle, ManagedGrpcPythonEnvRepositoryLocationHandle)

    named_loaded_from_module_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module'
    )
    assert named_loaded_from_module_handle.repository_names == {'hello_world_repository_name'}
    assert isinstance(
        named_loaded_from_module_handle, ManagedGrpcPythonEnvRepositoryLocationHandle
    )

    named_loaded_from_module_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module_attribute'
    )
    assert named_loaded_from_module_attribute_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(
        named_loaded_from_module_attribute_handle, ManagedGrpcPythonEnvRepositoryLocationHandle
    )

    named_loaded_from_file_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file_attribute'
    )
    assert named_loaded_from_file_attribute_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(
        named_loaded_from_file_attribute_handle, ManagedGrpcPythonEnvRepositoryLocationHandle
    )
def test_0_7_6_postgres_pre_add_pipeline_snapshot(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "d5f89349-7477-4fab-913e-0925cef0a959"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_migration_regex("run", current_revision=None),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        # ensure migration is run
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        assert runs[0].run_id == run_id

        run = instance.get_run_by_id(run_id)
        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id
        new_run = instance.get_run_by_id(new_run_id)
        assert new_run.pipeline_snapshot_id
def test_presets():
    @solid(config_schema={'error': Bool})
    def can_fail(context):
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipe = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                'passing',
                config_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'passing_overide_to_fail',
                config_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_selection=['can_fail'],
            ).with_additional_config({'solids': {'can_fail': {'config': {'error': True}}}}),
            PresetDefinition(
                'passing_direct_dict',
                run_config={'solids': {'can_fail': {'config': {'error': False}}}},
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_1',
                config_files=[file_relative_path(__file__, 'fail_env.yaml')],
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_2', config_files=[file_relative_path(__file__, 'pass_env.yaml')]
            ),
            PresetDefinition('subset', solid_selection=['can_fail']),
        ],
    )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_1', config_files=[file_relative_path(__file__, 'not_a_file.yaml')]
        )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            config_files=[file_relative_path(__file__, 'test_repository_definition.py')],
        )

    assert execute_pipeline(pipe, preset='passing').success
    assert execute_pipeline(pipe, preset='passing_direct_dict').success
    assert execute_pipeline(pipe, preset='failing_1', raise_on_error=False).success == False
    assert execute_pipeline(pipe, preset='failing_2', raise_on_error=False).success == False

    with pytest.raises(DagsterInvariantViolationError, match='Could not find preset'):
        execute_pipeline(pipe, preset='not_failing', raise_on_error=False)

    assert (
        execute_pipeline(pipe, preset='passing_overide_to_fail', raise_on_error=False).success
        == False
    )

    assert execute_pipeline(
        pipe,
        preset='passing',
        run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success

    with pytest.raises(
        check.CheckError,
        match=re.escape(
            'The environment set in preset \'passing\' does not agree with the environment passed '
            'in the `run_config` argument.'
        ),
    ):
        execute_pipeline(
            pipe,
            preset='passing',
            run_config={'solids': {'can_fail': {'config': {'error': True}}}},
        )

    assert execute_pipeline(
        pipe,
        preset='subset',
        run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success
def test_0_7_6_postgres_pre_event_log_migration(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_event_log_migration/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "ca7f1e33-526d-4f75-9bc5-3e98da41ab97"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Runs will appear in DB, but event logs need migration
        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)

        # Make sure the schema is migrated
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(run_id)
        assert len(events_by_id) == 40

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log data migration
        migrate_event_log_data(instance=instance)

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:
        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {'num': file_relative_path(__file__, '../data/num.csv')},
                    'outputs': [{'result': out_csv_path}],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {'name': 'csv_hello_world'},
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']

        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        run_id = str(uuid.uuid4())
        instance.create_empty_run(run_id, 'csv_hello_world')

        result = execute_dagster_graphql(
            define_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {'name': 'csv_hello_world'},
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {'runId': run_id},
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None
        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure only one event
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][0]
        assert metadata_entry['path'] == out_csv_path
def get_hello_world_path():
    return file_relative_path(__file__, "hello_world_repository.py")
    svd.fit(user_story_matrix.matrix)

    total_explained_variance = svd.explained_variance_ratio_.sum()

    yield Output(
        svd,
        metadata={
            "Total explained variance ratio": total_explained_variance,
            "Number of components": n_components,
        },
    )


model_perf_notebook = define_dagstermill_solid(
    "recommender_model_perf",
    notebook_path=file_relative_path(__file__, "../notebooks/recommender_model_perf.ipynb"),
    input_defs=[InputDefinition(dagster_type=TruncatedSVD, name="recommender_model")],
    output_notebook_name="perf_notebook",
)


@op(
    ins={
        "story_titles": In(
            root_manager_key="warehouse_loader",
            metadata={
                "table": "hackernews.stories",
                "columns": ["id", "title"],
datasource_name="getest", suite_name="basic.warning" ) # end_ge_demo_marker_1 @pipeline( mode_defs=[ModeDefinition("basic", resource_defs={"ge_data_context": ge_data_context})], preset_defs=[ PresetDefinition( "sample_preset_success", mode="basic", run_config={ "resources": { "ge_data_context": { "config": { "ge_root_dir": file_relative_path(__file__, "./great_expectations") } } }, "solids": { "read_in_datafile": { "inputs": { "csv_path": {"value": file_relative_path(__file__, "./succeed.csv")} } } }, }, ), PresetDefinition( "sample_preset_fail", mode="basic",
def emit_num_csv_local_file(_):
    return LocalFileHandle(file_relative_path(__file__, "../num.csv"))
def get_test_project_recon_pipeline(pipeline_name):
    return ReOriginatedReconstructablePipelineForTest(
        ReconstructableRepository.for_file(
            file_relative_path(__file__, "test_pipelines/repo.py"),
            "define_demo_execution_repo",
        ).get_reconstructable_pipeline(pipeline_name)
    )
def emit_num_special_csv_local_file(_):
    return LocalFileHandle(file_relative_path(__file__, "../num_with_special_chars.csv"))
def test_double_repository():
    loadable_repos = loadable_targets_from_python_file(
        file_relative_path(__file__, "double_repository.py"),
    )

    assert set([lr.target_definition.name for lr in loadable_repos]) == {"repo_one", "repo_two"}
def test_list_command():
    runner = CliRunner()

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': file_relative_path(__file__, 'test_cli_commands.py'),
            'module_name': None,
            'fn_name': 'define_bar_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', file_relative_path(__file__, 'test_cli_commands.py'), '-n', 'define_bar_repo'],
    )
    assert result.exit_code == 0
    assert result.output == (
        'Repository bar\n'
        '**************\n'
        'Pipeline: baz\n'
        'Description:\n'
        'Not much tbh\n'
        'Solids: (Execution Order)\n'
        ' do_input\n'
        '*************\n'
        'Pipeline: foo\n'
        'Solids: (Execution Order)\n'
        ' do_something\n'
        ' do_input\n'
    )

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': None,
            'module_name': 'dagster_examples.intro_tutorial.repos',
            'fn_name': 'define_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command, ['-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo']
    )
    assert result.exit_code == 0
    assert result.output == (
        'Repository hello_cereal_repository\n'
        '**********************************\n'
        'Pipeline: complex_pipeline\n'
        'Solids: (Execution Order)\n'
        ' load_cereals\n'
        ' sort_by_calories\n'
        ' sort_by_protein\n'
        ' display_results\n'
        '*******************************\n'
        'Pipeline: hello_cereal_pipeline\n'
        'Solids: (Execution Order)\n'
        ' hello_cereal\n'
    )

    execute_list_command(
        {
            'repository_yaml': file_relative_path(__file__, 'repository_module.yaml'),
            'python_file': None,
            'module_name': None,
            'fn_name': None,
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command, ['-y', file_relative_path(__file__, 'repository_module.yaml')]
    )
    assert result.exit_code == 0
    assert result.output == (
        'Repository hello_cereal_repository\n'
        '**********************************\n'
        'Pipeline: complex_pipeline\n'
        'Solids: (Execution Order)\n'
        ' load_cereals\n'
        ' sort_by_calories\n'
        ' sort_by_protein\n'
        ' display_results\n'
        '*******************************\n'
        'Pipeline: hello_cereal_pipeline\n'
        'Solids: (Execution Order)\n'
        ' hello_cereal\n'
    )

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': 'foo.py',
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': 'define_repo',
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', 'foo.py', '-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo'],
    )
    assert result.exit_code == 2

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': None,
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(pipeline_list_command, ['-m', 'dagster_examples.intro_tutorial.repos'])
    assert result.exit_code == 2

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': file_relative_path(__file__, 'test_cli_commands.py'),
                'module_name': None,
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command, ['-f', file_relative_path(__file__, 'test_cli_commands.py')]
    )
    assert result.exit_code == 2
def test_script_relative_path_file_relative_path_equiv():
    assert file_relative_path(__file__, "foo") == file_relative_path(__file__, "foo")
def test_tags_pipeline():
    runner = CliRunner()
    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_execute_command,
            [
                '-y',
                file_relative_path(__file__, 'repository_module.yaml'),
                '--tags',
                '{ "foo": "bar" }',
                'hello_cereal_pipeline',
            ],
        )
        assert result.exit_code == 0
        runs = instance.get_runs()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) == 1
        assert run.tags.get('foo') == 'bar'

    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_execute_command,
            [
                '-y',
                file_relative_path(__file__, '../repository.yaml'),
                '-p',
                'add',
                '--tags',
                '{ "foo": "bar" }',
                'multi_mode_with_resources',  # pipeline name
            ],
        )
        assert result.exit_code == 0
        runs = instance.get_runs()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) == 1
        assert run.tags.get('foo') == 'bar'

    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_backfill_command,
            [
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                '--noprompt',
                '--partition-set',
                'baz_partitions',
                '--partitions',
                'c',
                '--tags',
                '{ "foo": "bar" }',
                'baz',
            ],
        )
        assert result.exit_code == 0
        runs = instance.run_launcher.queue()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) >= 1
        assert run.tags.get('foo') == 'bar'
def test_load_with_empty_working_directory(capfd):
    port = find_free_port()
    # File that will fail if working directory isn't set to default
    python_file = file_relative_path(__file__, "grpc_repo_with_local_import.py")

    subprocess_args = [
        "dagster",
        "api",
        "grpc",
        "--port",
        str(port),
        "--python-file",
        python_file,
    ]

    with new_cwd(os.path.dirname(__file__)):
        process = subprocess.Popen(
            subprocess_args,
            stdout=subprocess.PIPE,
        )

        try:
            wait_for_grpc_server(
                process, DagsterGrpcClient(port=port, host="localhost"), subprocess_args
            )
            assert DagsterGrpcClient(port=port).ping("foobar") == "foobar"
        finally:
            process.terminate()

        # indicating the working directory is empty fails
        port = find_free_port()
        subprocess_args = [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            python_file,
            "--empty-working-directory",
        ]

        process = subprocess.Popen(
            subprocess_args,
            stdout=subprocess.PIPE,
        )
        try:
            with pytest.raises(Exception):
                wait_for_grpc_server(
                    process, DagsterGrpcClient(port=port, host="localhost"), subprocess_args
                )
            process.wait()

            _, err = capfd.readouterr()
            assert "No module named" in err
        finally:
            if process.poll() is None:
                process.terminate()
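# Hedged sketch of a find_free_port-style helper as used above (assumed, not the
# actual dagster test utility): bind to port 0 so the OS assigns an unused port,
# read it back, and release the socket for the gRPC server process to reuse.
import socket


def find_free_port_sketch() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]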
def test_workspace():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "../workspace.yaml")]
    ) as workspace:
        assert len(workspace.repository_location_handles) == 2