def test_yaml_file():
    """Resolve a handle from a repository yaml and reject under-specified CLI args.

    The handle built from ``repository_module.yaml`` plus a pipeline name must be
    in PIPELINE mode, and its entrypoint must match ``define_repo`` in
    ``dagster_examples.intro_tutorial.repos``. Three incomplete argument
    combinations must each raise ``UsageError``.
    """
    repos_module = importlib.import_module('dagster_examples.intro_tutorial.repos')

    yaml_cli_args = {
        'module_name': None,
        'pipeline_name': 'foobar',
        'python_file': None,
        'fn_name': None,
        'repository_yaml': script_relative_path('repository_module.yaml'),
    }
    handle = handle_for_pipeline_cli_args(yaml_cli_args)
    assert handle.mode == _ExecutionTargetMode.PIPELINE

    expected = LoaderEntrypoint(
        repos_module, 'dagster_examples.intro_tutorial.repos', 'define_repo'
    )
    # Compare field by field rather than the entrypoint objects as a whole.
    for field in ('module', 'module_name', 'fn_name'):
        assert getattr(handle.entrypoint, field) == getattr(expected, field)
    assert handle.entrypoint.from_handle == handle
    assert handle.data.pipeline_name == 'foobar'

    # Each of these names only part of a target and must be rejected.
    incomplete_cli_args = (
        {'module_name': 'kdjfdk', 'pipeline_name': 'foobar'},
        {'fn_name': 'kjdfkd', 'pipeline_name': 'foobar'},
        {'pipeline_name': 'foobar', 'python_file': 'kjdfkdj'},
    )
    for bad_args in incomplete_cli_args:
        with pytest.raises(UsageError):
            assert handle_for_pipeline_cli_args(bad_args)
def test_repository_python_file():
    """Build a handle from a python file and reject conflicting target args.

    With ``python_file``, ``fn_name`` and ``pipeline_name`` supplied, the handle
    must be in PIPELINE mode and its entrypoint must equal the expected
    ``LoaderEntrypoint``. Adding a ``module_name`` or a ``repository_yaml`` on top
    of the python file must raise ``UsageError``.
    """
    repo_path = script_relative_path('bar_repo.py')
    bar_repo_module = imp.load_source('bar_repo', repo_path)

    handle = handle_for_pipeline_cli_args(
        {'pipeline_name': 'foo', 'python_file': repo_path, 'fn_name': 'define_bar_repo'}
    )

    assert handle.mode == _ExecutionTargetMode.PIPELINE
    assert handle.entrypoint == LoaderEntrypoint(
        bar_repo_module, 'bar_repo', 'define_bar_repo', handle
    )
    assert handle.data.pipeline_name == 'foo'
    assert handle.entrypoint.from_handle == handle

    # (module_name, repository_yaml) pairs that conflict with python_file.
    conflicting_extras = (('kdjfkd', None), (None, 'kjdfkdjf'))
    for extra_module_name, extra_repository_yaml in conflicting_extras:
        with pytest.raises(UsageError):
            handle_for_pipeline_cli_args(
                {
                    'module_name': extra_module_name,
                    'pipeline_name': 'foo',
                    'python_file': script_relative_path('bar_repo.py'),
                    'fn_name': 'define_bar_repo',
                    'repository_yaml': extra_repository_yaml,
                }
            )
def test_in_pipeline_manager_solid_config():
    """Check ``manager.context.solid_config`` under three manager setups.

    With the default manager it must be ``None``; for the ``hello_world_config``
    solid it must be ``{'greeting': 'hello'}`` when no environment is given, and
    must reflect the supplied environment dict when one is given.
    """

    def _hello_world_config_target():
        # Solid handle and pipeline handle shared by both configured cases.
        return {
            'solid_handle': SolidHandle('hello_world_config', 'hello_world_config', None),
            'handle': handle_for_pipeline_cli_args(
                {
                    'module_name': 'dagstermill.examples.repository',
                    'fn_name': 'define_hello_world_config_pipeline',
                }
            ),
        }

    with in_pipeline_manager() as manager:
        assert manager.context.solid_config is None

    with in_pipeline_manager(**_hello_world_config_target()) as manager:
        assert manager.context.solid_config == {'greeting': 'hello'}

    bonjour_environment = {
        'solids': {'hello_world_config': {'config': {'greeting': 'bonjour'}}}
    }
    with in_pipeline_manager(
        environment_dict=bonjour_environment, **_hello_world_config_target()
    ) as manager:
        assert manager.context.solid_config == {'greeting': 'bonjour'}
def execute_execute_command_with_preset(preset_name, cli_args, _mode):
    """Execute the pipeline selected by ``cli_args`` using a named preset.

    Args:
        preset_name: Name of the preset passed to ``execute_pipeline_with_preset``.
        cli_args: Dict of CLI arguments identifying the pipeline. Its
            ``'pipeline_name'`` key is removed in place.
        _mode: Unused.

    Returns:
        Whatever ``execute_pipeline_with_preset`` returns.
    """
    target_handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = target_handle.build_pipeline_definition()

    # Mutates the caller's dict; a missing key raises KeyError.
    cli_args.pop('pipeline_name')

    instance = DagsterInstance.get()
    return execute_pipeline_with_preset(
        pipeline_def, preset_name, instance=instance, raise_on_error=False
    )
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    **kwargs
):
    """Yield a ``Manager`` whose pipeline context has been reconstituted.

    This is a generator that yields exactly once. Callers in this file use it in
    a ``with`` statement, so it is presumably wrapped by a context-manager
    decorator that is not visible here -- NOTE(review): confirm.

    Args:
        pipeline_name: Pipeline name placed in the CLI args used to build the
            default handle.
        solid_handle: Solid handle placed in the context dict.
        **kwargs: Extra or overriding entries for the context dict passed to
            ``Manager.reconstitute_pipeline_context`` (for example ``handle`` or
            ``environment_dict``).

    Yields:
        The ``Manager`` after ``reconstitute_pipeline_context`` has been called.
    """
    manager = Manager()
    run_id = str(uuid.uuid4())

    # Resolve the handle *before* creating the scratch directory: if resolution
    # raises, there is nothing on disk yet that would be left behind.
    handle = handle_for_pipeline_cli_args(
        {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }
    )

    marshal_dir = tempfile.mkdtemp()
    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'run_config': RunConfig(run_id=run_id, mode='default'),
                'solid_handle': solid_handle,
                'handle': handle,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
            }
            # Caller-supplied kwargs take precedence over the defaults above.
            manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
            yield manager
    finally:
        # mkdtemp leaves cleanup to the caller, so always remove the directory.
        shutil.rmtree(marshal_dir)
def construct_environment_yaml(preset_name, env, pipeline_name, module_name):
    """Build an environment dict from a preset or from yaml file globs.

    Args:
        preset_name: Name of a preset on the pipeline. When truthy, the preset's
            ``environment_dict`` is used and ``env`` must be empty.
        env: Iterable of yaml file globs, or ``None``/empty for no files.
        pipeline_name: Pipeline name. It is also used as the ``fn_name`` when
            loading the pipeline for a preset, and as the last path component of
            the default storage directory.
        module_name: Module name used when loading the pipeline for a preset.

    Returns:
        The environment dict. If it has no ``'storage'`` key, a filesystem
        storage entry under ``<system temp>/dagster-airflow/<pipeline_name>`` is
        added.

    Raises:
        click.UsageError: If both ``preset_name`` and ``env`` are provided.
    """
    # Load environment dict from either a preset or yaml file globs
    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        cli_args = {
            'fn_name': pipeline_name,
            'pipeline_name': pipeline_name,
            'module_name': module_name,
        }
        pipeline = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()
        # Work on a shallow copy so the storage default added below does not
        # leak into the preset object's own environment dict.
        environment_dict = dict(pipeline.get_preset(preset_name).environment_dict or {})
    else:
        # Treat a missing ``env`` like an empty one instead of failing in list().
        env = list(env) if env is not None else []
        environment_dict = load_yaml_from_glob_list(env) if env else {}

    # If not provided by the user, ensure we have storage location defined
    if 'storage' not in environment_dict:
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, 'dagster-airflow', pipeline_name)
        environment_dict['storage'] = {
            'filesystem': {'config': {'base_dir': six.ensure_str(dagster_tmp_path)}}
        }

    return environment_dict
def test_papermill_pandas_hello_world_pipeline():
    """Run the papermill pandas hello-world pipeline against ``num_prod.csv``.

    The pipeline must succeed, and the ``papermill_pandas_hello_world`` solid's
    output must equal the csv contents with one added to every value.
    """
    handle = handle_for_pipeline_cli_args(
        {
            'module_name': 'dagster_pandas.examples',
            'fn_name': 'papermill_pandas_hello_world_pipeline',
        }
    )
    pipeline_def = handle.build_pipeline_definition()

    csv_path = file_relative_path(__file__, 'num_prod.csv')
    environment = {
        'solids': {
            'papermill_pandas_hello_world': {'inputs': {'df': {'csv': {'path': csv_path}}}}
        }
    }

    pipeline_result = execute_pipeline(
        pipeline_def, environment, instance=DagsterInstance.local_temp()
    )
    assert pipeline_result.success

    solid_result = pipeline_result.result_for_solid('papermill_pandas_hello_world')
    expected_frame = pd.read_csv(file_relative_path(__file__, 'num_prod.csv')) + 1
    assert solid_result.output_value().equals(expected_frame)
def test_event_callback_logging():
    """Watch a run's event log and expect a pipeline-success event.

    A callback registered through ``instance.watch_event_logs`` groups dagster
    events by type. After executing the pipeline the test polls up to five
    times, sleeping 0.333 seconds before each check, and passes only if a
    ``PIPELINE_SUCCESS`` event arrives before a ``PIPELINE_FAILURE`` event or
    the retries run out.
    """
    events_by_type = defaultdict(list)

    def _event_callback(record):
        assert isinstance(record, EventRecord)
        if not record.is_dagster_event:
            return
        events_by_type[record.dagster_event.event_type].append(record)

    handle = handle_for_pipeline_cli_args(
        {
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_logging_pipeline',
        }
    )
    run_config = RunConfig()
    instance = DagsterInstance.local_temp()
    instance.watch_event_logs(run_config.run_id, -1, _event_callback)

    execute_pipeline(
        handle.build_pipeline_definition(), run_config=run_config, instance=instance
    )

    passed_before_timeout = False
    for _attempt in range(5):
        time.sleep(0.333)
        if DagsterEventType.PIPELINE_FAILURE in events_by_type:
            break
        if DagsterEventType.PIPELINE_SUCCESS in events_by_type:
            passed_before_timeout = True
            break

    assert passed_before_timeout
def execute_execute_command_with_preset(preset, cli_args, _mode):
    """Execute the pipeline selected by ``cli_args`` with a preset's arguments.

    Args:
        preset: Preset name looked up with ``pipeline.get_preset``.
        cli_args: Dict of CLI arguments identifying the pipeline. Its
            ``'pipeline_name'`` key is removed in place.
        _mode: Unused.

    Returns:
        The result of ``execute_pipeline`` called with the preset's contents as
        keyword arguments.
    """
    target_handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = target_handle.build_pipeline_definition()

    # Mutates the caller's dict; a missing key raises KeyError.
    cli_args.pop('pipeline_name')

    preset_kwargs = pipeline_def.get_preset(preset)
    return execute_pipeline(**preset_kwargs)
def test_logging():
    """Run the hello-logging pipeline with two file loggers and inspect the output.

    One logger is configured at DEBUG level and one at CRITICAL level, each
    writing JSON lines to its own temporary path. 'Hello, there!' must appear
    in the DEBUG file and must not appear in the CRITICAL file.
    """

    def _file_logger(name, file_path, log_level):
        # One entry of the 'loggers' section of the environment dict.
        return {'config': {'name': name, 'file_path': file_path, 'log_level': log_level}}

    def _read_json_records(path):
        # Parse a file of newline-separated JSON documents, skipping blank lines.
        with open(path, 'r') as log_file:
            raw_lines = log_file.read().strip('\n').split('\n')
            return [json.loads(raw_line) for raw_line in raw_lines if raw_line]

    handle = handle_for_pipeline_cli_args(
        {
            'python_file': script_relative_path('./test_logging.py'),
            'fn_name': 'define_hello_logging_pipeline',
        }
    )
    pipeline_def = handle.build_pipeline_definition()

    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            environment = {
                'loggers': {
                    'test': _file_logger('test', test_file_path, 'DEBUG'),
                    'critical': _file_logger('critical', critical_file_path, 'CRITICAL'),
                }
            }
            execute_pipeline(pipeline_def, environment, instance=DagsterInstance.local_temp())

            # Read both logs while the temporary paths are still in scope.
            records = _read_json_records(test_file_path)
            critical_records = _read_json_records(critical_file_path)

    messages = [record['dagster_meta']['orig_message'] for record in records]
    assert 'Hello, there!' in messages

    critical_messages = [
        record['dagster_meta']['orig_message'] for record in critical_records
    ]
    assert 'Hello, there!' not in critical_messages
def test_load_from_pipeline_file():
    """Load ``define_foo_pipeline`` from this file and check the definition built."""
    cli_args = {'fn_name': 'define_foo_pipeline', 'python_file': __file__}
    handle = handle_for_pipeline_cli_args(cli_args)
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'foo'
def test_load_from_pipeline_file():
    """Load ``define_foo_pipeline`` from this file and check its handle round-trips.

    The built definition must be a ``PipelineDefinition`` named 'foo', and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    cli_args = {'fn_name': 'define_foo_pipeline', 'python_file': __file__}
    handle = handle_for_pipeline_cli_args(cli_args)
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'foo'

    expected_lookup = (handle, None)
    assert ExecutionTargetHandle.get_handle(loaded_pipeline) == expected_lookup
def test_loader_from_default_repository_file_yaml():
    """Load pipeline 'foo' through ``repository_file.yaml`` and check its handle.

    The built definition must be a ``PipelineDefinition`` named 'foo', and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    yaml_path = script_relative_path('repository_file.yaml')
    handle = handle_for_pipeline_cli_args(
        {'pipeline_name': 'foo', 'repository_yaml': yaml_path}
    )
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'foo'

    expected_lookup = (handle, None)
    assert ExecutionTargetHandle.get_handle(loaded_pipeline) == expected_lookup
def test_load_from_repository_module():
    """Load 'repo_demo_pipeline' via ``define_repo`` in the intro tutorial repos module."""
    cli_args = {
        'module_name': 'dagster_examples.intro_tutorial.repos',
        'pipeline_name': 'repo_demo_pipeline',
        'fn_name': 'define_repo',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'repo_demo_pipeline'
def pipeline_snapshot_command(solid_subset, **kwargs):
    """Echo the serialized active pipeline data for the selected pipeline.

    Args:
        solid_subset: Comma-separated solid names. When truthy, the pipeline is
            narrowed with ``build_sub_pipeline`` before serialization.
        **kwargs: CLI arguments identifying the pipeline, forwarded to
            ``handle_for_pipeline_cli_args``.
    """
    definition = handle_for_pipeline_cli_args(kwargs).build_pipeline_definition()

    if solid_subset:
        solid_names = solid_subset.split(",")
        definition = definition.build_sub_pipeline(solid_names)

    serialized = serialize_dagster_namedtuple(active_pipeline_data_from_def(definition))
    click.echo(serialized)
def test_load_from_pipeline_module():
    """Load ``hello_cereal_pipeline`` from a module and check its handle round-trips.

    The built definition must be a ``PipelineDefinition`` named
    'hello_cereal_pipeline', and ``ExecutionTargetHandle.get_handle`` must
    return ``(handle, None)`` for it.
    """
    cli_args = {
        'module_name': 'dagster_examples.intro_tutorial.repos',
        'fn_name': 'hello_cereal_pipeline',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'hello_cereal_pipeline'

    expected_lookup = (handle, None)
    assert ExecutionTargetHandle.get_handle(loaded_pipeline) == expected_lookup
def test_load_from_repository_file():
    """Load pipeline 'foo' via ``define_bar_repo`` defined in this file."""
    cli_args = {
        'pipeline_name': 'foo',
        'python_file': __file__,
        'fn_name': 'define_bar_repo',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'foo'
def test_logging():
    """Run the hello-logging pipeline with two file loggers and inspect the output.

    One logger is configured at DEBUG level and one at CRITICAL level, each
    writing JSON lines to its own named temporary file. 'Hello, there!' must
    appear in the DEBUG file and must not appear in the CRITICAL file.
    """

    def _file_logger(name, file_path, log_level):
        # One entry of the 'loggers' section of the environment dict.
        return {'config': {'name': name, 'file_path': file_path, 'log_level': log_level}}

    def _read_json_records(open_file):
        # Rewind, decode the bytes as utf-8 and parse each non-empty line as JSON.
        open_file.seek(0)
        raw_lines = open_file.read().decode('utf-8').strip('\n').split('\n')
        return [json.loads(raw_line) for raw_line in raw_lines if raw_line]

    handle = handle_for_pipeline_cli_args(
        {
            'python_file': script_relative_path('./test_logging.py'),
            'fn_name': 'define_hello_logging_pipeline',
        }
    )
    pipeline_def = handle.build_pipeline_definition()

    with tempfile.NamedTemporaryFile() as test_file:
        with tempfile.NamedTemporaryFile() as critical_file:
            environment = {
                'loggers': {
                    'test': _file_logger('test', test_file.name, 'DEBUG'),
                    'critical': _file_logger('critical', critical_file.name, 'CRITICAL'),
                }
            }
            execute_pipeline(pipeline_def, environment)

            # Read both files back before the context managers close them.
            records = _read_json_records(test_file)
            critical_records = _read_json_records(critical_file)

    messages = [record['dagster_meta']['orig_message'] for record in records]
    assert 'Hello, there!' in messages

    critical_messages = [
        record['dagster_meta']['orig_message'] for record in critical_records
    ]
    assert 'Hello, there!' not in critical_messages
def execute_execute_command_with_preset(preset, raise_on_error, cli_args, _mode):
    """Execute the selected pipeline with a preset and an in-process executor.

    Args:
        preset: Preset name looked up with ``pipeline.get_preset``.
        raise_on_error: Forwarded to ``InProcessExecutorConfig``.
        cli_args: Dict of CLI arguments identifying the pipeline. Its
            ``'pipeline_name'`` key is removed in place.
        _mode: Unused.

    Returns:
        The result of ``execute_pipeline`` called with the preset's contents as
        keyword arguments, after its ``'run_config'`` entry has been replaced by
        a copy carrying the in-process executor config.
    """
    target_handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = target_handle.build_pipeline_definition()

    # Mutates the caller's dict; a missing key raises KeyError.
    cli_args.pop('pipeline_name')

    preset_kwargs = pipeline_def.get_preset(preset)
    preset_run_config = preset_kwargs['run_config']
    preset_kwargs['run_config'] = preset_run_config.with_executor_config(
        InProcessExecutorConfig(raise_on_error=raise_on_error)
    )
    return execute_pipeline(**preset_kwargs)
def test_loader_from_default_repository_module_yaml():
    """Load 'repo_demo_pipeline' through ``repository_module.yaml``."""
    yaml_path = script_relative_path('repository_module.yaml')
    handle = handle_for_pipeline_cli_args(
        {'pipeline_name': 'repo_demo_pipeline', 'repository_yaml': yaml_path}
    )
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'repo_demo_pipeline'
def test_pipeline_module():
    """A module name plus function name must yield a PIPELINE-mode handle.

    The handle's entrypoint must equal a ``LoaderEntrypoint`` built from the
    imported ``dagster`` module, the module name and ``define_pipeline``.
    """
    cli_args = {
        'module_name': 'dagster',
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': None,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.PIPELINE

    expected_entrypoint = LoaderEntrypoint(
        importlib.import_module('dagster'), 'dagster', 'define_pipeline'
    )
    assert handle.entrypoint == expected_entrypoint
def test_loader_from_default_repository_module_yaml():
    """Load 'hello_cereal_pipeline' through ``repository_module.yaml``.

    The built definition must be a ``PipelineDefinition`` named
    'hello_cereal_pipeline', and ``ExecutionTargetHandle.get_handle`` must
    return ``(handle, None)`` for it.
    """
    yaml_path = file_relative_path(__file__, 'repository_module.yaml')
    handle = handle_for_pipeline_cli_args(
        {'pipeline_name': 'hello_cereal_pipeline', 'repository_yaml': yaml_path}
    )
    loaded_pipeline = handle.build_pipeline_definition()

    assert isinstance(loaded_pipeline, PipelineDefinition)
    assert loaded_pipeline.name == 'hello_cereal_pipeline'

    expected_lookup = (handle, None)
    assert ExecutionTargetHandle.get_handle(loaded_pipeline) == expected_lookup
def get_pipeline_snapshot_from_cli_args(cli_args):
    """Return a pipeline snapshot for the pipeline described by ``cli_args``.

    ``cli_args`` must contain a non-None ``'pipeline_name'``. When an ``'image'``
    entry is present and truthy, the snapshot is taken from the repository data
    read from that image, using the first element of ``'pipeline_name'``.
    Otherwise the pipeline definition is built locally and snapshotted.

    Args:
        cli_args: Dict of CLI arguments.

    Returns:
        The snapshot from ``get_pipeline_snapshot`` (image path) or
        ``PipelineSnapshot.from_pipeline_def`` (local path).
    """
    _cli_load_invariant(cli_args.get('pipeline_name') is not None)

    image = cli_args.get('image')
    if not image:
        # No image requested: load the definition in this process.
        handle = handle_for_pipeline_cli_args(cli_args)
        return PipelineSnapshot.from_pipeline_def(handle.build_pipeline_definition())

    _cli_load_invariant(
        is_module_available('docker'),
        msg='--image is not supported without dagster[docker] or the Python package docker installed.',
    )
    active_repo_data = get_active_repository_data_from_image(image)
    first_pipeline_name = cli_args.get('pipeline_name')[0]
    return active_repo_data.get_pipeline_snapshot(first_pipeline_name)
def execute_execute_command_with_preset(preset_name, cli_args, _mode):
    """Execute the selected pipeline with a preset, tagging the run from ``cli_args``.

    Args:
        preset_name: Preset name passed to ``execute_pipeline`` as ``preset``.
        cli_args: Dict of CLI arguments identifying the pipeline. Its
            ``'pipeline_name'`` key is removed in place before the tags are read.
        _mode: Unused.

    Returns:
        Whatever ``execute_pipeline`` returns.
    """
    target_handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = target_handle.build_pipeline_definition()

    # Mutates the caller's dict; a missing key raises KeyError.
    cli_args.pop('pipeline_name')

    run_config = RunConfig(tags=get_tags_from_args(cli_args))
    return execute_pipeline(
        pipeline_def,
        preset=preset_name,
        instance=DagsterInstance.get(),
        raise_on_error=False,
        run_config=run_config,
    )
def test_repository_module():
    """Module, function and pipeline name together must yield a REPOSITORY-mode handle.

    The entrypoint must equal a ``LoaderEntrypoint`` for ``define_bar_repo`` in
    the ``dagster`` module, and the handle data must carry the pipeline name.
    """
    dagster_module = importlib.import_module('dagster')

    cli_args = {
        'module_name': 'dagster',
        'pipeline_name': 'foo',
        'python_file': None,
        'fn_name': 'define_bar_repo',
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.REPOSITORY

    expected_entrypoint = LoaderEntrypoint(dagster_module, 'dagster', 'define_bar_repo')
    assert handle.entrypoint == expected_entrypoint
    assert handle.data.pipeline_name == 'foo'
def test_pipeline_python_file():
    """A python file plus function name must yield a PIPELINE-mode handle.

    The entrypoint must equal a ``LoaderEntrypoint`` built from the module
    loaded from ``foo_pipeline.py``, the name 'foo_pipeline' and
    ``define_pipeline``.
    """
    pipeline_path = script_relative_path('foo_pipeline.py')
    foo_module = imp.load_source('foo_pipeline', pipeline_path)

    cli_args = {
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': pipeline_path,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.PIPELINE

    expected_entrypoint = LoaderEntrypoint(foo_module, 'foo_pipeline', 'define_pipeline')
    assert handle.entrypoint == expected_entrypoint
def execute_execute_command_with_preset(preset, raise_on_error, cli_args, mode):
    """Execute the selected pipeline with a repository-level preset.

    Args:
        preset: Preset name passed to ``repository.get_preset_pipeline``.
        raise_on_error: Forwarded to ``InProcessExecutorConfig``.
        cli_args: Dict of CLI arguments. The pipeline is resolved first, then
            ``'pipeline_name'`` is removed in place and the remaining arguments
            are used to resolve the repository.
        mode: Mode set on the ``RunConfig``.

    Returns:
        The result of ``execute_pipeline`` called with the constructed run
        config plus the keyword arguments returned by ``get_preset_pipeline``.
    """
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()

    # Mutates the caller's dict; a missing key raises KeyError.
    cli_args.pop('pipeline_name')

    repository_def = handle_for_repo_cli_args(cli_args).build_repository_definition()
    preset_kwargs = repository_def.get_preset_pipeline(pipeline_def.name, preset)

    executor_config = InProcessExecutorConfig(raise_on_error=raise_on_error)
    run_config = RunConfig(mode=mode, executor_config=executor_config)
    return execute_pipeline(run_config=run_config, **preset_kwargs)
def exec_for_test(fn_name, env=None, **kwargs):
    """Execute a pipeline from ``dagstermill.examples.repository`` and yield the result.

    This is a generator that yields exactly once. When the execution produced a
    truthy result, ``cleanup_result_notebook`` is called on it afterwards, even
    if the consumer raised.

    Args:
        fn_name: Name of the pipeline-defining function in the examples module.
        env: Environment passed positionally to ``execute_pipeline``.
        **kwargs: Extra keyword arguments forwarded to ``execute_pipeline``.

    Yields:
        The value returned by ``execute_pipeline``.
    """
    cli_args = {'module_name': 'dagstermill.examples.repository', 'fn_name': fn_name}
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()

    # Stays None if execute_pipeline itself raises, so cleanup is skipped.
    result = None
    try:
        result = execute_pipeline(pipeline_def, env, **kwargs)
        yield result
    finally:
        if result:
            cleanup_result_notebook(result)
def test_yield_unserializable_result():
    """Yielding a ``threading.Lock`` fails only inside a pipeline context.

    On a bare ``Manager`` the call must return a truthy value; inside the
    ``hello_world_output`` pipeline context it must raise ``TypeError``.
    """
    bare_manager = Manager()
    assert bare_manager.yield_result(threading.Lock())

    output_solid = SolidHandle('hello_world_output', 'hello_world_output', None)
    output_pipeline_handle = handle_for_pipeline_cli_args(
        {
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_with_output_pipeline',
        }
    )

    with in_pipeline_manager(
        solid_handle=output_solid, handle=output_pipeline_handle
    ) as manager:
        with pytest.raises(TypeError):
            manager.yield_result(threading.Lock())
def test_pipeline_python_file():
    """A python file plus function name must yield a PIPELINE-mode handle.

    The entrypoint is compared field by field against a ``LoaderEntrypoint``
    built from the module loaded from ``foo_pipeline.py``, and its
    ``from_handle`` must be the handle itself.
    """
    pipeline_path = file_relative_path(__file__, 'foo_pipeline.py')
    foo_module = import_module_from_path('foo_pipeline', pipeline_path)

    cli_args = {
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': pipeline_path,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.PIPELINE

    expected = LoaderEntrypoint(foo_module, 'foo_pipeline', 'define_pipeline')
    for field in ('module', 'module_name', 'fn_name'):
        assert getattr(handle.entrypoint, field) == getattr(expected, field)
    assert handle.entrypoint.from_handle == handle