def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        environment_dict=make_environment_dict(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(
        reconstructable(define_basic_pipeline), pipeline_run.environment_dict, mode='external'
    )

    initialization_manager = pipeline_initialization_manager(
        plan,
        pipeline_run.environment_dict,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.generate_setup_events():
        pass
    pipeline_context = initialization_manager.get_object()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
        ).strip()
    )
def create_test_pipeline_execution_context(logger_defs=None):
    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(
        InMemoryPipeline(pipeline_def), execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return PlanExecutionContext(
        plan_data=create_plan_data(creation_data, True, executor.retries),
        execution_data=create_execution_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
        ),
        log_manager=log_manager,
        output_capture=None,
    )
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_graphql_k8s_job(
        run_launcher.job_config,
        args=['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='',
        ).strip()
    )
def test_valid_job_format(run_launcher):
    docker_image = get_test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml")
    )
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.get_static_job_config(),
        args=["dagster", "api", "execute_run"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
            env_from=ENV_FROM,
        ).strip()
    )
def create_test_pipeline_execution_context(logger_defs=None):
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(execution_plan, run_config, pipeline_run, instance)
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return SystemPipelineExecutionContext(
        construct_execution_context_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
            log_manager=log_manager,
            retries=executor.retries,
            raise_on_error=True,
        ),
        executor=executor,
        log_manager=log_manager,
    )
def create_test_pipeline_execution_context(logger_defs=None):
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    environment_dict = {'loggers': {key: {} for key in loggers}}
    pipeline_run = PipelineRun(
        pipeline_name='test_legacy_context', environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, environment_dict=environment_dict)
    creation_data = create_context_creation_data(
        execution_plan, environment_dict, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor_config = create_executor_config(creation_data)

    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, pipeline_run.run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
def initialize_step_context(scratch_dir, instance):
    pipeline_run = PipelineRun(
        pipeline_name="foo_pipeline",
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, "external"),
        mode="external",
    )

    recon_pipeline = reconstructable(define_basic_pipeline)

    plan = create_execution_plan(recon_pipeline, pipeline_run.run_config, mode="external")

    initialization_manager = PipelineExecutionContextManager(
        recon_pipeline,
        plan,
        pipeline_run.run_config,
        pipeline_run,
        instance,
    )
    for _ in initialization_manager.prepare_context():
        pass
    pipeline_context = initialization_manager.get_context()

    step_context = pipeline_context.for_step(plan.get_step_by_key("return_two"))
    return step_context
def python_callable(ts, dag_run, **kwargs):  # pylint: disable=unused-argument
    run_id = dag_run.run_id

    # TODO: https://github.com/dagster-io/dagster/issues/1342
    redacted = construct_variables(mode, 'REDACTED', pipeline_name, run_id, ts, step_keys)
    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(redacted, indent=2)
    )

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                environment_dict=environment_dict,
                mode=mode,
                selector=ExecutionSelector(pipeline_name),
                reexecution_config=None,
                step_keys_to_execute=None,
                tags=None,
                status=PipelineRunStatus.MANAGED,
            )
        )

    events = execute_execute_plan_mutation(
        handle,
        construct_variables(mode, environment_dict, pipeline_name, run_id, ts, step_keys),
        instance_ref=instance_ref,
    )
    check_events_for_skips(events)
    return events
def create_run_with_snapshot(self, create_run_args):
    check.inst_param(create_run_args, 'create_run_args', InstanceCreateRunArgs)

    from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

    snapshot_id = create_pipeline_snapshot_id(create_run_args.pipeline_snapshot)
    if not self._run_storage.has_pipeline_snapshot(snapshot_id):
        returned_snapshot_id = self._run_storage.add_pipeline_snapshot(
            create_run_args.pipeline_snapshot
        )
        check.invariant(snapshot_id == returned_snapshot_id)

    return self.create_run(
        PipelineRun(
            pipeline_name=create_run_args.pipeline_snapshot.name,
            pipeline_snapshot_id=snapshot_id,
            run_id=create_run_args.run_id,
            environment_dict=create_run_args.environment_dict,
            mode=create_run_args.mode,
            selector=create_run_args.selector,
            step_keys_to_execute=create_run_args.step_keys_to_execute,
            status=create_run_args.status,
            tags=create_run_args.tags,
            parent_run_id=create_run_args.parent_run_id,
            root_run_id=create_run_args.root_run_id,
        )
    )
def execute_partition_set(partition_set, partition_filter, instance=None):
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    # instance defaults to None and is filled in below, so the check must be optional
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(partition),
            mode='default',
            tags=merge_dicts(
                {'dagster/backfill': 'custom'}, partition_set.tags_for_partition(partition)
            ),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

        instance.run_launcher.launch_run(run)
def test_single_write_read_with_snapshot(self, storage):
    run_with_snapshot_id = "lkasjdflkjasdf"
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

    pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
    pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

    run_with_snapshot = PipelineRun(
        run_id=run_with_snapshot_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=pipeline_snapshot_id,
    )

    assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

    assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id

    assert serialize_pp(storage.get_pipeline_snapshot(pipeline_snapshot_id)) == serialize_pp(
        pipeline_snapshot
    )

    storage.add_run(run_with_snapshot)

    assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

    storage.wipe()

    assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
    assert not storage.has_run(run_with_snapshot_id)
def test_execution_plan_wrong_run_id():
    pipeline_def = define_addy_pipeline()

    unrun_id = make_new_run_id()
    new_run_id = make_new_run_id()

    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        previous_run_id=unrun_id,
    )

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=pipeline_run
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=DagsterInstance.ephemeral(),
        )

    assert str(exc_info.value) == 'Run id {} set as previous run id was not found in instance'.format(
        unrun_id
    )

    assert exc_info.value.invalid_run_id == unrun_id
def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(
        reconstructable(define_basic_pipeline), pipeline_run.run_config, mode='external'
    )

    initialization_manager = PipelineExecutionContextManager(
        plan,
        pipeline_run.run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.prepare_context():
        pass
    pipeline_context = initialization_manager.get_context()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    pipeline = create_pipeline_from_cli_args(kwargs)
    instance = DagsterInstance.get()

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')
        if mode:
            raise click.UsageError('Can not use --preset with --mode.')
        preset = pipeline.get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    run = PipelineRun(
        pipeline_name=pipeline.name,
        run_id=make_new_run_id(),
        selector=ExecutionSelector(pipeline.name, preset.solid_subset if preset else None),
        environment_dict=preset.environment_dict if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        status=PipelineRunStatus.NOT_STARTED,
        tags=run_tags,
    )

    return instance.launch_run(run)
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def _construct_run_with_snapshots(
    self,
    pipeline_name,
    run_id,
    run_config,
    mode,
    solids_to_execute,
    step_keys_to_execute,
    status,
    tags,
    root_run_id,
    parent_run_id,
    pipeline_snapshot,
    execution_plan_snapshot,
    parent_pipeline_snapshot,
    solid_selection=None,
):
    # https://github.com/dagster-io/dagster/issues/2403
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    check.invariant(
        not (not pipeline_snapshot and execution_plan_snapshot),
        "It is illegal to have an execution plan snapshot and not have a pipeline snapshot. "
        "It is possible to have no execution plan snapshot since we persist runs "
        "that do not successfully compile execution plans in the scheduled case.",
    )

    pipeline_snapshot_id = (
        self._ensure_persisted_pipeline_snapshot(pipeline_snapshot, parent_pipeline_snapshot)
        if pipeline_snapshot
        else None
    )

    execution_plan_snapshot_id = (
        self._ensure_persisted_execution_plan_snapshot(
            execution_plan_snapshot, pipeline_snapshot_id, step_keys_to_execute
        )
        if execution_plan_snapshot and pipeline_snapshot_id
        else None
    )

    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot_id=pipeline_snapshot_id,
        execution_plan_snapshot_id=execution_plan_snapshot_id,
    )
def get_should_launch_run():
    return PipelineRun(
        run_id=str(uuid.uuid4()),
        status=PipelineRunStatus.SUCCESS,
        mode="prod",
        pipeline_name="download_pipeline",
        run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
    )
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the pipeline run
    # can be constructed from each other is no longer true. Clients of the GraphQL
    # API cannot know the value of the pipeline_snapshot_id prior to execution,
    # because it is constructed on the server. Hence these roundtrip tests
    # do not include snapshot_id
    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)

    exec_params = execution_params_from_pipeline_run(run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(run, key) == value

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(empty_run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(empty_run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(empty_run, key) == value
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = str(uuid.uuid4())
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )

    assert result.success

    store = build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    ## re-execute add_two

    new_run_id = str(uuid.uuid4())

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        previous_run_id=result.run_id,
    )

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=pipeline_run
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(['add_two.compute']),
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, new_run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
def test_write_conflicting_run_id(self, storage):
    double_run_id = "double_run_id"
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

    run = PipelineRun(run_id=double_run_id, pipeline_name=pipeline_def.name)

    assert storage.add_run(run)
    with pytest.raises(DagsterRunAlreadyExists):
        storage.add_run(run)
def test_queued_pipeline_origin_check():
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(ModuleCodePointer("fake", "fake"))
            ),
            "foo_repo",
        ),
        "foo",
    )

    PipelineRun(status=PipelineRunStatus.QUEUED, external_pipeline_origin=fake_pipeline_origin)

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        solid_subset=['solid_1'],
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the pipeline run
    # can be constructed from each other is no longer true. Clients of the GraphQL
    # API cannot know the value of the pipeline_snapshot_id prior to execution,
    # because it is constructed on the server. Hence these roundtrip tests
    # do not include snapshot_id
    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)

    context = define_context_for_file(__file__, 'pipey_mcpipeface', DagsterInstance.ephemeral())

    exec_params = execution_params_from_pipeline_run(context, run)
    exec_params_gql = execution_params_from_graphql(context, exec_params.to_graphql_input())
    assert exec_params_gql == exec_params

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(context, empty_run)
    exec_params_gql = execution_params_from_graphql(context, exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
def test_execute_plan_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a, 'b': resource_b},
                logger_defs={'callback': construct_event_logger(event_callback)},
            )
        ],
    )
    environment_dict = {'loggers': {'callback': {}}}

    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=pipeline.name,
            run_id=make_new_run_id(),
            environment_dict={'loggers': {'callback': {}}},
            mode='default',
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    execution_plan = create_execution_plan(pipeline, environment_dict)
    iterator = execute_plan_iterator(
        execution_plan, pipeline_run, instance, environment_dict=environment_dict
    )

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()

    messages = [record.user_message for record in records if not record.is_dagster_event]
    assert len([message for message in messages if message == 'CLEANING A']) > 0
    assert len([message for message in messages if message == 'CLEANING B']) > 0
def start_pipeline_execution(graphene_info, execution_params, reexecution_config):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    check.opt_inst_param(reexecution_config, 'reexecution_config', ReexecutionConfig)

    instance = graphene_info.context.instance

    dauphin_pipeline = get_dauphin_pipeline_from_selector_or_raise(
        graphene_info, execution_params.selector
    )

    get_validated_config(
        graphene_info,
        dauphin_pipeline,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        dauphin_pipeline.get_dagster_pipeline(),
        execution_params.environment_dict,
        run_config=RunConfig(mode=execution_params.mode),
    )

    _check_start_pipeline_execution_errors(
        graphene_info, execution_params, execution_plan, reexecution_config
    )

    run = instance.create_run(
        PipelineRun(
            pipeline_name=dauphin_pipeline.get_dagster_pipeline().name,
            run_id=execution_params.execution_metadata.run_id
            if execution_params.execution_metadata.run_id
            else make_new_run_id(),
            selector=execution_params.selector,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            reexecution_config=reexecution_config,
            step_keys_to_execute=execution_params.step_keys,
            tags=execution_params.execution_metadata.tags,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        dauphin_pipeline.get_dagster_pipeline(),
        run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineExecutionSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
def test_no_runs_for_different_mode():
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        instance.add_run(
            PipelineRun(
                status=PipelineRunStatus.SUCCESS, mode="xyz", pipeline_name="download_pipeline"
            )
        )
        run_requests = list(dbt_on_hn_download_finished(build_sensor_context(instance=instance)))
        assert len(run_requests) == 0
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: (
                {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            )
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip()
    )
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: (
                {
                    'requests': {'cpu': '250m', 'memory': '64Mi'},
                    'limits': {'cpu': '500m', 'memory': '2560Mi'},
                }
            )
        }
    )
    resources = get_k8s_resource_requirements(tags)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip()
    )