def _create_sensor_run(instance, repo_location, external_sensor, external_pipeline, run_request):
    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        external_sensor.mode,
        step_keys_to_execute=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    # Build the run's tags: run-request tags override pipeline tags, and the
    # sensor's identifying tags are applied last
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    return instance.create_run(
        pipeline_name=external_sensor.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_sensor.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=external_sensor.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )
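# The nested merge_dicts calls above establish tag precedence: run-request tags
# override the pipeline's default tags, and the sensor's identifying tags are
# applied last. A minimal self-contained sketch of that precedence, using a
# local stand-in for merge_dicts and hypothetical tag values:
def _merge_dicts_sketch(*dicts):
    result = {}
    for d in dicts:
        result.update(d)  # later dicts win on key collisions
    return result

_pipeline_tags = {"team": "data", "priority": "low"}
_run_request_tags = {"priority": "high"}
_sensor_tags = {"dagster/sensor_name": "my_sensor"}

assert _merge_dicts_sketch(
    _merge_dicts_sketch(_pipeline_tags, _run_request_tags), _sensor_tags
) == {
    "team": "data",
    "priority": "high",  # run-request tag overrides the pipeline tag
    "dagster/sensor_name": "my_sensor",  # sensor tags applied last
}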
def _get_or_create_sensor_run(
    context, instance, repo_location, external_sensor, external_pipeline, run_request
):
    if not run_request.run_key:
        return _create_sensor_run(
            instance, repo_location, external_sensor, external_pipeline, run_request
        )

    existing_runs = instance.get_runs(
        PipelineRunsFilter(
            tags=merge_dicts(
                PipelineRun.tags_for_sensor(external_sensor),
                {RUN_KEY_TAG: run_request.run_key},
            )
        )
    )

    if existing_runs:
        run = existing_runs[0]
        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this run key, but the
            # daemon must have crashed before the tick could be put into a
            # SUCCESS state
            return SkippedSensorRun(run_key=run_request.run_key, existing_run=run)
        else:
            context.logger.info(
                f"Run {run.run_id} already created with the run key "
                f"`{run_request.run_key}` for {external_sensor.name}"
            )
            return run

    context.logger.info(f"Creating new run for {external_sensor.name}")
    return _create_sensor_run(
        instance, repo_location, external_sensor, external_pipeline, run_request
    )
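# _get_or_create_sensor_run treats the run key as an idempotence token: no key
# means always create, a key whose run was already launched means skip, and a
# key whose run is stuck in NOT_STARTED means resubmit the existing run. A
# small stand-alone sketch of that decision table (Run, Skipped, and the
# string statuses here are hypothetical placeholders, not dagster types):
from collections import namedtuple

_Run = namedtuple("_Run", "run_id status")
_Skipped = namedtuple("_Skipped", "run_key existing_run")  # stand-in for SkippedSensorRun

def _get_or_create_sketch(existing_runs_by_key, run_key, create):
    if not run_key:
        return create()  # no key: never deduplicate
    run = existing_runs_by_key.get(run_key)
    if run is None:
        return create()  # key unseen: create a new run
    if run.status != "NOT_STARTED":
        return _Skipped(run_key, run)  # key seen and launched: skip
    return run  # key seen but never launched: reuse the run

_runs = {"k1": _Run("abc123", "SUCCESS"), "k2": _Run("def456", "NOT_STARTED")}
assert isinstance(_get_or_create_sketch(_runs, "k1", lambda: None), _Skipped)
assert _get_or_create_sketch(_runs, "k2", lambda: None).run_id == "def456"
assert _get_or_create_sketch(_runs, "k3", lambda: _Run("new", "NOT_STARTED")).run_id == "new"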
def test_queue_from_schedule_and_sensor(instance, foo_example_workspace, foo_example_repo):
    external_schedule = foo_example_repo.get_external_schedule("always_run_schedule")
    external_sensor = foo_example_repo.get_external_sensor("always_on_sensor")
    external_pipeline = foo_example_repo.get_full_external_pipeline("foo_pipeline")

    instance.start_schedule_and_update_storage_state(external_schedule)
    instance.start_sensor(external_sensor)

    with start_daemon(timeout=180):
        run = create_run(instance, external_pipeline)
        instance.submit_run(run.run_id, foo_example_workspace)

        runs = [
            poll_for_finished_run(instance, run.run_id),
            poll_for_finished_run(
                instance, run_tags=PipelineRun.tags_for_sensor(external_sensor)
            ),
            poll_for_finished_run(
                instance,
                run_tags=PipelineRun.tags_for_schedule(external_schedule),
                timeout=90,
            ),
        ]

        for run in runs:
            logs = instance.all_logs(run.run_id)
            assert_events_in_order(
                logs,
                [
                    "PIPELINE_ENQUEUED",
                    "PIPELINE_DEQUEUED",
                    "PIPELINE_STARTING",
                    "PIPELINE_START",
                    "PIPELINE_SUCCESS",
                ],
            )
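# Both queue tests lean on poll_for_finished_run, a test helper that blocks
# until a run reaches a terminal status. A rough sketch of what the run_id
# path of such a helper might look like (the real helper also accepts
# run_tags; the signature and defaults below are assumptions):
import time

def _poll_for_finished_run_sketch(instance, run_id, timeout=45, interval=0.5):
    terminal = {"SUCCESS", "FAILURE", "CANCELED"}
    deadline = time.time() + timeout
    while time.time() < deadline:
        run = instance.get_run_by_id(run_id)
        if run and run.status.name in terminal:  # PipelineRunStatus is an enum
            return run
        time.sleep(interval)
    raise AssertionError(f"Timed out waiting for run {run_id} to finish")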
def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    # Record an anonymized telemetry event; names are hashed before logging
    log_action(
        instance,
        SENSOR_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SENSOR_NAME_HASH": hash_name(external_sensor.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
            "repo_hash": hash_name(repo_location.name),
        },
    )

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
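# log_action records an anonymized telemetry event: sensor, pipeline, and
# repository names pass through hash_name rather than being logged verbatim.
# A plausible stand-in for that kind of one-way hashing (the real hash_name
# implementation is an assumption here):
import hashlib

def _hash_name_sketch(name):
    return hashlib.sha256(name.encode("utf-8")).hexdigest()

assert _hash_name_sketch("my_sensor") != "my_sensor"  # raw name never leaves the process
assert _hash_name_sketch("my_sensor") == _hash_name_sketch("my_sensor")  # stable across calls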
def test_queue_from_schedule_and_sensor(tmpdir, foo_example_repo):
    dagster_home_path = tmpdir.strpath
    with setup_instance(
        dagster_home_path,
        """run_coordinator:
    module: dagster.core.run_coordinator
    class: QueuedRunCoordinator
    config:
        dequeue_interval_seconds: 1
""",
    ) as instance:
        external_schedule = foo_example_repo.get_external_schedule("never_run_schedule")
        external_sensor = foo_example_repo.get_external_sensor("never_on_sensor")
        foo_pipeline_handle = PipelineHandle("foo_pipeline", foo_example_repo.handle)

        instance.start_schedule_and_update_storage_state(external_schedule)
        instance.start_sensor(external_sensor)

        with start_daemon(timeout=180):
            run = create_run(instance, foo_pipeline_handle)
            with external_pipeline_from_run(run) as external_pipeline:
                instance.submit_run(run.run_id, external_pipeline)

            runs = [
                poll_for_finished_run(instance, run.run_id),
                poll_for_finished_run(
                    instance, run_tags=PipelineRun.tags_for_sensor(external_sensor)
                ),
                poll_for_finished_run(
                    instance,
                    run_tags=PipelineRun.tags_for_schedule(external_schedule),
                    timeout=90,
                ),
            ]

            for run in runs:
                logs = instance.all_logs(run.run_id)
                assert_events_in_order(
                    logs,
                    [
                        "PIPELINE_ENQUEUED",
                        "PIPELINE_DEQUEUED",
                        "PIPELINE_STARTING",
                        "PIPELINE_START",
                        "PIPELINE_SUCCESS",
                    ],
                )
def _fetch_existing_runs(instance, external_sensor, run_requests):
    run_keys = [run_request.run_key for run_request in run_requests if run_request.run_key]

    if not run_keys:
        return {}

    existing_runs = {}

    if instance.supports_bucket_queries:
        # Fast path: one bucketed query fetches at most one run per run key
        runs = instance.get_runs(
            filters=RunsFilter(
                tags=PipelineRun.tags_for_sensor(external_sensor),
            ),
            bucket_by=TagBucket(
                tag_key=RUN_KEY_TAG,
                bucket_limit=1,
                tag_values=run_keys,
            ),
        )
        for run in runs:
            tags = run.tags or {}
            run_key = tags.get(RUN_KEY_TAG)
            existing_runs[run_key] = run
        return existing_runs

    # Fallback for storages without bucket queries: one query per run key
    for run_key in run_keys:
        runs = instance.get_runs(
            filters=RunsFilter(
                tags=merge_dicts(
                    PipelineRun.tags_for_sensor(external_sensor),
                    {RUN_KEY_TAG: run_key},
                )
            ),
            limit=1,
        )
        if runs:
            existing_runs[run_key] = runs[0]
    return existing_runs
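# _fetch_existing_runs has a fast path and a fallback: storages that support
# bucket queries fetch at most one run per run key in a single query
# (bucket_limit=1), while other storages issue one limit=1 query per key. An
# in-memory sketch of what the bucketed query asks the storage layer to do
# (fake run dicts, hypothetical helper):
_RUN_KEY_TAG = "dagster/run_key"

def _bucket_by_tag_sketch(runs, tag_key, tag_values, bucket_limit):
    buckets = {}
    for run in runs:
        value = run["tags"].get(tag_key)
        if value in tag_values and len(buckets.setdefault(value, [])) < bucket_limit:
            buckets[value].append(run)
    return [run for bucket in buckets.values() for run in bucket]

_fake_runs = [
    {"run_id": "a", "tags": {_RUN_KEY_TAG: "k1"}},
    {"run_id": "b", "tags": {_RUN_KEY_TAG: "k1"}},  # second run for k1 is dropped
    {"run_id": "c", "tags": {_RUN_KEY_TAG: "k2"}},
]
_fetched = _bucket_by_tag_sketch(_fake_runs, _RUN_KEY_TAG, {"k1", "k2"}, bucket_limit=1)
assert {r["run_id"] for r in _fetched} == {"a", "c"}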
def _create_sensor_run(
    context, instance, repo_location, external_sensor, external_pipeline, run_request
):
    execution_plan_errors = []
    execution_plan_snapshot = None

    try:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            run_request.run_config,
            external_sensor.mode,
            step_keys_to_execute=None,
        )
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
    except DagsterSubprocessError as e:
        execution_plan_errors.extend(e.subprocess_error_infos)
    except Exception:  # pylint: disable=broad-except
        execution_plan_errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    # Create the run even when plan generation failed, so the failure is
    # visible as a run rather than silently dropped
    run = instance.create_run(
        pipeline_name=external_sensor.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_sensor.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_sensor.solid_selection,
        status=(
            PipelineRunStatus.FAILURE
            if execution_plan_errors
            else PipelineRunStatus.NOT_STARTED
        ),
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )

    if execution_plan_errors:
        for error in execution_plan_errors:
            instance.report_engine_event(
                error.message,
                run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(run)
        context.logger.error(
            "Failed to fetch execution plan for {sensor_name}: {error_string}".format(
                sensor_name=external_sensor.name,
                error_string="\n".join(error.to_string() for error in execution_plan_errors),
            )
        )
    return run
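# The try/except above converts plan-generation failures into serializable
# error infos instead of raising, so the daemon can still create the run in a
# FAILURE state and attach each error as an engine event. The same
# collect-then-report shape in miniature (plain Python, no dagster types):
import sys
import traceback

def _collect_errors_sketch(fn):
    errors = []
    result = None
    try:
        result = fn()
    except Exception:  # collect rather than raise, mirroring execution_plan_errors
        errors.append("".join(traceback.format_exception(*sys.exc_info())))
    return result, errors

_snapshot, _errors = _collect_errors_sketch(lambda: 1 / 0)
assert _snapshot is None and "ZeroDivisionError" in _errors[0]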