def test_one_task_dag():
    """Convert a DAG holding one ``DummyOperator`` and assert the pipeline run succeeds."""
    airflow_dag = DAG(dag_id="dag", default_args=default_args, schedule_interval=None)
    # The operator handle is never read again; it is created with ``dag=airflow_dag``
    # (presumably so the task is registered on the DAG -- confirm against Airflow).
    dummy_operator = DummyOperator(task_id="dummy_operator", dag=airflow_dag)

    execution_date_tags = {
        AIRFLOW_EXECUTION_DATE_STR: get_current_datetime_in_utc().isoformat(),
    }
    converted_pipeline = make_dagster_pipeline_from_airflow_dag(
        dag=airflow_dag, tags=execution_date_tags
    )

    outcome = execute_pipeline(converted_pipeline)
    assert outcome.success
def test_skip(external_repo_context, capfd):
    """Check that ``skip_schedule`` records one SKIPPED tick and launches no runs.

    Runs inside ``central_timezone()`` with the clock frozen at 2019-02-27 00:00:00 UTC,
    then compares the captured stdout with the expected scheduler log text.
    """
    with central_timezone(), instance_with_schedules(
        external_repo_context
    ) as (instance, external_repo):
        skip_schedule = external_repo.get_external_schedule("skip_schedule")
        origin = skip_schedule.get_origin()
        frozen_at = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )

        with freeze_time(frozen_at):
            instance.start_schedule_and_update_storage_state(skip_schedule)

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                get_current_datetime_in_utc(),
            )

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0],
                skip_schedule,
                frozen_at,
                ScheduleTickStatus.SKIPPED,
                None,
            )

            captured = capfd.readouterr()
            # NOTE(review): the expected text is reproduced exactly as it appears in this
            # view; line breaks inside the literal may have been collapsed -- confirm
            # against the real captured output.
            expected_output = """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule 2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000 2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping """
            assert captured.out == expected_output
def test_bad_env_fn(external_repo_context, capfd):
    """Check that ``bad_env_fn_schedule`` records one FAILURE tick and launches no runs.

    The tick is validated against the ``run_config_fn`` error message, and the same
    message (plus a "Failed to fetch schedule data" prefix) must appear on stdout.
    """
    expected_error = (
        "Error occurred during the execution of run_config_fn for "
        "schedule bad_env_fn_schedule"
    )

    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        failing_schedule = external_repo.get_external_schedule("bad_env_fn_schedule")
        origin = failing_schedule.get_origin()
        frozen_at = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )

        with freeze_time(frozen_at):
            instance.start_schedule_and_update_storage_state(failing_schedule)

            launch_scheduled_runs(
                instance,
                get_default_scheduler_logger(),
                get_current_datetime_in_utc(),
            )

            assert instance.get_runs_count() == 0

            recorded_ticks = instance.get_schedule_ticks(origin.get_id())
            assert len(recorded_ticks) == 1
            validate_tick(
                recorded_ticks[0],
                failing_schedule,
                frozen_at,
                ScheduleTickStatus.FAILURE,
                None,
                expected_error,
            )

            captured = capfd.readouterr()
            assert "Failed to fetch schedule data for bad_env_fn_schedule: " in captured.out
            assert expected_error in captured.out
def test_include_execution_time_grpc():
    """Fetch schedule execution data over ephemeral gRPC and check the echoed execution time.

    The ``foo_schedule_echo_time`` schedule is queried in ``LAUNCH_SCHEDULED_EXECUTION``
    mode with the current UTC time; the returned run config must carry that time in ISO
    format, the schedule-name tag must be set, and ``should_execute`` must be ``True``.
    """
    repository_handle = get_bar_repo_handle()
    execution_time = get_current_datetime_in_utc()

    with instance_for_test() as instance:
        execution_data = sync_get_external_schedule_execution_data_ephemeral_grpc(
            instance,
            repository_handle,
            "foo_schedule_echo_time",
            ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION,
            execution_time,
        )

        assert isinstance(execution_data, ExternalScheduleExecutionData)
        assert execution_data.run_config == {
            "passed_in_time": execution_time.isoformat()
        }
        assert execution_data.tags == {
            "dagster/schedule_name": "foo_schedule_echo_time"
        }
        # Identity check instead of ``== True``: only the boolean singleton passes.
        assert execution_data.should_execute is True
def start_schedule_and_update_storage_state(self, instance, external_schedule):
    """
    Start the given schedule and persist it as `JobStatus.RUNNING` in schedule storage.

    `start_schedule` is called first; the stored job state is updated afterwards with a
    fresh start timestamp and this scheduler's class name. If no state is stored yet for
    the schedule, one is created via `_create_new_schedule_state`.

    This should not be overridden by subclasses.

    Args:
        instance (DagsterInstance): The current instance.
        external_schedule (ExternalSchedule): The schedule to start

    Returns:
        The job state that was written to storage.

    Raises:
        DagsterSchedulerError: If the stored state is already `JobStatus.RUNNING`.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_schedule, "external_schedule", ExternalSchedule)

    origin_id = external_schedule.get_external_origin_id()
    current_state = instance.get_job_state(origin_id) or self._create_new_schedule_state(
        instance, external_schedule
    )

    if current_state.status == JobStatus.RUNNING:
        message = "You have attempted to start schedule {name}, but it is already running".format(
            name=external_schedule.name
        )
        raise DagsterSchedulerError(message)

    self.start_schedule(instance, external_schedule)

    running_state = current_state.with_status(JobStatus.RUNNING)
    running_data = ScheduleJobData(
        external_schedule.cron_schedule,
        get_current_datetime_in_utc().timestamp(),
        scheduler=self.__class__.__name__,
    )
    started_schedule = running_state.with_data(running_data)

    instance.update_job_state(started_schedule)
    return started_schedule
def test_update_schedule(self, storage):
    """Update a stored schedule to RUNNING and back to STOPPED, checking storage each time.

    Args:
        storage: The schedule storage under test; must be truthy.
    """
    assert storage

    schedule = self.build_schedule("my_schedule", "* * * * *")
    storage.add_instigator_state(schedule)

    # Move the schedule to RUNNING with an explicit start timestamp.
    now_time = get_current_datetime_in_utc().timestamp()
    new_schedule = schedule.with_status(InstigatorStatus.RUNNING).with_data(
        ScheduleInstigatorData(
            cron_schedule=schedule.instigator_data.cron_schedule,
            start_timestamp=now_time,
        )
    )
    storage.update_instigator_state(new_schedule)

    schedules = storage.all_instigator_state(
        self.fake_repo_target().get_id(), InstigatorType.SCHEDULE
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.instigator_name == "my_schedule"
    assert schedule.status == InstigatorStatus.RUNNING
    assert schedule.instigator_data.start_timestamp == now_time

    # Move it back to STOPPED; the replacement data carries no start timestamp.
    stopped_schedule = schedule.with_status(InstigatorStatus.STOPPED).with_data(
        ScheduleInstigatorData(schedule.instigator_data.cron_schedule)
    )
    storage.update_instigator_state(stopped_schedule)

    schedules = storage.all_instigator_state(
        self.fake_repo_target().get_id(), InstigatorType.SCHEDULE
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.instigator_name == "my_schedule"
    assert schedule.status == InstigatorStatus.STOPPED
    # Singleton comparison uses ``is`` rather than ``==``.
    assert schedule.instigator_data.start_timestamp is None
def test_update_schedule(self, storage):
    """Update a stored schedule job to RUNNING and back to STOPPED, checking storage each time.

    Args:
        storage: The schedule storage under test; must be truthy.
    """
    assert storage

    schedule = self.build_schedule("my_schedule", "* * * * *")
    storage.add_job_state(schedule)

    # Move the schedule to RUNNING with an explicit start timestamp.
    now_time = get_current_datetime_in_utc().timestamp()
    new_schedule = schedule.with_status(JobStatus.RUNNING).with_data(
        ScheduleJobData(
            cron_schedule=schedule.job_specific_data.cron_schedule,
            start_timestamp=now_time,
            scheduler=FAKE_SCHEDULER_NAME,
        )
    )
    storage.update_job_state(new_schedule)

    schedules = storage.all_stored_job_state(
        self.fake_repo_target().get_id(), JobType.SCHEDULE
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.job_name == "my_schedule"
    assert schedule.status == JobStatus.RUNNING
    assert schedule.job_specific_data.start_timestamp == now_time
    assert schedule.job_specific_data.scheduler == FAKE_SCHEDULER_NAME

    # Move it back to STOPPED; the replacement data carries no start timestamp.
    stopped_schedule = schedule.with_status(JobStatus.STOPPED).with_data(
        ScheduleJobData(
            schedule.job_specific_data.cron_schedule,
            scheduler=FAKE_SCHEDULER_NAME,
        )
    )
    storage.update_job_state(stopped_schedule)

    schedules = storage.all_stored_job_state(
        self.fake_repo_target().get_id(), JobType.SCHEDULE
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.job_name == "my_schedule"
    assert schedule.status == JobStatus.STOPPED
    # Singleton comparison uses ``is`` rather than ``==``.
    assert schedule.job_specific_data.start_timestamp is None
    assert schedule.job_specific_data.scheduler == FAKE_SCHEDULER_NAME
def test_re_init(restore_cron_tab):  # pylint:disable=unused-argument,redefined-outer-name
    """Start a schedule and check that its state is written under the instance directory.

    NOTE(review): the start timestamp is compared with a time captured just before the
    schedule is started; this looks like it relies on the clock being frozen elsewhere
    -- confirm.
    """
    schedule_name = "no_config_pipeline_every_min_schedule"

    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        with get_test_external_repo() as external_repo:
            started_at = get_current_datetime_in_utc()

            # Start schedule
            started_state = instance.start_schedule_and_update_storage_state(
                external_repo.get_external_schedule(schedule_name)
            )
            expected_timestamp = get_timestamp_from_utc_datetime(started_at)
            assert started_state.start_timestamp == expected_timestamp

            # Check schedules are saved to disk
            assert "schedules" in os.listdir(tempdir)

            matching_states = (
                stored
                for stored in instance.all_stored_schedule_state()
                if stored.name == schedule_name
            )
            for stored in matching_states:
                assert stored == started_state
def get_sensor_next_tick(graphene_info, sensor_state):
    """Return the projected next tick of a running sensor, or ``None`` if there is none.

    ``None`` is returned when the sensor's repository location, repository or sensor
    cannot be found in the current context, when the sensor is not running, when it has
    no recorded ticks, or when the projected time (latest tick timestamp plus the
    sensor's ``min_interval_seconds``) already lies in the past.

    Args:
        graphene_info (ResolveInfo): Resolver info giving access to the request context.
        sensor_state (InstigatorState): Stored state of the sensor.

    Returns:
        A ``GrapheneFutureInstigationTick`` for the projected timestamp, or ``None``.
    """
    from ..schema.instigation import GrapheneFutureInstigationTick

    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(sensor_state, "sensor_state", InstigatorState)

    context = graphene_info.context
    repo_origin = sensor_state.origin.external_repository_origin
    location_name = repo_origin.repository_location_origin.location_name

    if not context.has_repository_location(location_name):
        return None
    location = context.get_repository_location(location_name)

    repo_name = repo_origin.repository_name
    if not location.has_repository(repo_name):
        return None
    repo = location.get_repository(repo_name)

    if not repo.has_external_sensor(sensor_state.name):
        return None
    sensor = repo.get_external_sensor(sensor_state.name)

    if not sensor_state.is_running:
        return None

    recent_ticks = context.instance.get_ticks(sensor_state.instigator_origin_id, limit=1)
    if not recent_ticks:
        return None

    projected_timestamp = recent_ticks[0].timestamp + sensor.min_interval_seconds
    current_timestamp = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
    if projected_timestamp < current_timestamp:
        return None

    return GrapheneFutureInstigationTick(sensor_state, projected_timestamp)
def test_long_name():
    """Check that very long DAG and task ids are normalized into the expected names."""
    long_dag_id = "dag-with.dot-dash-lo00ong" * 10
    airflow_dag = DAG(dag_id=long_dag_id, default_args=default_args, schedule_interval=None)

    # 250 characters, Airflow's max allowed length
    long_task_id = "task-with.dot-dash2-loong" * 10
    dummy_operator = DummyOperator(task_id=long_task_id, dag=airflow_dag)

    execution_date_tags = {
        AIRFLOW_EXECUTION_DATE_STR: get_current_datetime_in_utc().isoformat(),
    }
    converted_pipeline = make_dagster_pipeline_from_airflow_dag(
        dag=airflow_dag, tags=execution_date_tags
    )
    outcome = execute_pipeline(converted_pipeline)

    assert outcome.success

    expected_pipeline_name = "airflow_dag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ongdag_with_dot_dash_lo00ong"
    assert outcome.pipeline_def.name == expected_pipeline_name

    assert len(outcome.pipeline_def.solids) == 1

    expected_solid_name = "airflow_task_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loongtask_with_dot_dash2_loong"
    assert outcome.pipeline_def.solids[0].name == expected_solid_name
def get_schedule_next_tick(graphene_info, schedule_state):
    """Return a ``FutureJobTick`` for the next execution time of a running schedule.

    ``None`` is returned when the schedule is not ``JobStatus.RUNNING`` or when its
    repository location or repository is not present in the current context.

    Args:
        graphene_info: Resolver info giving access to the request context and schema.
        schedule_state: Stored state of the schedule.

    Returns:
        An instance of the schema's ``FutureJobTick`` type, or ``None``.
    """
    is_running = schedule_state.status == JobStatus.RUNNING
    if not is_running:
        return None

    context = graphene_info.context
    repo_origin = schedule_state.origin.external_repository_origin
    location_name = repo_origin.repository_location_origin.location_name

    if not context.has_repository_location(location_name):
        return None
    location = context.get_repository_location(location_name)

    if not location.has_repository(repo_origin.repository_name):
        return None
    repo = location.get_repository(repo_origin.repository_name)

    schedule = repo.get_external_job(schedule_state.name)

    # Iterate execution times starting from the current UTC timestamp; the first
    # value produced is taken as the next tick.
    current_timestamp = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
    upcoming_times = schedule.execution_time_iterator(current_timestamp)
    next_timestamp = next(upcoming_times).timestamp()

    future_tick_type = graphene_info.schema.type_named("FutureJobTick")
    return future_tick_type(schedule_state, next_timestamp)
def test_normalize_name():
    """Check that dots and dashes in DAG and task ids are normalized in the pipeline names."""
    airflow_dag = DAG(
        dag_id="dag-with.dot-dash", default_args=default_args, schedule_interval=None
    )
    dummy_operator = DummyOperator(task_id="task-with.dot-dash", dag=airflow_dag)

    execution_date_tags = {
        AIRFLOW_EXECUTION_DATE_STR: get_current_datetime_in_utc().isoformat(),
    }
    converted_pipeline = make_dagster_pipeline_from_airflow_dag(
        dag=airflow_dag, tags=execution_date_tags
    )
    outcome = execute_pipeline(converted_pipeline)

    assert outcome.success
    assert outcome.pipeline_def.name == "airflow_dag_with_dot_dash"

    solids = outcome.pipeline_def.solids
    assert len(solids) == 1
    assert outcome.pipeline_def.solids[0].name == "airflow_task_with_dot_dash"
def test_reconcile_schedule_without_start_time():
    """Reconcile a RUNNING schedule state stored without a start timestamp.

    After reconciliation the state must still be RUNNING and must carry a start
    timestamp equal to the current UTC timestamp.
    """
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        external_repo = get_test_external_repo()
        daily_schedule = external_repo.get_external_schedule(
            "no_config_pipeline_daily_schedule"
        )

        # Legacy-style state: RUNNING, but the start timestamp slot is ``None``.
        state_without_start = ScheduleState(
            daily_schedule.get_origin(),
            ScheduleStatus.RUNNING,
            daily_schedule.cron_schedule,
            None,
        )
        instance.add_schedule_state(state_without_start)

        instance.reconcile_scheduler_state(external_repository=external_repo)

        reconciled = instance.get_schedule_state(daily_schedule.get_origin_id())

        assert reconciled.status == ScheduleStatus.RUNNING
        expected_timestamp = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
        assert reconciled.start_timestamp == expected_timestamp
def test_template_task_dag():
    """Run a three-task templated Bash DAG and check each step's captured stdout.

    ``print_hello`` fans out to ``sleep`` and ``templated``. The templated command is
    rendered with the execution date and the date one week later; the compute logs of
    every started step are read back and checked for the expected log lines.
    """
    dag = DAG(
        dag_id="dag",
        default_args=default_args,
        schedule_interval=None,
    )

    t1 = BashOperator(
        task_id="print_hello",
        bash_command="echo hello dagsir",
        dag=dag,
    )

    t2 = BashOperator(
        task_id="sleep",
        bash_command="sleep 2",
        dag=dag,
    )

    # NOTE(review): string literals in this test are reproduced exactly as they appear
    # in this view; whitespace and line breaks inside them may have been collapsed --
    # confirm against the original file.
    templated_command = """ {% for i in range(5) %} echo '{{ ds }}' echo '{{ macros.ds_add(ds, 7)}}' echo '{{ params.my_param }}' {% endfor %} """

    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
        params={"my_param": "Parameter I passed in"},
        dag=dag,
    )

    # pylint: disable=pointless-statement
    t1 >> [t2, t3]

    instance = DagsterInstance.local_temp()
    manager = instance.compute_log_manager

    execution_date = get_current_datetime_in_utc()
    execution_date_add_one_week = execution_date + datetime.timedelta(days=7)
    execution_date_iso = execution_date.strftime("%Y-%m-%d")
    execution_date_add_one_week_iso = execution_date_add_one_week.strftime("%Y-%m-%d")

    result = execute_pipeline(
        make_dagster_pipeline_from_airflow_dag(
            dag=dag, tags={AIRFLOW_EXECUTION_DATE_STR: execution_date_iso}
        ),
        instance=instance,
    )

    compute_steps = [
        event.step_key
        for event in result.step_event_list
        if event.event_type == DagsterEventType.STEP_START
    ]

    assert compute_steps == [
        "airflow_print_hello.compute",
        "airflow_sleep.compute",
        "airflow_templated.compute",
    ]

    for step_key in compute_steps:
        compute_io_path = manager.get_local_path(
            result.run_id, step_key, ComputeIOType.STDOUT
        )
        assert os.path.exists(compute_io_path)

        # Context manager guarantees the handle is closed even if reading or
        # normalizing raises.
        with open(compute_io_path, "r") as stdout_file:
            file_contents = normalize_file_content(stdout_file.read())

        if step_key == "airflow_print_hello.compute":
            assert file_contents.count("INFO - Running command: echo hello dagsir\n") == 1
            assert file_contents.count("INFO - Command exited with return code 0") == 1

        elif step_key == "airflow_sleep.compute":
            assert file_contents.count("INFO - Running command: sleep 2\n") == 1
            assert file_contents.count("INFO - Output:\n") == 1
            assert file_contents.count("INFO - Command exited with return code 0") == 1

        elif step_key == "airflow_templated.compute":
            # The template loop body is rendered five times in the logged command.
            assert (
                file_contents.count(
                    "INFO - Running command: \n \n "
                    "echo '{execution_date_iso}'\n "
                    "echo '{execution_date_add_one_week_iso}'\n "
                    "echo 'Parameter I passed in'\n \n "
                    "echo '{execution_date_iso}'\n "
                    "echo '{execution_date_add_one_week_iso}'\n "
                    "echo 'Parameter I passed in'\n \n "
                    "echo '{execution_date_iso}'\n "
                    "echo '{execution_date_add_one_week_iso}'\n "
                    "echo 'Parameter I passed in'\n \n "
                    "echo '{execution_date_iso}'\n "
                    "echo '{execution_date_add_one_week_iso}'\n "
                    "echo 'Parameter I passed in'\n \n "
                    "echo '{execution_date_iso}'\n "
                    "echo '{execution_date_add_one_week_iso}'\n "
                    "echo 'Parameter I passed in'\n \n \n".format(
                        execution_date_iso=execution_date_iso,
                        execution_date_add_one_week_iso=execution_date_add_one_week_iso,
                    )
                )
                == 1
            )
            assert (
                file_contents.count(
                    "INFO - {execution_date_iso}\n".format(
                        execution_date_iso=execution_date_iso
                    )
                )
                == 5
            )
            assert (
                file_contents.count(
                    "INFO - {execution_date_add_one_week_iso}\n".format(
                        execution_date_add_one_week_iso=execution_date_add_one_week_iso
                    )
                )
                == 5
            )
            assert file_contents.count("INFO - Parameter I passed in\n") == 5
            assert file_contents.count("INFO - Command exited with return code 0") == 1
def test_failure_recovery_before_run_created(external_repo_context, crash_location, crash_signal, capfd):
    """Crash the scheduler before a run exists, then check a relaunch creates exactly one run.

    The first subprocess is started with debug crash flags and must exit non-zero,
    leaving one STARTED tick and no runs. Five minutes later a second subprocess without
    crash flags must exit cleanly, leaving one run and the same tick marked SUCCESS.
    """
    with central_timezone(), instance_with_schedules(
        external_repo_context
    ) as (instance, external_repo):
        # Verify that if the scheduler crashes or is interrupted before a run is created,
        # it will create exactly one tick/run when it is re-launched

        def _run_scheduler_subprocess(crash_flags):
            # Launch one scheduler iteration in a child process and wait up to 60s for it.
            process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), crash_flags],
            )
            process.start()
            process.join(timeout=60)
            return process

        frozen_at = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        simple_schedule = external_repo.get_external_schedule("simple_schedule")

        with freeze_time(frozen_at) as frozen_clock:
            instance.start_schedule_and_update_storage_state(simple_schedule)

            crash_flags = {simple_schedule.name: {crash_location: crash_signal}}

            crashed_process = _run_scheduler_subprocess(crash_flags)
            assert crashed_process.exitcode != 0

            captured = capfd.readouterr()
            # NOTE(review): expected log texts below are reproduced exactly as they appear
            # in this view; line breaks inside the literals may have been collapsed --
            # confirm against the real captured output.
            expected_crash_output = """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000 """
            assert captured.out == expected_crash_output

            ticks_after_crash = instance.get_schedule_ticks(simple_schedule.get_origin_id())
            assert len(ticks_after_crash) == 1
            assert ticks_after_crash[0].status == ScheduleTickStatus.STARTED

            assert instance.get_runs_count() == 0

            frozen_clock.tick(delta=timedelta(minutes=5))

            recovered_process = _run_scheduler_subprocess(None)
            assert recovered_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], frozen_at, "2019-02-26")

            ticks_after_recovery = instance.get_schedule_ticks(simple_schedule.get_origin_id())
            assert len(ticks_after_recovery) == 1
            validate_tick(
                ticks_after_recovery[0],
                simple_schedule,
                frozen_at,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            captured = capfd.readouterr()
            expected_recovery_output = """2019-02-26 18:05:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule 2019-02-26 18:05:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000 2019-02-26 18:05:00 - dagster-scheduler - INFO - Resuming previously interrupted schedule execution 2019-02-26 18:05:00 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule """.format(
                run_id=instance.get_runs()[0].run_id
            )
            assert captured.out == expected_recovery_output
def _construct_run_with_snapshots(
    self,
    pipeline_name=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    solid_subset=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot=None,
    execution_plan_snapshot=None,
):
    """Build a ``PipelineRun`` and persist any snapshots passed alongside it.

    If ``tags`` marks the run as an Airflow-ingest pipeline and carries no execution
    date, the current UTC time (ISO format) is written into ``tags``. A given
    ``pipeline_snapshot`` / ``execution_plan_snapshot`` is added to run storage when not
    already present, and its id is attached to the returned run.

    Args:
        pipeline_name, run_id, environment_dict, mode, solid_subset,
        step_keys_to_execute, status, tags, root_run_id, parent_run_id: Forwarded
            unchanged to ``PipelineRun``.
        pipeline_snapshot: Optional pipeline snapshot to store and link to the run.
        execution_plan_snapshot: Optional execution plan snapshot to store and link to
            the run; requires ``pipeline_snapshot``.

    Returns:
        The constructed ``PipelineRun``, with snapshot ids attached where provided.

    Raises:
        A check invariant failure if ``execution_plan_snapshot`` is given without
        ``pipeline_snapshot``, if the plan snapshot refers to a different pipeline
        snapshot id, or if storage returns an id different from the computed one.
    """
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            # NOTE(review): this writes into the caller's ``tags`` dict in place.
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        solid_subset=solid_subset,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    # Bound up front so the execution-plan branch below never reads an undefined local.
    pipeline_snapshot_id = None

    if pipeline_snapshot is not None:
        from dagster.core.snap import create_pipeline_snapshot_id

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap import create_execution_plan_snapshot_id

        # Previously this combination failed with UnboundLocalError on
        # ``pipeline_snapshot_id``; fail with an explicit invariant instead.
        check.invariant(
            pipeline_snapshot is not None,
            "execution_plan_snapshot was provided without a pipeline_snapshot",
        )
        check.invariant(execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id)

        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )
            check.invariant(execution_plan_snapshot_id == returned_execution_plan_snapshot_id)

        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    return pipeline_run
def test_simple_schedule(external_repo_context):
    """Walk a frozen clock across schedule ticks and check runs/ticks after each launch.

    Covers: nothing launched before the first tick, one run after crossing a tick,
    idempotent re-launches, no new run without crossing a tick, and catch-up of two
    further ticks after jumping two days ahead.
    """
    frozen_start = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )

    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        with freeze_time(frozen_start) as frozen_clock:
            simple_schedule = external_repo.get_external_schedule("simple_schedule")
            origin = simple_schedule.get_origin()

            def _launch():
                # One scheduler iteration at the current (frozen) UTC time.
                launch_scheduled_runs(instance, get_current_datetime_in_utc())

            def _fetch_ticks():
                return instance.get_schedule_ticks(origin.get_id())

            instance.start_schedule_and_update_storage_state(simple_schedule)

            assert instance.get_runs_count() == 0
            assert len(_fetch_ticks()) == 0

            # launch_scheduled_runs does nothing before the first tick
            _launch()
            assert instance.get_runs_count() == 0
            assert len(_fetch_ticks()) == 0

            # Move forward in time so we're past a tick
            frozen_clock.tick(delta=timedelta(seconds=2))
            _launch()

            assert instance.get_runs_count() == 1
            current_ticks = _fetch_ticks()
            assert len(current_ticks) == 1

            first_tick_time = datetime(year=2019, month=2, day=28, tzinfo=get_utc_timezone())

            validate_tick(
                current_ticks[0],
                simple_schedule,
                first_tick_time,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], first_tick_time, "2019-02-27")

            # Verify idempotence
            _launch()
            assert instance.get_runs_count() == 1
            current_ticks = _fetch_ticks()
            assert len(current_ticks) == 1
            assert current_ticks[0].status == ScheduleTickStatus.SUCCESS

            # Verify advancing in time but not going past a tick doesn't add any new runs
            frozen_clock.tick(delta=timedelta(seconds=2))
            _launch()
            assert instance.get_runs_count() == 1
            current_ticks = _fetch_ticks()
            assert len(current_ticks) == 1
            assert current_ticks[0].status == ScheduleTickStatus.SUCCESS

            # Traveling two more days in the future before running results in two new ticks
            frozen_clock.tick(delta=timedelta(days=2))
            _launch()
            assert instance.get_runs_count() == 3
            current_ticks = _fetch_ticks()
            assert len(current_ticks) == 3
            successful_ticks = [
                tick for tick in current_ticks if tick.status == ScheduleTickStatus.SUCCESS
            ]
            assert len(successful_ticks) == 3

            partitions_with_runs = {
                run.tags[PARTITION_NAME_TAG]: run for run in instance.get_runs()
            }
            assert "2019-02-28" in partitions_with_runs
            assert "2019-03-01" in partitions_with_runs

            # Check idempotence again
            _launch()
            assert instance.get_runs_count() == 3
            current_ticks = _fetch_ticks()
            assert len(current_ticks) == 3
from airflow.models.dag import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.utils.dates import days_ago
from dagster import DagsterEventType, execute_pipeline
from dagster.core.instance import AIRFLOW_EXECUTION_DATE_STR
from dagster.core.storage.compute_log_manager import ComputeIOType
from dagster.core.test_utils import instance_for_test
from dagster.seven import get_current_datetime_in_utc
from dagster_airflow.dagster_pipeline_factory import make_dagster_pipeline_from_airflow_dag

# Arguments shared by the DAGs built in this module; the start date is one day ago.
default_args = {
    "owner": "dagster",
    "start_date": days_ago(1),
}

# Execution date captured once at import time, the date one week earlier, and both
# rendered as YYYY-MM-DD strings.
EXECUTION_DATE = get_current_datetime_in_utc()
EXECUTION_DATE_MINUS_WEEK = EXECUTION_DATE - datetime.timedelta(days=7)
EXECUTION_DATE_FMT = EXECUTION_DATE.strftime("%Y-%m-%d")
EXECUTION_DATE_MINUS_WEEK_FMT = EXECUTION_DATE_MINUS_WEEK.strftime("%Y-%m-%d")


def normalize_file_content(s):
    """Return ``s`` with platform line separators replaced by ``\\n`` and empty lines dropped."""
    return "\n".join(
        [line for line in s.replace(os.linesep, "\n").split("\n") if line])


# NOTE(review): the remainder of this function lies outside the visible source.
def check_compute_logs(manager, result, execution_date_fmt):
    assert result.success
    compute_steps = [
def _construct_run_with_snapshots(
    self,
    pipeline_name,
    run_id,
    environment_dict,
    mode,
    solid_subset,
    step_keys_to_execute,
    status,
    tags,
    root_run_id,
    parent_run_id,
    pipeline_snapshot,
    execution_plan_snapshot,
    parent_pipeline_snapshot,
):
    """Build a ``PipelineRun`` and persist any snapshots passed alongside it.

    If ``tags`` marks the run as an Airflow-ingest pipeline and carries no execution
    date, the current UTC time (ISO format) is written into ``tags``. When the pipeline
    snapshot has a lineage snapshot whose parent is not yet stored, the parent snapshot
    is stored first. The pipeline and execution plan snapshots are then stored if
    missing and their ids attached to the returned run.

    Args:
        pipeline_name, run_id, environment_dict, mode, solid_subset,
        step_keys_to_execute, status, tags, root_run_id, parent_run_id: Forwarded
            unchanged to ``PipelineRun``.
        pipeline_snapshot: Pipeline snapshot to store and link to the run, or ``None``.
        execution_plan_snapshot: Execution plan snapshot to store and link to the run,
            or ``None``; requires ``pipeline_snapshot``.
        parent_pipeline_snapshot: Snapshot of the parent pipeline, used when
            ``pipeline_snapshot`` has lineage and the parent is not yet in storage.

    Returns:
        The constructed ``PipelineRun``, with snapshot ids attached where provided.

    Raises:
        A check invariant failure if any snapshot id is inconsistent, if
        ``execution_plan_snapshot`` is given without ``pipeline_snapshot``, or if the
        plan's ``step_keys_to_execute`` disagree with the ``step_keys_to_execute``
        argument (or, when that is empty, with the plan's full step list).
    """
    # https://github.com/dagster-io/dagster/issues/2403
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            # NOTE(review): this writes into the caller's ``tags`` dict in place.
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        solid_subset=solid_subset,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    # Bound up front so the execution-plan branch below never reads an undefined local.
    pipeline_snapshot_id = None

    if pipeline_snapshot is not None:
        from dagster.core.snap import create_pipeline_snapshot_id

        if pipeline_snapshot.lineage_snapshot:
            if not self._run_storage.has_pipeline_snapshot(
                pipeline_snapshot.lineage_snapshot.parent_snapshot_id
            ):
                check.invariant(
                    create_pipeline_snapshot_id(parent_pipeline_snapshot)
                    == pipeline_snapshot.lineage_snapshot.parent_snapshot_id,
                    'Parent pipeline snapshot id out of sync with passed parent pipeline snapshot',
                )

                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    parent_pipeline_snapshot
                )
                check.invariant(
                    pipeline_snapshot.lineage_snapshot.parent_snapshot_id
                    == returned_pipeline_snapshot_id
                )

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap import create_execution_plan_snapshot_id

        # Previously this combination failed with UnboundLocalError on
        # ``pipeline_snapshot_id``; fail with an explicit invariant instead.
        check.invariant(
            pipeline_snapshot is not None,
            'execution_plan_snapshot was provided without a pipeline_snapshot',
        )
        check.invariant(execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id)

        # Same condition as the original conditional expression, unfolded: compare the
        # plan's keys with the argument when it is set, otherwise with every plan step.
        if step_keys_to_execute:
            step_keys_consistent = set(step_keys_to_execute) == set(
                execution_plan_snapshot.step_keys_to_execute
            )
        else:
            step_keys_consistent = set(execution_plan_snapshot.step_keys_to_execute) == {
                step.key for step in execution_plan_snapshot.steps
            }
        check.invariant(
            step_keys_consistent,
            'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
            'ensures that they are consistent. We check that step_keys_to_execute in the plan '
            'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
            'a full execution plan, and so we verify that.',
        )

        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )
            check.invariant(execution_plan_snapshot_id == returned_execution_plan_snapshot_id)

        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    return pipeline_run
def test_failure_recovery_after_run_created(external_repo_context, crash_location, crash_signal):
    """Crash the scheduler after a run exists, then check a relaunch reuses that run.

    The first subprocess is started with debug crash flags and must exit non-zero,
    leaving one STARTED tick and one run. Five minutes later a second subprocess without
    crash flags must exit cleanly, still with exactly one run and the tick marked SUCCESS.
    """
    # Verify that if the scheduler crashes or is interrupted after a run is created,
    # it will just re-launch the already-created run when it runs again
    with instance_with_schedules(external_repo_context) as (instance, external_repo):

        def _run_scheduler_subprocess(crash_flags):
            # Launch one scheduler iteration in a child process and wait up to 60s for it.
            process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), crash_flags],
            )
            process.start()
            process.join(timeout=60)
            return process

        frozen_at = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        simple_schedule = external_repo.get_external_schedule("simple_schedule")

        with freeze_time(frozen_at) as frozen_clock:
            instance.start_schedule_and_update_storage_state(simple_schedule)

            crash_flags = {simple_schedule.name: {crash_location: crash_signal}}

            crashed_process = _run_scheduler_subprocess(crash_flags)
            assert crashed_process.exitcode != 0

            ticks_after_crash = instance.get_schedule_ticks(simple_schedule.get_origin_id())
            assert len(ticks_after_crash) == 1
            assert ticks_after_crash[0].status == ScheduleTickStatus.STARTED

            assert instance.get_runs_count() == 1

            if crash_location == "RUN_CREATED":
                created_run = instance.get_runs()[0]
                # Run was created, but hasn't launched yet
                assert created_run.tags[SCHEDULED_EXECUTION_TIME_TAG] == frozen_at.isoformat()
                assert created_run.tags[PARTITION_NAME_TAG] == "2019-02-26"
                assert created_run.status == PipelineRunStatus.NOT_STARTED
            else:
                # The run was created and launched - running again should do nothing other
                # than moving the tick to success state.

                # Needing this wait shows that a race is still theoretically possible: if
                # the scheduler fails after launching a run and runs again before the
                # executor has changed the run's status to STARTED, the same run could be
                # launched twice. Run queueing, or another way to tell immediately that a
                # run was launched, would remove the race. For now, rule it out by waiting
                # for the run to start before running the scheduler again.
                wait_for_all_runs_to_start(instance)

                launched_run = instance.get_runs()[0]

                validate_run_started(instance.get_runs()[0], frozen_at, "2019-02-26")

                assert launched_run.status in [
                    PipelineRunStatus.STARTED,
                    PipelineRunStatus.SUCCESS,
                ]

            frozen_clock.tick(delta=timedelta(minutes=5))

            # Running again just launches the existing run and marks the tick as success
            recovered_process = _run_scheduler_subprocess(None)
            assert recovered_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], frozen_at, "2019-02-26")

            ticks_after_recovery = instance.get_schedule_ticks(simple_schedule.get_origin_id())
            assert len(ticks_after_recovery) == 1
            validate_tick(
                ticks_after_recovery[0],
                simple_schedule,
                frozen_at,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def test_max_catchup_runs(capfd):
    """Check that ``max_catchup_runs`` caps how many missed ticks are launched.

    The schedule is started, the frozen clock is advanced five days, and the
    scheduler is invoked with ``max_catchup_runs=2``. Exactly two runs/ticks
    are expected, and the captured stdout must match the expected log text,
    including the "fallen behind" warning.
    """
    start_time = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with central_timezone():
        with instance_with_schedules(grpc_repo) as (instance, external_repo):
            with freeze_time(start_time) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(external_schedule)

                # Day is now March 4 at 11:59PM
                frozen_datetime.tick(delta=timedelta(days=5))

                launch_scheduled_runs(
                    instance,
                    get_default_scheduler_logger(),
                    get_current_datetime_in_utc(),
                    max_catchup_runs=2,
                )

                assert instance.get_runs_count() == 2
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 2

                utc = get_utc_timezone()
                march_fourth = datetime(year=2019, month=3, day=4, tzinfo=utc)

                wait_for_all_runs_to_start(instance)

                # Index 0 holds the March 4 run and index 1 the March 3 run.
                validate_tick(
                    ticks[0],
                    external_schedule,
                    march_fourth,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )
                validate_run_started(instance.get_runs()[0], march_fourth, "2019-03-03")

                march_third = datetime(year=2019, month=3, day=3, tzinfo=utc)
                validate_tick(
                    ticks[1],
                    external_schedule,
                    march_third,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[1].run_id,
                )
                validate_run_started(instance.get_runs()[1], march_third, "2019-03-02")

                captured = capfd.readouterr()
                expected_output = """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
                assert captured.out == expected_output
def test_failure_recovery_after_tick_success(external_repo_context, crash_location, crash_signal):
    """Check recovery when the scheduler dies after its run was already launched.

    A first scheduler pass runs in a subprocess with debug crash flags telling
    it to die with ``crash_signal`` at ``crash_location``. The run must already
    have started; the tick is expected to be STARTED when the signal is
    SIGKILL and SUCCESS otherwise. A second, uninterrupted pass must leave a
    single run and a single tick in SUCCESS state.

    Args:
        external_repo_context: Passed to ``instance_with_schedules`` to obtain
            the instance and external repository under test.
        crash_location: Key placed in the debug crash flags.
        crash_signal: Value placed in the debug crash flags; compared with
            ``signal.SIGKILL`` to pick the expected tick status.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)

            debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            # Process.exitcode stays None while the child is still running, so a
            # join() that merely timed out would otherwise satisfy `!= 0` below.
            assert scheduler_process.exitcode is not None, "scheduler subprocess did not exit"
            assert scheduler_process.exitcode != 0

            # There is a possible race condition here: if the scheduler crashes right
            # after the launch and is re-run before the run actually starts, it could
            # launch the same run twice. Waiting for the run to start avoids that.
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1

            if crash_signal == signal.SIGKILL:
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.STARTED,
                    None,
                )
            else:
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

            frozen_datetime.tick(delta=timedelta(minutes=5))

            # Running again just marks the tick as success since the run has already started
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def test_multiple_schedules_on_different_time_ranges(external_repo_context, capfd):
    """Check a daily and an hourly schedule running side by side.

    Both schedules are started just before midnight UTC. After crossing
    midnight each schedule launches one run; one hour later only the hourly
    schedule launches another. Captured stdout is compared against the
    expected log text after each scheduler pass.
    """
    with central_timezone():
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            daily_schedule = external_repo.get_external_schedule("simple_schedule")
            hourly_schedule = external_repo.get_external_schedule("simple_hourly_schedule")
            start_time = datetime(
                year=2019,
                month=2,
                day=27,
                hour=23,
                minute=59,
                second=59,
                tzinfo=get_utc_timezone(),
            )
            with freeze_time(start_time) as frozen_datetime:
                instance.start_schedule_and_update_storage_state(daily_schedule)
                instance.start_schedule_and_update_storage_state(hourly_schedule)

                # --- first pass: two seconds later, just past midnight UTC ---
                frozen_datetime.tick(delta=timedelta(seconds=2))
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 2

                daily_ticks = instance.get_schedule_ticks(daily_schedule.get_origin_id())
                assert len(daily_ticks) == 1
                assert daily_ticks[0].status == ScheduleTickStatus.SUCCESS

                hourly_ticks = instance.get_schedule_ticks(hourly_schedule.get_origin_id())
                assert len(hourly_ticks) == 1
                assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

                captured = capfd.readouterr()
                expected_first_pass = """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_hourly_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
                assert captured.out == expected_first_pass

                # --- second pass: one hour later, only the hourly schedule is due ---
                frozen_datetime.tick(delta=timedelta(hours=1))
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 3

                daily_ticks = instance.get_schedule_ticks(daily_schedule.get_origin_id())
                assert len(daily_ticks) == 1
                assert daily_ticks[0].status == ScheduleTickStatus.SUCCESS

                hourly_ticks = instance.get_schedule_ticks(hourly_schedule.get_origin_id())
                assert len(hourly_ticks) == 2
                successful_hourly = sum(
                    1 for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS
                )
                assert successful_hourly == 2

                captured = capfd.readouterr()
                expected_second_pass = """2019-02-27 19:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 01:00:00+0000
2019-02-27 19:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {third_run_id} for simple_hourly_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - No new runs for simple_schedule
""".format(third_run_id=instance.get_runs()[0].run_id)
                assert captured.out == expected_second_pass
def test_bad_load(capfd):
    """Check that a schedule whose repository cannot be loaded records failed ticks.

    A running schedule state is stored for an origin that points at a
    repository symbol that does not exist in this file. Each scheduler pass
    must create no runs, add one FAILURE tick stamped with the current
    (frozen) time, and report the load error on stdout.
    """
    with schedule_instance() as instance:
        here = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", here)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as frozen_datetime:
            started_at = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
            instance.add_schedule_state(
                ScheduleState(fake_origin, ScheduleStatus.RUNNING, "0 0 * * *", started_at)
            )

            # First pass one second later, second pass a further day later; after
            # each one the tick list must have grown by exactly one failed tick.
            for step, expected_tick_count in (
                (timedelta(seconds=1), 1),
                (timedelta(days=1), 2),
            ):
                frozen_datetime.tick(delta=step)

                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 0

                ticks = instance.get_schedule_ticks(fake_origin.get_id())
                assert len(ticks) == expected_tick_count

                latest_tick = ticks[0]
                assert latest_tick.status == ScheduleTickStatus.FAILURE
                assert latest_tick.timestamp == get_timestamp_from_utc_datetime(
                    get_current_datetime_in_utc()
                )
                assert "doesnt_exist not found at module scope in file" in latest_tick.error.message

                captured = capfd.readouterr()
                assert "Error launching scheduled run" in captured.out
                assert "doesnt_exist not found at module scope" in captured.out
def get_or_create_run(
    self,
    pipeline_name=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot=None,
    execution_plan_snapshot=None,
):
    """Return the stored run matching this run id, or store and return a new one.

    A ``PipelineRun`` is built from the arguments. Any snapshots passed in are
    added to run storage if missing, and their ids are attached to the run. If
    a run with the same id already exists it is returned, provided it does not
    differ from the newly built one.

    Args:
        pipeline_name, run_id, environment_dict, mode, selector,
        step_keys_to_execute, status, root_run_id, parent_run_id: Forwarded
            unchanged to ``PipelineRun``.
        tags: Optional tag mapping. When it contains
            ``IS_AIRFLOW_INGEST_PIPELINE_STR`` but not
            ``AIRFLOW_EXECUTION_DATE_STR``, the latter is set to the current
            UTC time on a copy, so the caller's mapping is left untouched.
        pipeline_snapshot: Optional pipeline snapshot to store and attach.
        execution_plan_snapshot: Optional execution plan snapshot to store and
            attach. Requires ``pipeline_snapshot``, because its
            ``pipeline_snapshot_id`` is checked against that snapshot's id.

    Returns:
        The existing run with the same id if one is stored, otherwise the
        result of ``self._run_storage.add_run``.

    Raises:
        DagsterRunConflict: If a stored run with the same id differs from the
            run built here.
    """
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            # Copy before inserting so the caller's dict is not mutated.
            tags = dict(tags)
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        selector=selector,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    # Bound up front so the execution-plan branch never reads an unassigned
    # local when no pipeline snapshot was supplied.
    pipeline_snapshot_id = None

    if pipeline_snapshot is not None:
        from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )

            # Storage must agree with the locally computed id.
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

        check.invariant(
            pipeline_snapshot is not None,
            "execution_plan_snapshot was provided without a pipeline_snapshot",
        )
        check.invariant(execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id)

        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )

            # Storage must agree with the locally computed id.
            check.invariant(execution_plan_snapshot_id == returned_execution_plan_snapshot_id)

        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    if self.has_run(pipeline_run.run_id):
        candidate_run = self.get_run_by_id(pipeline_run.run_id)

        field_diff = _check_run_equality(pipeline_run, candidate_run)

        if field_diff:
            raise DagsterRunConflict(
                'Found conflicting existing run with same id {run_id}. Runs differ in:'
                '\n{field_diff}'.format(
                    run_id=pipeline_run.run_id,
                    field_diff=_format_field_diff(field_diff),
                ),
            )
        return candidate_run

    return self._run_storage.add_run(pipeline_run)
def test_failure_recovery_before_run_created(external_repo_context, crash_location, crash_signal):
    """Check that a scheduler crash before run creation yields exactly one tick and run.

    A first scheduler pass runs in a subprocess with debug crash flags telling
    it to die with ``crash_signal`` at ``crash_location``. It must leave one
    STARTED tick and no runs. A second, uninterrupted pass must then create
    the single run and move that same tick to SUCCESS.

    Args:
        external_repo_context: Passed to ``instance_with_schedules`` to obtain
            the instance and external repository under test.
        crash_location: Key placed in the debug crash flags.
        crash_signal: Value placed in the debug crash flags for that location.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)

            debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            # Process.exitcode stays None while the child is still running, so a
            # join() that merely timed out would otherwise satisfy `!= 0` below.
            assert scheduler_process.exitcode is not None, "scheduler subprocess did not exit"
            assert scheduler_process.exitcode != 0

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.STARTED

            assert instance.get_runs_count() == 0

            frozen_datetime.tick(delta=timedelta(minutes=5))

            # The re-launched scheduler should pick the interrupted tick back up.
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def test_simple_schedule(external_repo_context, capfd):
    """Walk ``simple_schedule`` through several scheduler passes.

    Covers: no launch before the first tick, one launch after crossing
    midnight UTC, idempotence when re-run at the same time, no launch when
    time advances without crossing a tick, and two launches after jumping two
    days ahead. Captured stdout is compared against the expected log text
    where the original test did so.
    """
    with central_timezone():
        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            with freeze_time(start_time) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()

                def run_scheduler():
                    # One scheduler pass at the current (frozen) time.
                    launch_scheduled_runs(
                        instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                    )

                def fetch_ticks():
                    return instance.get_schedule_ticks(schedule_origin.get_id())

                instance.start_schedule_and_update_storage_state(external_schedule)

                assert instance.get_runs_count() == 0
                assert len(fetch_ticks()) == 0

                # launch_scheduled_runs does nothing before the first tick
                run_scheduler()
                assert instance.get_runs_count() == 0
                assert len(fetch_ticks()) == 0

                captured = capfd.readouterr()
                expected_output = """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule
"""
                assert captured.out == expected_output

                # Move forward in time so we're past a tick
                frozen_datetime.tick(delta=timedelta(seconds=2))
                run_scheduler()

                assert instance.get_runs_count() == 1
                ticks = fetch_ticks()
                assert len(ticks) == 1

                expected_datetime = datetime(
                    year=2019, month=2, day=28, tzinfo=get_utc_timezone()
                )

                validate_tick(
                    ticks[0],
                    external_schedule,
                    expected_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

                wait_for_all_runs_to_start(instance)
                validate_run_started(instance.get_runs()[0], expected_datetime, "2019-02-27")

                captured = capfd.readouterr()
                expected_output = """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id)
                assert captured.out == expected_output

                # Verify idempotence
                run_scheduler()
                assert instance.get_runs_count() == 1
                ticks = fetch_ticks()
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                # Verify advancing in time but not going past a tick doesn't add any new runs
                frozen_datetime.tick(delta=timedelta(seconds=2))
                run_scheduler()
                assert instance.get_runs_count() == 1
                ticks = fetch_ticks()
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                # Discard output from the two no-op passes above.
                capfd.readouterr()

                # Traveling two more days in the future before running results in two new ticks
                frozen_datetime.tick(delta=timedelta(days=2))
                run_scheduler()
                assert instance.get_runs_count() == 3
                ticks = fetch_ticks()
                assert len(ticks) == 3
                succeeded = sum(1 for tick in ticks if tick.status == ScheduleTickStatus.SUCCESS)
                assert succeeded == 3

                runs_by_partition = {}
                for run in instance.get_runs():
                    runs_by_partition[run.tags[PARTITION_NAME_TAG]] = run

                assert "2019-02-28" in runs_by_partition
                assert "2019-03-01" in runs_by_partition

                captured = capfd.readouterr()
                expected_output = """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                    first_run_id=instance.get_runs()[1].run_id,
                    second_run_id=instance.get_runs()[0].run_id,
                )
                assert captured.out == expected_output

                # Check idempotence again
                run_scheduler()
                assert instance.get_runs_count() == 3
                assert len(fetch_ticks()) == 3
def reconcile_scheduler_state(self, instance, external_repository):
    """Sync stored schedule job state with the repository's external schedules.

    The repository's ExternalSchedule list is the source of truth:

    * A schedule with no stored JobState gets one via
      ``_create_new_schedule_state``.
    * A schedule that already has a JobState keeps its status but has its
      origin and cron schedule rewritten. If it is RUNNING and has no start
      timestamp, the current UTC time is recorded. Afterwards it is refreshed
      (if it was RUNNING) or stopped (if it was STOPPED).
    * Stored schedule states for this repository that no longer match any
      external schedule are stopped and deleted from storage.

    Scheduler errors raised while refreshing, stopping or deleting are
    collected, so that every schedule is attempted before anything is raised.

    Raises:
        DagsterScheduleReconciliationError: If any ``DagsterSchedulerError``
            was collected; the individual errors are attached.
    """
    previously_known = []
    for ext_schedule in external_repository.get_external_schedules():
        stored_state = instance.get_job_state(ext_schedule.get_external_origin_id())
        if not stored_state:
            self._create_new_schedule_state(instance, ext_schedule)
            continue

        # A schedule already exists: keep the status, update target and cron schedule.
        start_timestamp = stored_state.job_specific_data.start_timestamp
        if not start_timestamp and stored_state.status == JobStatus.RUNNING:
            start_timestamp = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())

        origin = ext_schedule.get_external_origin()
        job_data = ScheduleJobData(
            ext_schedule.cron_schedule,
            start_timestamp,
            scheduler=self.__class__.__name__,
        )
        instance.update_job_state(
            JobState(origin, JobType.SCHEDULE, stored_state.status, job_data)
        )
        previously_known.append((stored_state, ext_schedule))

    # Work out which stored schedules no longer exist in the repository.
    repository_origin_ids = {
        schedule.get_external_origin_id()
        for schedule in external_repository.get_external_schedules()
    }
    stored_origin_ids = {
        job.job_origin_id
        for job in instance.all_stored_job_state(external_repository.get_external_origin_id())
        if job.job_type == JobType.SCHEDULE
    }
    stale_origin_ids = stored_origin_ids - repository_origin_ids

    collected_errors = []

    for prior_state, ext_schedule in previously_known:
        # Restart is only needed if the schedule was previously running
        if prior_state.status == JobStatus.RUNNING:
            try:
                self.refresh_schedule(instance, ext_schedule)
            except DagsterSchedulerError as error:
                collected_errors.append(error)

        if prior_state.status == JobStatus.STOPPED:
            try:
                self.stop_schedule(instance, ext_schedule.get_external_origin_id())
            except DagsterSchedulerError as error:
                collected_errors.append(error)

    for stale_origin_id in stale_origin_ids:
        try:
            instance.stop_schedule_and_delete_from_storage(stale_origin_id)
        except DagsterSchedulerError as error:
            collected_errors.append(error)

    if collected_errors:
        raise DagsterScheduleReconciliationError(
            "One or more errors were encountered by the Scheduler while starting or stopping schedules. "
            "Individual error messages follow:",
            errors=collected_errors,
        )
def test_bad_schedule_mixed_with_good_schedule(external_repo_context):
    """Check that a failing schedule does not block a healthy one.

    ``simple_schedule`` and ``bad_should_execute_schedule_on_odd_days`` are
    started together. On the first pass only the good schedule launches a run
    and the bad one records a FAILURE tick. One day later both schedules
    launch a run and record a SUCCESS tick.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule_on_odd_days"
        )

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()

        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as frozen_datetime:
            for schedule in (good_schedule, bad_schedule):
                instance.start_schedule_and_update_storage_state(schedule)

            # --- first pass: only the good schedule produces a run ---
            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], start_time, "2019-02-26")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                start_time,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1

            failed_tick = bad_ticks[0]
            assert failed_tick.status == ScheduleTickStatus.FAILURE
            expected_error = (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule"
            )
            assert expected_error in failed_tick.error.message

            # --- second pass, one day later: both schedules produce a run ---
            frozen_datetime.tick(delta=timedelta(days=1))

            new_now = get_current_datetime_in_utc()

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule)
            )
            assert len(good_schedule_runs) == 2
            validate_run_started(good_schedule_runs[0], new_now, "2019-02-27")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule)
            )
            assert len(bad_schedule_runs) == 1
            validate_run_started(bad_schedule_runs[0], new_now, "2019-02-27")

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )