def test_schedule_dag_no_previous_runs(self):
    """
    Schedule a dag that has never run before.

    The first dag run created by the processor is expected to be a
    scheduler-created (not externally triggered) RUNNING run whose
    execution date equals the start date given to the dag's only task.
    """
    dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID)
    fake_task = BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime(2015, 1, 2, 0, 0),
    )
    dag.add_task(fake_task)

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
    first_run = processor.create_dag_run(dag)

    # A run must exist and carry a usable identity.
    self.assertIsNotNone(first_run)
    self.assertEqual(dag.dag_id, first_run.dag_id)
    self.assertIsNotNone(first_run.run_id)
    self.assertNotEqual('', first_run.run_id)

    failure_message = 'dag_run.execution_date did not match expectation: {0}'.format(
        first_run.execution_date
    )
    self.assertEqual(
        datetime(2015, 1, 2, 0, 0),
        first_run.execution_date,
        msg=failure_message,
    )

    self.assertEqual(State.RUNNING, first_run.state)
    self.assertFalse(first_run.external_trigger)

    # Remove the task instances created for this dag.
    dag.clear()
def test_schedule_dag_start_end_dates(self):
    """
    Check that scheduling stops once the dag's end_date has been reached.

    The dag's start/end window is sized to hold exactly three hourly
    runs, so the fourth scheduling attempt must yield no dag run.
    """
    interval = timedelta(hours=1)
    expected_run_count = 3
    window_start = DEFAULT_DATE
    window_end = window_start + (expected_run_count - 1) * interval

    dag = DAG(
        self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
        start_date=window_start,
        end_date=window_end,
        schedule_interval=interval,
    )
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # Schedule every run that fits in the window, then try one more.
    scheduled_runs = [processor.create_dag_run(dag) for _ in range(expected_run_count)]
    overflow_run = processor.create_dag_run(dag)

    for scheduled_run in scheduled_runs:
        self.assertIsNotNone(scheduled_run)

    self.assertIsNone(overflow_run)
def test_schedule_dag_fake_scheduled_previous(self):
    """
    Schedule a dag for which a DagRun already exists under the run_id
    that the next scheduled run would otherwise have used.

    The processor is expected to create a run one schedule interval
    after the pre-existing one.
    """
    interval = timedelta(hours=1)

    dag = DAG(
        self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
        schedule_interval=interval,
        start_date=DEFAULT_DATE,
    )
    fake_task = BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=DEFAULT_DATE,
    )
    dag.add_task(fake_task)

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # Seed a finished, externally triggered run at DEFAULT_DATE.
    dag.create_dagrun(
        run_id=DagRun.id_for_date(DEFAULT_DATE),
        execution_date=DEFAULT_DATE,
        state=State.SUCCESS,
        external_trigger=True,
    )

    next_run = processor.create_dag_run(dag)

    self.assertIsNotNone(next_run)
    self.assertEqual(dag.dag_id, next_run.dag_id)
    self.assertIsNotNone(next_run.run_id)
    self.assertNotEqual('', next_run.run_id)

    failure_message = 'dag_run.execution_date did not match expectation: {0}'.format(
        next_run.execution_date
    )
    self.assertEqual(
        DEFAULT_DATE + interval,
        next_run.execution_date,
        msg=failure_message,
    )

    self.assertEqual(State.RUNNING, next_run.state)
    self.assertFalse(next_run.external_trigger)
def test_trigger_controller_dag(self):
    """
    Backfilling the controller example dag should leave task instances
    to process for the target example dag.

    Before the backfill the processor returns nothing for the target
    dag; after the backfill it returns a non-empty result.
    """
    target_dag_id = 'example_trigger_target_dag'

    controller_dag = self.dagbag.get_dag('example_trigger_controller_dag')
    target_dag = self.dagbag.get_dag(target_dag_id)
    target_dag.sync_to_db()

    processor = DagFileProcessor(dag_ids=[], log=Mock())

    def pending_target_task_instances():
        # Re-query the target dag's runs on every call so that runs
        # created in between are picked up.
        return processor._process_task_instances(
            target_dag,
            dag_runs=DagRun.find(dag_id=target_dag_id),
        )

    self.assertFalse(pending_target_task_instances())

    backfill = BackfillJob(
        dag=controller_dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_first_depends_on_past=True,
    )
    backfill.run()

    self.assertTrue(pending_target_task_instances())
def case():
    """
    Run a DagFileProcessor over the ``example_complex.py`` example dag
    that ships inside the installed ``airflow`` package.
    """
    import logging

    import airflow
    from airflow.jobs.scheduler_job import DagFileProcessor

    logger = logging.getLogger(__name__)
    file_processor = DagFileProcessor(dag_ids=[], log=logger)

    # Locate the example relative to the airflow package itself.
    airflow_package_dir = os.path.dirname(airflow.__file__)
    example_path = os.path.join(airflow_package_dir, "example_dags", "example_complex.py")

    file_processor.process_file(file_path=example_path, failure_callback_requests=[])
def test_schedule_dag_once(self):
    """
    Schedule a dag whose schedule_interval is ``@once``.

    The first scheduling attempt must produce a dag run and the second
    attempt must produce none.
    """
    dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
    dag.schedule_interval = '@once'

    fake_task = BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime(2015, 1, 2, 0, 0),
    )
    dag.add_task(fake_task)

    # Each attempt uses its own, freshly built processor.
    first_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
    first_attempt = first_processor.create_dag_run(dag)

    second_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
    second_attempt = second_processor.create_dag_run(dag)

    self.assertIsNotNone(first_attempt)
    self.assertIsNone(second_attempt)

    # Remove the task instances created for this dag.
    dag.clear()
def case():
    """
    Run a DagFileProcessor over ``elastic_dag.py`` with the ``PERF_*``
    environment variables overridden for the duration of the call.
    """
    import logging
    from unittest import mock

    from airflow.jobs.scheduler_job import DagFileProcessor

    perf_environment = {
        "PERF_DAGS_COUNT": "200",
        "PERF_TASKS_COUNT": "10",
        "PERF_START_AGO": "2d",
        "PERF_SCHEDULE_INTERVAL": "None",
        "PERF_SHAPE": "no_structure",
    }

    # The overrides are removed again when the ``with`` block exits.
    with mock.patch.dict("os.environ", perf_environment):
        logger = logging.getLogger(__name__)
        file_processor = DagFileProcessor(dag_ids=[], log=logger)

        # elastic_dag.py lives in the sibling "dags" directory of this file's parent.
        this_dir = os.path.dirname(__file__)
        elastic_dag_path = os.path.join(this_dir, os.path.pardir, "dags", "elastic_dag.py")

        file_processor.process_file(file_path=elastic_dag_path, failure_callback_requests=[])
def test_schedule_dag_relativedelta(self):
    """
    Schedule a dag whose schedule_interval is a ``relativedelta``.

    Two consecutive runs are created: the first at the task's start
    date and the second exactly one interval later. Both must be
    scheduler-created RUNNING runs.
    """
    dag_id = "test_schedule_dag_relativedelta"
    delta = relativedelta(hours=+1)

    dag = DAG(dag_id=dag_id, schedule_interval=delta)
    fake_task = BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime_tz(2015, 1, 2, 0, 0),
    )
    dag.add_task(fake_task)

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # (expected execution date, failure message template) per consecutive run.
    expectations = (
        (
            datetime_tz(2015, 1, 2, 0, 0),
            'dag_run.execution_date did not match expectation: {0}',
        ),
        (
            datetime_tz(2015, 1, 2, 0, 0) + delta,
            'dag_run2.execution_date did not match expectation: {0}',
        ),
    )

    for expected_date, message_template in expectations:
        # Each run is created only after the previous one was fully checked.
        created_run = processor.create_dag_run(dag)

        self.assertIsNotNone(created_run)
        self.assertEqual(dag.dag_id, created_run.dag_id)
        self.assertIsNotNone(created_run.run_id)
        self.assertNotEqual('', created_run.run_id)
        self.assertEqual(
            expected_date,
            created_run.execution_date,
            msg=message_template.format(created_run.execution_date),
        )
        self.assertEqual(State.RUNNING, created_run.state)
        self.assertFalse(created_run.external_trigger)

    dag.clear()
    self._clean_up(dag_id)
def test_schedule_dag_no_end_date_up_to_today_only(self):
    """
    Tests that a Dag created without an end_date can only be scheduled up
    to, but not including, the current datetime.

    For example, if today is 2016-01-01 and we are scheduling from a
    start_date of 2015-01-01, only jobs up to, but not including
    2016-01-01 should be scheduled.

    The dag starts one week before "now" with a daily interval. One run is
    created (and marked successful) per whole day in that week, and the
    next scheduling attempt is expected to return no dag run.
    """
    dag_id = "test_schedule_dag_no_end_date_up_to_today_only"
    session = settings.Session()
    try:
        delta = datetime.timedelta(days=1)
        now = utcnow()
        start_date = now.subtract(weeks=1)

        # Number of whole days between start_date and now.
        runs = (now - start_date).days

        dag = DAG(dag_id=dag_id, start_date=start_date, schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

        for _ in range(runs):
            dag_run = dag_file_processor.create_dag_run(dag)
            # Check before touching the run, so a missing run fails as an
            # assertion rather than an AttributeError on ``None``.
            self.assertIsNotNone(dag_run)

            # Mark the DagRun as complete
            dag_run.state = State.SUCCESS
            session.merge(dag_run)
            session.commit()

        # Attempt to schedule one more dag run past the expected ones
        additional_dag_run = dag_file_processor.create_dag_run(dag)
        self.assertIsNone(additional_dag_run)
    finally:
        # Release the session and remove the dag's data even when an
        # assertion above fails, so later tests start from a clean state.
        session.close()
        self._clean_up(dag_id)