예제 #1
0
    def test_schedule_dag_no_previous_runs(self):
        """
        Check the first run created for a DAG that has no earlier runs.

        The DAG holds a single task starting on 2015-01-02. The run created
        by the DagFileProcessor is expected to carry that execution date, be
        in the RUNNING state and not be flagged as externally triggered.
        """
        dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID)
        only_task = BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime(2015, 1, 2, 0, 0),
        )
        dag.add_task(only_task)

        processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        created_run = processor.create_dag_run(dag)

        # Identity checks on the freshly created run.
        self.assertIsNotNone(created_run)
        self.assertEqual(dag.dag_id, created_run.dag_id)
        self.assertIsNotNone(created_run.run_id)
        self.assertNotEqual('', created_run.run_id)

        # The run is expected on the task's start date.
        expected_date = datetime(2015, 1, 2, 0, 0)
        failure_msg = 'dag_run.execution_date did not match expectation: {0}'.format(
            created_run.execution_date)
        self.assertEqual(expected_date, created_run.execution_date, msg=failure_msg)

        self.assertEqual(State.RUNNING, created_run.state)
        self.assertFalse(created_run.external_trigger)
        dag.clear()
예제 #2
0
    def test_schedule_dag_start_end_dates(self):
        """
        Verify that no run is created once the Dag's end_date is exhausted.

        The window between start_date and end_date is sized for three hourly
        runs; a fourth scheduling attempt is expected to return None.
        """
        runs = 3
        delta = timedelta(hours=1)
        first_date = DEFAULT_DATE
        last_date = first_date + (runs - 1) * delta

        dag = DAG(
            self.TEST_SCHEDULE_START_END_DATES_DAG_ID,
            start_date=first_date,
            end_date=last_date,
            schedule_interval=delta,
        )
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

        # Schedule every run the window allows, then attempt one more.
        in_window_runs = [processor.create_dag_run(dag) for _ in range(runs)]
        beyond_end_run = processor.create_dag_run(dag)

        for scheduled in in_window_runs:
            self.assertIsNotNone(scheduled)

        self.assertIsNone(beyond_end_run)
예제 #3
0
    def test_schedule_dag_fake_scheduled_previous(self):
        """
        Schedule a DAG for which a DagRun at DEFAULT_DATE already exists.

        The pre-existing run uses the run_id derived from DEFAULT_DATE and is
        created as an externally triggered, successful run. The run created
        afterwards is expected one schedule interval after DEFAULT_DATE.
        """
        one_hour = timedelta(hours=1)

        dag = DAG(
            self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
            schedule_interval=one_hour,
            start_date=DEFAULT_DATE,
        )
        only_task = BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=DEFAULT_DATE,
        )
        dag.add_task(only_task)

        processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

        # Seed the prior run before asking the processor for a new one.
        dag.create_dagrun(
            run_id=DagRun.id_for_date(DEFAULT_DATE),
            execution_date=DEFAULT_DATE,
            state=State.SUCCESS,
            external_trigger=True,
        )
        next_run = processor.create_dag_run(dag)

        self.assertIsNotNone(next_run)
        self.assertEqual(dag.dag_id, next_run.dag_id)
        self.assertIsNotNone(next_run.run_id)
        self.assertNotEqual('', next_run.run_id)

        expected_date = DEFAULT_DATE + one_hour
        failure_msg = 'dag_run.execution_date did not match expectation: {0}'.format(
            next_run.execution_date)
        self.assertEqual(expected_date, next_run.execution_date, msg=failure_msg)

        self.assertEqual(State.RUNNING, next_run.state)
        self.assertFalse(next_run.external_trigger)
예제 #4
0
    def test_trigger_controller_dag(self):
        """
        Check the target DAG's task instances before and after backfilling
        the controller DAG.

        Processing the target DAG's runs is expected to give a falsy result
        before the controller DAG is backfilled and a truthy one afterwards.
        """
        target_dag_id = 'example_trigger_target_dag'
        controller_dag = self.dagbag.get_dag('example_trigger_controller_dag')
        target_dag = self.dagbag.get_dag(target_dag_id)
        target_dag.sync_to_db()

        processor = DagFileProcessor(dag_ids=[], log=Mock())

        def process_target_runs():
            """Process the target DAG's current DagRuns and return the result."""
            return processor._process_task_instances(
                target_dag,
                dag_runs=DagRun.find(dag_id=target_dag_id),
            )

        self.assertFalse(process_target_runs())

        backfill = BackfillJob(
            dag=controller_dag,
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_first_depends_on_past=True,
        )
        backfill.run()

        self.assertTrue(process_target_runs())
예제 #5
0
    def case():
        """Run DagFileProcessor.process_file on airflow's example_complex.py example DAG file."""
        import logging

        import airflow
        from airflow.jobs.scheduler_job import DagFileProcessor

        # Locate the example DAG relative to the installed airflow package.
        example_dags_dir = os.path.join(os.path.dirname(airflow.__file__), "example_dags")
        dag_file = os.path.join(example_dags_dir, "example_complex.py")

        file_processor = DagFileProcessor(dag_ids=[], log=logging.getLogger(__name__))
        file_processor.process_file(file_path=dag_file, failure_callback_requests=[])
예제 #6
0
    def test_schedule_dag_once(self):
        """
        Schedule an '@once' DAG twice: the first attempt is expected to
        create a run, the second is expected to return None.
        """
        dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
        dag.schedule_interval = '@once'
        only_task = BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime(2015, 1, 2, 0, 0),
        )
        dag.add_task(only_task)

        # Each attempt goes through its own, freshly built processor.
        first_attempt, second_attempt = [
            DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)
            for _ in range(2)
        ]

        self.assertIsNotNone(first_attempt)
        self.assertIsNone(second_attempt)
        dag.clear()
예제 #7
0
    def case():
        """Process the elastic_dag.py file with the PERF_* environment variables patched in."""
        import logging
        from unittest import mock

        from airflow.jobs.scheduler_job import DagFileProcessor

        # Environment overrides applied only while the file is processed.
        perf_env = {
            "PERF_DAGS_COUNT": "200",
            "PERF_TASKS_COUNT": "10",
            "PERF_START_AGO": "2d",
            "PERF_SCHEDULE_INTERVAL": "None",
            "PERF_SHAPE": "no_structure",
        }
        dag_file = os.path.join(os.path.dirname(__file__), os.path.pardir, "dags", "elastic_dag.py")

        with mock.patch.dict("os.environ", perf_env):
            file_processor = DagFileProcessor(dag_ids=[], log=logging.getLogger(__name__))
            file_processor.process_file(file_path=dag_file, failure_callback_requests=[])
예제 #8
0
    def test_schedule_dag_relativedelta(self):
        """
        Schedule a DAG whose schedule_interval is a relativedelta.

        Two consecutive runs are created. The first is expected at the task's
        start date and the second one interval later; both are expected to be
        RUNNING and not externally triggered.
        """
        dag_id = "test_schedule_dag_relativedelta"
        delta = relativedelta(hours=+1)
        dag = DAG(dag_id=dag_id, schedule_interval=delta)
        only_task = BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime_tz(2015, 1, 2, 0, 0),
        )
        dag.add_task(only_task)

        processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

        def check_run(run, expected_date, msg_template):
            """Apply the shared set of assertions to one created run."""
            self.assertIsNotNone(run)
            self.assertEqual(dag.dag_id, run.dag_id)
            self.assertIsNotNone(run.run_id)
            self.assertNotEqual('', run.run_id)
            self.assertEqual(
                expected_date,
                run.execution_date,
                msg=msg_template.format(run.execution_date),
            )
            self.assertEqual(State.RUNNING, run.state)
            self.assertFalse(run.external_trigger)

        # The first run is fully checked before the second one is created.
        check_run(
            processor.create_dag_run(dag),
            datetime_tz(2015, 1, 2, 0, 0),
            'dag_run.execution_date did not match expectation: {0}',
        )
        check_run(
            processor.create_dag_run(dag),
            datetime_tz(2015, 1, 2, 0, 0) + delta,
            'dag_run2.execution_date did not match expectation: {0}',
        )

        dag.clear()
        self._clean_up(dag_id)
예제 #9
0
    def test_schedule_dag_no_end_date_up_to_today_only(self):
        """
        Tests that a Dag created without an end_date can only be scheduled up
        to, but not including, the current datetime.

        For example, if today is 2016-01-01 and we are scheduling from a
        start_date of 2015-01-01, only jobs up to, but not including
        2016-01-01 should be scheduled.

        Here the start_date is one week before now with a daily schedule, so
        ``(now - start_date).days`` runs are expected to be created and one
        further scheduling attempt is expected to return None.
        """
        dag_id = "test_schedule_dag_no_end_date_up_to_today_only"
        delta = datetime.timedelta(days=1)
        now = utcnow()
        start_date = now.subtract(weeks=1)
        runs = (now - start_date).days

        dag = DAG(dag_id=dag_id,
                  start_date=start_date,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        session = settings.Session()
        try:
            for _ in range(runs):
                dag_run = dag_file_processor.create_dag_run(dag)
                # Assert before touching the run so a scheduling failure is
                # reported as a test failure rather than an AttributeError
                # on None.
                self.assertIsNotNone(dag_run)

                # Mark the DagRun as complete
                dag_run.state = State.SUCCESS
                session.merge(dag_run)
                session.commit()

            # Attempt to schedule an additional dag run (for 2016-01-01 in
            # the docstring's example); none is expected.
            additional_dag_run = dag_file_processor.create_dag_run(dag)
            self.assertIsNone(additional_dag_run)
        finally:
            # Release the session and remove test data even when an
            # assertion above failed.
            session.close()
            self._clean_up(dag_id)