Ejemplo n.º 1
0
    def test_handle_failure_callback_with_zobmies_are_correctly_passed_to_dag_file_processor(
            self):
        """
        Check that the failure callback requests for zombies are passed to the dag
        file processors until the next zombie detection logic is invoked.

        The test stores a RUNNING task instance whose local job is in the SHUTDOWN
        state, builds the ``FailureCallbackRequest`` expected for it, runs a
        ``DagFileProcessorAgent`` backed by
        ``FakeDagFileProcessorRunner._fake_dag_processor_factory`` and compares the
        harvested results with the expected requests by task instance key.
        """
        test_dag_path = os.path.join(TEST_DAG_FOLDER,
                                     'test_example_bash_operator.py')
        with conf_vars({
            ('scheduler', 'max_threads'): '1',
            ('core', 'load_examples'): 'False'
        }):
            dagbag = DagBag(test_dag_path)
            with create_session() as session:
                # Start from a clean local-job table before inserting the fixture rows.
                session.query(LJ).delete()
                dag = dagbag.get_dag('test_example_bash_operator')
                dag.sync_to_db()
                task = dag.get_task(task_id='run_this_last')

                # A task instance that is still RUNNING while its job is SHUTDOWN.
                ti = TI(task, DEFAULT_DATE, State.RUNNING)
                local_job = LJ(ti)
                local_job.state = State.SHUTDOWN
                local_job.id = 1
                ti.job_id = local_job.id

                session.add(local_job)
                session.add(ti)
                session.commit()
                fake_failure_callback_requests = [
                    FailureCallbackRequest(
                        full_filepath=dag.full_filepath,
                        simple_task_instance=SimpleTaskInstance(ti),
                        msg="Message")
                ]

            # Async mode is only enabled when the configured connection string
            # does not mention sqlite.
            async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
            processor_agent = DagFileProcessorAgent(
                test_dag_path, 1,
                FakeDagFileProcessorRunner._fake_dag_processor_factory,
                timedelta.max, [], False, async_mode)
            processor_agent.start()
            parsing_result = []
            # In sync mode the parsing loop has to be driven explicitly.
            if not async_mode:
                processor_agent.run_single_parsing_loop()
            while not processor_agent.done:
                if not async_mode:
                    processor_agent.wait_until_finished()
                parsing_result.extend(processor_agent.harvest_simple_dags())

            self.assertEqual(len(fake_failure_callback_requests),
                             len(parsing_result))
            self.assertEqual(
                {zombie.simple_task_instance.key
                 for zombie in fake_failure_callback_requests},
                {result.simple_task_instance.key
                 for result in parsing_result})
Ejemplo n.º 2
0
    def test_find_zombies(self):
        """
        Check that ``_find_zombies`` records one callback request, keyed by the
        DAG's file path, for a RUNNING task instance whose local job is SHUTDOWN.
        """
        processor_manager = DagFileProcessorManager(
            dag_directory='directory',
            max_runs=1,
            processor_factory=MagicMock().return_value,
            processor_timeout=timedelta.max,
            signal_conn=MagicMock(),
            dag_ids=[],
            pickle_dags=False,
            async_mode=True)

        bag = DagBag(TEST_DAG_FOLDER)
        with create_session() as session:
            # Remove any local jobs left in the table before adding the fixture.
            session.query(LJ).delete()
            dag = bag.get_dag('example_branch_operator')
            dag.sync_to_db()
            first_task = dag.get_task(task_id='run_this_first')

            # Task instance is RUNNING while the job that owns it is SHUTDOWN.
            ti = TI(first_task, DEFAULT_DATE, State.RUNNING)
            shutdown_job = LJ(ti)
            shutdown_job.state = State.SHUTDOWN
            shutdown_job.id = 1
            ti.job_id = shutdown_job.id

            session.add(shutdown_job)
            session.add(ti)
            session.commit()

            # Move the last query time back past the zombie threshold.
            elapsed = timedelta(seconds=processor_manager._zombie_threshold_secs + 1)
            processor_manager._last_zombie_query_time = timezone.utcnow() - elapsed
            processor_manager._find_zombies()  # pylint: disable=no-value-for-parameter

            callback_requests = processor_manager._callback_to_execute[dag.full_filepath]
            self.assertEqual(1, len(callback_requests))
            self.assertEqual(callback_requests[0].full_filepath, dag.full_filepath)
            self.assertEqual(callback_requests[0].msg, "Detected as zombie")
            self.assertIsInstance(callback_requests[0].simple_task_instance,
                                  SimpleTaskInstance)
            self.assertEqual(
                ti.dag_id, callback_requests[0].simple_task_instance.dag_id)
            self.assertEqual(
                ti.task_id, callback_requests[0].simple_task_instance.task_id)
            self.assertEqual(
                ti.execution_date,
                callback_requests[0].simple_task_instance.execution_date)

            # Clean up the rows created by this test.
            session.query(TI).delete()
            session.query(LJ).delete()
Ejemplo n.º 3
0
    def test_find_zombies(self):
        """
        Check that ``_find_zombies`` stores exactly one ``SimpleTaskInstance`` in
        ``_zombies`` for a RUNNING task instance whose local job is SHUTDOWN.
        """
        processor_manager = DagFileProcessorManager(
            dag_directory='directory',
            file_paths=['abc.txt'],
            max_runs=1,
            processor_factory=MagicMock().return_value,
            processor_timeout=timedelta.max,
            signal_conn=MagicMock(),
            async_mode=True)

        bag = DagBag(TEST_DAG_FOLDER)
        with create_session() as session:
            # Remove any local jobs left in the table before adding the fixture.
            session.query(LJ).delete()
            dag = bag.get_dag('example_branch_operator')
            first_task = dag.get_task(task_id='run_this_first')

            # Task instance is RUNNING while the job that owns it is SHUTDOWN.
            ti = TI(first_task, DEFAULT_DATE, State.RUNNING)
            shutdown_job = LJ(ti)
            shutdown_job.state = State.SHUTDOWN
            shutdown_job.id = 1
            ti.job_id = shutdown_job.id

            session.add(shutdown_job)
            session.add(ti)
            session.commit()

            # Move the last query time back past the zombie threshold.
            elapsed = timedelta(seconds=processor_manager._zombie_threshold_secs + 1)
            processor_manager._last_zombie_query_time = timezone.utcnow() - elapsed
            processor_manager._find_zombies()  # pylint: disable=no-value-for-parameter

            found = processor_manager._zombies
            self.assertEqual(1, len(found))
            self.assertIsInstance(found[0], SimpleTaskInstance)
            self.assertEqual(ti.dag_id, found[0].dag_id)
            self.assertEqual(ti.task_id, found[0].task_id)
            self.assertEqual(ti.execution_date, found[0].execution_date)

            # Clean up the rows created by this test.
            session.query(TI).delete()
            session.query(LJ).delete()
Ejemplo n.º 4
0
    def test_handle_failure_callback_with_zobmies_are_correctly_passed_to_dag_file_processor(self):
        """
        Check that the failure callback requests for zombies are passed to the dag
        file processors until the next zombie detection logic is invoked.

        A fake processor that returns the requests it was constructed with is plugged
        into the ``DagFileProcessorAgent``; the harvested results are then compared
        with the expected requests by task instance key.
        """
        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
        overrides = {
            ('scheduler', 'max_threads'): '1',
            ('core', 'load_examples'): 'False',
        }
        with conf_vars(overrides):
            bag = DagBag(test_dag_path)
            with create_session() as session:
                # Start from a clean local-job table before inserting the fixture rows.
                session.query(LJ).delete()
                dag = bag.get_dag('test_example_bash_operator')
                dag.sync_to_db()
                last_task = dag.get_task(task_id='run_this_last')

                # A task instance that is still RUNNING while its job is SHUTDOWN.
                ti = TI(last_task, DEFAULT_DATE, State.RUNNING)
                shutdown_job = LJ(ti)
                shutdown_job.state = State.SHUTDOWN
                shutdown_job.id = 1
                ti.job_id = shutdown_job.id

                session.add(shutdown_job)
                session.add(ti)
                session.commit()

                expected_request = FailureCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message"
                )
                expected_requests = [expected_request]

            class FakeDagFileProcessorRunner(DagFileProcessorProcess):
                # Stand-in processor: nothing is parsed, the callback requests given to
                # the constructor are handed back as the processing result.
                def __init__(self, file_path, pickle_dags, dag_id_white_list, failure_callback_requests):
                    super().__init__(file_path, pickle_dags, dag_id_white_list, failure_callback_requests)
                    self._result = (failure_callback_requests, 0)

                def start(self):
                    pass

                @property
                def start_time(self):
                    return DEFAULT_DATE

                @property
                def pid(self):
                    return 1234

                @property
                def done(self):
                    return True

                @property
                def result(self):
                    return self._result

            def processor_factory(file_path, failure_callback_requests):
                # Build the fake processor with pickling off and no dag id filter.
                return FakeDagFileProcessorRunner(file_path, False, [], failure_callback_requests)

            # Async mode is only enabled when the configured connection string
            # does not mention sqlite.
            async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
            agent = DagFileProcessorAgent(
                test_dag_path, 1, processor_factory, timedelta.max, async_mode)
            agent.start()
            harvested = []
            # In sync mode the agent has to be driven explicitly.
            if not async_mode:
                agent.heartbeat()
            while not agent.done:
                if not async_mode:
                    agent.wait_until_finished()
                harvested.extend(agent.harvest_simple_dags())

            self.assertEqual(len(expected_requests), len(harvested))
            expected_keys = {zombie.simple_task_instance.key for zombie in expected_requests}
            harvested_keys = {result.simple_task_instance.key for result in harvested}
            self.assertEqual(expected_keys, harvested_keys)