コード例 #1
0
    def test_dag_trigger_parse_dag(self):
        """
        Start a DagTrigger on the scheduler test DAG file and check its results.

        The test reads one message from the mailbox, converts it to an inner
        event and checks its ``dag_id``; it then checks that both DAGs of the
        file can be fetched through ``DagModel`` and ``SerializedDagModel``.
        """
        mailbox = Mailbox()
        dag_trigger = DagTrigger("../../dags/test_scheduler_dags.py", -1, [], False, mailbox)
        dag_trigger.start()
        try:
            message = mailbox.get_message()
            message = SchedulerInnerEventUtil.to_inner_event(message)
            # only one dag is executable
            assert "test_task_start_date_scheduling" == message.dag_id

            assert DagModel.get_dagmodel(dag_id="test_task_start_date_scheduling") is not None
            assert DagModel.get_dagmodel(dag_id="test_start_date_scheduling") is not None
            assert SerializedDagModel.get(dag_id="test_task_start_date_scheduling") is not None
            assert SerializedDagModel.get(dag_id="test_start_date_scheduling") is not None
        finally:
            # Always stop the trigger, even when an assertion above fails, so
            # that whatever start() launched does not outlive the test.
            dag_trigger.end()
コード例 #2
0
    def get_dag(self, dag_id):
        """
        Gets the DAG out of the dictionary, and refreshes it if expired

        When ``self.store_serialized_dags`` is set, the DAG is served from
        ``self.dags`` and, if it is not there yet, loaded from
        ``SerializedDagModel`` (together with its subdags). Otherwise the DAG
        is served from ``self.dags`` and its source file is reprocessed when
        the root DAG is missing from the bag or is marked as expired.

        :param dag_id: DAG Id
        :type dag_id: str
        :return: the DAG stored under ``dag_id`` or ``None`` if it is not found
        """
        # Avoid circular import
        from airflow.models.dag import DagModel

        # Only read DAGs from DB if this dagbag is store_serialized_dags.
        if self.store_serialized_dags:
            # Import here so that serialized dag is only imported when serialization is enabled
            from airflow.models.serialized_dag import SerializedDagModel
            if dag_id not in self.dags:
                # Load from DB if not (yet) in the bag
                row = SerializedDagModel.get(dag_id)
                if not row:
                    return None

                dag = row.dag
                for subdag in dag.subdags:
                    self.dags[subdag.dag_id] = subdag
                self.dags[dag.dag_id] = dag

            return self.dags.get(dag_id)

        # From here on store_serialized_dags is falsy (the branch above always
        # returns), so the bag never has to force a reload of a serialized DAG.

        # If asking for a known subdag, we want to refresh the parent
        dag = None
        root_dag_id = dag_id
        if dag_id in self.dags:
            dag = self.dags[dag_id]
            if dag.is_subdag:
                root_dag_id = dag.parent_dag.dag_id

        # Without a DagModel row there is neither an expiry date to compare
        # against nor a file location to reprocess.
        orm_dag = DagModel.get_current(root_dag_id)
        if not orm_dag:
            return self.dags.get(dag_id)

        # If the dag corresponding to root_dag_id is absent or expired
        is_missing = root_dag_id not in self.dags
        if is_missing or (
                orm_dag.last_expired and
                dag is not None and
                dag.last_loaded < orm_dag.last_expired):
            # Reprocess source file
            found_dags = self.process_file(
                filepath=correct_maybe_zipped(orm_dag.fileloc),
                only_if_updated=False)

            # If the source file no longer exports `dag_id`, delete it from self.dags
            if found_dags and any(
                    found_dag.dag_id == dag_id for found_dag in found_dags):
                return self.dags[dag_id]
            elif dag_id in self.dags:
                del self.dags[dag_id]
        return self.dags.get(dag_id)
コード例 #3
0
    def test_user_defined_filter_and_macros_raise_error(
            self, get_dag_function):
        """
        Test that the Rendered View shows an explanatory error message, and not
        the rendered command, for a task whose template uses a user-defined
        macro and filter while the DAG is obtained from SerializedDagModel
        """
        serialized_row = SerializedDagModel.get(self.dag.dag_id)
        get_dag_function.return_value = serialized_row.dag

        self.assertEqual(self.task2.bash_command,
                         'echo {{ fullname("Apache", "Airflow") | hello }}')

        encoded_date = self.percent_encode(self.default_date)
        url = (
            '/admin/airflow/rendered?task_id=task2&dag_id=testdag&execution_date={}'
            .format(encoded_date))

        resp = self.app.get(url, follow_redirects=True)
        page = resp.data.decode('utf-8')

        # The template must not have been rendered successfully.
        self.assertNotIn("echo Hello Apache Airflow", page)

        if not six.PY3:
            # On Python 2 only the generic part of the message is checked.
            self.assertIn(
                "Webserver does not have access to User-defined Macros or Filters "
                "when Dag Serialization is enabled. Hence for the task that have not yet "
                "started running, please use &#39;airflow tasks render&#39; for debugging the "
                "rendering of template_fields.", page)
            return

        # On Python 3 the original error is expected to follow the message.
        self.assertIn(
            "Webserver does not have access to User-defined Macros or Filters "
            "when Dag Serialization is enabled. Hence for the task that have not yet "
            "started running, please use &#39;airflow tasks render&#39; for debugging the "
            "rendering of template_fields.<br/><br/>OriginalError: no filter named &#39;hello&#39",
            page)
コード例 #4
0
def get_dag_by_deserialization(dag_id: str) -> "DAG":
    """
    Return the DAG held by the serialized DAG row stored for ``dag_id``.

    :param dag_id: id of the DAG to look up through ``SerializedDagModel.get``
    :return: the ``dag`` attribute of the row that was found
    :raises AirflowException: if no row is found for ``dag_id``
    """
    # Imported locally, as in the original implementation.
    from airflow.models.serialized_dag import SerializedDagModel

    serialized_row = SerializedDagModel.get(dag_id)
    if serialized_row is not None:
        return serialized_row.dag

    raise AirflowException(f"Serialized DAG: {dag_id} could not be found")
コード例 #5
0
    def get_dag(self, dag_id):
        """
        Gets the DAG out of the dictionary, and refreshes it if expired

        When ``self.read_dags_from_db`` is set, the DAG is served from
        ``self.dags`` and, if it is not there yet, loaded from
        ``SerializedDagModel`` (together with its subdags). Otherwise the DAG
        is served from ``self.dags`` and its source file is reprocessed when
        the root DAG is missing from the bag or is marked as expired.

        :param dag_id: DAG Id
        :type dag_id: str
        :return: the DAG stored under ``dag_id`` or ``None`` if it is not found
        """
        # Avoid circular import
        from airflow.models.dag import DagModel

        # Only read DAGs from DB if this dagbag is read_dags_from_db.
        if self.read_dags_from_db:
            # Import here so that serialized dag is only imported when serialization is enabled
            from airflow.models.serialized_dag import SerializedDagModel
            if dag_id not in self.dags:
                # Load from DB if not (yet) in the bag
                row = SerializedDagModel.get(dag_id)
                if not row:
                    return None

                dag = row.dag
                for subdag in dag.subdags:
                    self.dags[subdag.dag_id] = subdag
                self.dags[dag.dag_id] = dag

            return self.dags.get(dag_id)

        # If asking for a known subdag, we want to refresh the parent
        dag = None
        root_dag_id = dag_id
        if dag_id in self.dags:
            dag = self.dags[dag_id]
            if dag.is_subdag:
                root_dag_id = dag.parent_dag.dag_id

        # If DAG Model is absent, we can't check last_expired property. Is the DAG not yet synchronized?
        orm_dag = DagModel.get_current(root_dag_id)
        if not orm_dag:
            return self.dags.get(dag_id)

        # If the dag corresponding to root_dag_id is absent or expired
        is_missing = root_dag_id not in self.dags
        # `dag` is still None when `dag_id` is not in the bag; without the
        # guard, a set `last_expired` would make `dag.last_loaded` raise
        # AttributeError instead of letting the missing DAG be loaded.
        is_expired = (orm_dag.last_expired
                      and dag is not None
                      and dag.last_loaded < orm_dag.last_expired)
        if is_missing or is_expired:
            # Reprocess source file
            found_dags = self.process_file(filepath=correct_maybe_zipped(
                orm_dag.fileloc),
                                           only_if_updated=False)

            # If the source file no longer exports `dag_id`, delete it from self.dags
            if found_dags and dag_id in [
                    found_dag.dag_id for found_dag in found_dags
            ]:
                return self.dags[dag_id]
            elif dag_id in self.dags:
                del self.dags[dag_id]
        return self.dags.get(dag_id)
コード例 #6
0
    def _add_dag_from_db(self, dag_id: str):
        """
        Load the DAG ``dag_id`` through ``SerializedDagModel`` and put it in the bag.

        The DAG and each of its subdags are stored in ``self.dags`` and the
        current UTC time is recorded in ``self.dags_last_fetched`` for the DAG.

        :param dag_id: id of the DAG to load
        :raises ValueError: if ``SerializedDagModel.get`` returns nothing for ``dag_id``
        """
        from airflow.models.serialized_dag import SerializedDagModel

        serialized_row = SerializedDagModel.get(dag_id)
        if not serialized_row:
            raise ValueError(f"DAG '{dag_id}' not found in serialized_dag table")

        loaded_dag = serialized_row.dag
        # Subdags are registered first, then the DAG itself.
        for child in loaded_dag.subdags:
            self.dags[child.dag_id] = child
        self.dags[loaded_dag.dag_id] = loaded_dag

        fetched_at = timezone.utcnow()
        self.dags_last_fetched[loaded_dag.dag_id] = fetched_at
コード例 #7
0
    def init_dag_and_dag_run(
        dag_file: str, dag_id: str, execution_date: datetime.datetime
    ) -> Tuple[SerializedDagModel, DagRun]:
        """
        Load ``dag_id`` from ``dag_file``, write it as a serialized DAG and create a run.

        :param dag_file: value passed to ``DagBag`` as ``dag_folder``
        :param dag_id: key of the DAG to pick from the bag
        :param execution_date: execution date of the manual, running DAG run
        :return: the row returned by ``SerializedDagModel.get`` and the created DAG run
        """
        bag = DagBag(dag_folder=dag_file)
        target_dag = bag.dags[dag_id]

        # Write the DAG first so that it can be read back as a serialized row.
        SerializedDagModel.write_dag(target_dag)
        serialized_row = SerializedDagModel.get(target_dag.dag_id)

        created_run = target_dag.create_dagrun(
            run_type=DagRunType.MANUAL,
            execution_date=execution_date,
            state=State.RUNNING,
        )
        return serialized_row, created_run
コード例 #8
0
ファイル: dagbag.py プロジェクト: yesemsanthoshkumar/airflow
    def _add_dag_from_db(self, dag_id: str, session: Session):
        """
        Load the DAG ``dag_id`` through ``SerializedDagModel`` and put it in the bag.

        The DAG and each of its subdags are stored in ``self.dags``; the
        current UTC time and the row's ``dag_hash`` are recorded for the DAG in
        ``self.dags_last_fetched`` and ``self.dags_hash``.

        :param dag_id: id of the DAG to load
        :param session: session passed on to ``SerializedDagModel.get``
        :raises SerializedDagNotFound: if no row is returned for ``dag_id``
        """
        from airflow.models.serialized_dag import SerializedDagModel

        serialized_row = SerializedDagModel.get(dag_id, session)
        if not serialized_row:
            raise SerializedDagNotFound(f"DAG '{dag_id}' not found in serialized_dag table")

        # Set on the row before its `dag` attribute is read.
        serialized_row.load_op_links = self.load_op_links
        loaded_dag = serialized_row.dag

        # Subdags are registered first, then the DAG itself.
        for child in loaded_dag.subdags:
            self.dags[child.dag_id] = child
        root_id = loaded_dag.dag_id
        self.dags[root_id] = loaded_dag

        self.dags_last_fetched[loaded_dag.dag_id] = timezone.utcnow()
        self.dags_hash[loaded_dag.dag_id] = serialized_row.dag_hash
コード例 #9
0
    def test_rendered_view_for_unexecuted_tis(self, get_dag_function):
        """
        Test that the Rendered View is able to show rendered values
        even for TIs that have not yet executed
        """
        get_dag_function.return_value = SerializedDagModel.get(
            self.dag.dag_id).dag

        self.assertEqual(self.task1.bash_command,
                         '{{ task_instance_key_str }}')

        # Request the DAG the task belongs to ("testdag", as the expected
        # output below shows) rather than a DAG named after the task; the old
        # "dag_id=task1" only worked because the DAG lookup is mocked.
        url = (
            '/admin/airflow/rendered?task_id=task1&dag_id=testdag&execution_date={}'
            .format(self.percent_encode(self.default_date)))

        resp = self.app.get(url, follow_redirects=True)
        self.assertIn("testdag__task1__20200301", resp.data.decode('utf-8'))
コード例 #10
0
    def test_rendered_view(self, get_dag_function):
        """
        Test that the Rendered View shows the rendered value once a
        RenderedTaskInstanceFields (RTIF) record has been added for the task instance
        """
        serialized_row = SerializedDagModel.get(self.dag.dag_id)
        get_dag_function.return_value = serialized_row.dag

        self.assertEqual(self.task1.bash_command,
                         '{{ task_instance_key_str }}')

        # Add the rendered fields of the task instance through a session.
        task_instance = TaskInstance(self.task1, self.default_date)
        with create_session() as session:
            rendered_fields = RTIF(task_instance)
            session.add(rendered_fields)

        encoded_date = self.percent_encode(self.default_date)
        url = (
            '/admin/airflow/rendered?task_id=task1&dag_id=testdag&execution_date={}'
            .format(encoded_date))

        resp = self.app.get(url, follow_redirects=True)
        page = resp.data.decode('utf-8')
        self.assertIn("testdag__task1__20200301", page)
コード例 #11
0
    def __init__(self,
                 mailbox: Mailbox,
                 dag_run_id: DagRunId,
                 max_num_event: int,
                 poll_timeout: int = 0) -> None:
        """
        Store the settings and look up the dag run and its serialized dag.

        :param mailbox: where the EventHandleResult is sent to.
        :type mailbox: Mailbox
        :param dag_run_id: identifies the dag run through its dag_id and run_id
        :type dag_run_id: DagRunId
        :param max_num_event: max number of events that can be handled before exit
        :type max_num_event: int
        :param poll_timeout: poll timeout in seconds for an event before exit
        :type poll_timeout: int
        :raises RuntimeError: if zero or several dag runs match ``dag_run_id``,
            or if no serialized dag is found for the dag run's dag_id
        """
        super().__init__()
        self._mailbox = mailbox
        self._dag_run_id = dag_run_id
        self._event_queue = Queue()
        self._max_num_event = max_num_event
        self._poll_timeout = poll_timeout

        # Exactly one dag run must match the given id.
        matching_runs = DagRun.find(dag_id=dag_run_id.dag_id,
                                    run_id=dag_run_id.run_id)
        run_count = len(matching_runs)
        if run_count < 1:
            raise RuntimeError(
                "no dag_run found with dag_run_id: {}".format(dag_run_id))
        if run_count > 1:
            raise RuntimeError(
                "more than one dag_run found with dag_run_id: {}".format(
                    dag_run_id))

        self._dag_run = matching_runs[0]
        dag_id = self._dag_run.dag_id

        # The attribute is assigned before the check, as callers may rely on it.
        self._serialized_dag_model = SerializedDagModel.get(dag_id)
        if self._serialized_dag_model is not None:
            return
        raise RuntimeError(
            "no serialized dag is found with dag_id: {}".format(dag_id))
コード例 #12
0
    def get_dag(self, dag_id, from_file_only=False):
        """
        Gets the DAG out of the dictionary, and refreshes it if expired

        When ``self.store_serialized_dags`` is set and ``from_file_only`` is
        not, the DAG is served from ``self.dags`` and, if it is not there yet,
        loaded from ``SerializedDagModel`` (together with its subdags).
        Otherwise the source file is reprocessed when the root DAG is missing
        from the bag, is marked as expired, or is a ``SerializedDAG`` in a
        ``store_serialized_dags`` bag.

        If no ``DagModel`` row exists for the root DAG, the file location is
        unknown and whatever ``self.dags`` holds for ``dag_id`` is returned.

        :param dag_id: DAG Id
        :type dag_id: str
        :param from_file_only: returns a DAG loaded from file.
        :type from_file_only: bool
        :return: the DAG stored under ``dag_id`` or ``None`` if it is not found
        """
        # Avoid circular import
        from airflow.models.dag import DagModel

        # Only read DAGs from DB if this dagbag is store_serialized_dags.
        # from_file_only is an exception, currently it is for rendering templates
        # in UI only. Because functions are gone in serialized DAGs, DAGs must be
        # imported from files.
        # FIXME: this exception should be removed in future, then webserver can be
        # decoupled from DAG files.
        if self.store_serialized_dags and not from_file_only:
            # Import here so that serialized dag is only imported when serialization is enabled
            from airflow.models.serialized_dag import SerializedDagModel
            if dag_id not in self.dags:
                # Load from DB if not (yet) in the bag
                row = SerializedDagModel.get(dag_id)
                if not row:
                    return None

                dag = row.dag
                for subdag in dag.subdags:
                    self.dags[subdag.dag_id] = subdag
                self.dags[dag.dag_id] = dag

            return self.dags.get(dag_id)

        # If asking for a known subdag, we want to refresh the parent
        dag = None
        root_dag_id = dag_id
        if dag_id in self.dags:
            dag = self.dags[dag_id]
            if dag.is_subdag:
                root_dag_id = dag.parent_dag.dag_id

        # Needs to load from file for a store_serialized_dags dagbag.
        enforce_from_file = False
        if self.store_serialized_dags and dag is not None:
            from airflow.serialization.serialized_dag import SerializedDAG
            enforce_from_file = isinstance(dag, SerializedDAG)

        # Without a DagModel row there is no file location to reprocess. This
        # previously raised AttributeError on `orm_dag.fileloc` when
        # enforce_from_file was set.
        orm_dag = DagModel.get_current(root_dag_id)
        if not orm_dag:
            return self.dags.get(dag_id)

        # If the dag corresponding to root_dag_id is absent or expired
        is_missing = root_dag_id not in self.dags
        if enforce_from_file or is_missing or (
                orm_dag.last_expired and
                dag is not None and
                dag.last_loaded < orm_dag.last_expired):
            # Reprocess source file
            found_dags = self.process_file(
                filepath=correct_maybe_zipped(orm_dag.fileloc), only_if_updated=False)

            # If the source file no longer exports `dag_id`, delete it from self.dags
            if found_dags and any(found_dag.dag_id == dag_id for found_dag in found_dags):
                return self.dags[dag_id]
            elif dag_id in self.dags:
                del self.dags[dag_id]
        return self.dags.get(dag_id)