def test_dag_trigger_parse_dag(self):
    mailbox = Mailbox()
    dag_trigger = DagTrigger("../../dags/test_scheduler_dags.py", -1, [], False, mailbox)
    dag_trigger.start()
    message = mailbox.get_message()
    message = SchedulerInnerEventUtil.to_inner_event(message)
    # only one dag is executable
    assert "test_task_start_date_scheduling" == message.dag_id
    assert DagModel.get_dagmodel(dag_id="test_task_start_date_scheduling") is not None
    assert DagModel.get_dagmodel(dag_id="test_start_date_scheduling") is not None
    assert SerializedDagModel.get(dag_id="test_task_start_date_scheduling") is not None
    assert SerializedDagModel.get(dag_id="test_start_date_scheduling") is not None
    dag_trigger.end()
def get_dag(self, dag_id):
    """
    Gets the DAG out of the dictionary, and refreshes it if expired

    :param dag_id: DAG Id
    :type dag_id: str
    """
    # Avoid circular import
    from airflow.models.dag import DagModel

    # Only read DAGs from DB if this dagbag is store_serialized_dags.
    if self.store_serialized_dags:
        # Import here so that serialized dag is only imported when serialization is enabled
        from airflow.models.serialized_dag import SerializedDagModel
        if dag_id not in self.dags:
            # Load from DB if not (yet) in the bag
            row = SerializedDagModel.get(dag_id)
            if not row:
                return None

            dag = row.dag
            for subdag in dag.subdags:
                self.dags[subdag.dag_id] = subdag
            self.dags[dag.dag_id] = dag

        return self.dags.get(dag_id)

    # If asking for a known subdag, we want to refresh the parent
    dag = None
    root_dag_id = dag_id
    if dag_id in self.dags:
        dag = self.dags[dag_id]
        if dag.is_subdag:
            root_dag_id = dag.parent_dag.dag_id

    # Needs to load from file for a store_serialized_dags dagbag.
    enforce_from_file = False
    if self.store_serialized_dags and dag is not None:
        from airflow.serialization.serialized_objects import SerializedDAG
        enforce_from_file = isinstance(dag, SerializedDAG)

    # If the dag corresponding to root_dag_id is absent or expired
    orm_dag = DagModel.get_current(root_dag_id)
    if (orm_dag and (
            root_dag_id not in self.dags or
            (orm_dag.last_expired and dag.last_loaded < orm_dag.last_expired)
    )) or enforce_from_file:
        # Reprocess source file
        found_dags = self.process_file(
            filepath=correct_maybe_zipped(orm_dag.fileloc), only_if_updated=False)

        # If the source file no longer exports `dag_id`, delete it from self.dags
        if found_dags and dag_id in [found_dag.dag_id for found_dag in found_dags]:
            return self.dags[dag_id]
        elif dag_id in self.dags:
            del self.dags[dag_id]
    return self.dags.get(dag_id)
def test_user_defined_filter_and_macros_raise_error(self, get_dag_function):
    """
    Test that the Rendered View shows an error message for user-defined
    filters and macros when Dag Serialization is enabled
    """
    get_dag_function.return_value = SerializedDagModel.get(self.dag.dag_id).dag

    self.assertEqual(self.task2.bash_command,
                     'echo {{ fullname("Apache", "Airflow") | hello }}')

    url = ('/admin/airflow/rendered?task_id=task2&dag_id=testdag&execution_date={}'
           .format(self.percent_encode(self.default_date)))

    resp = self.app.get(url, follow_redirects=True)
    self.assertNotIn("echo Hello Apache Airflow", resp.data.decode('utf-8'))
    if six.PY3:
        self.assertIn(
            "Webserver does not have access to User-defined Macros or Filters "
            "when Dag Serialization is enabled. Hence for the task that have not yet "
            "started running, please use 'airflow tasks render' for debugging the "
            "rendering of template_fields.<br/><br/>OriginalError: no filter named 'hello'",
            resp.data.decode('utf-8'))
    else:
        self.assertIn(
            "Webserver does not have access to User-defined Macros or Filters "
            "when Dag Serialization is enabled. Hence for the task that have not yet "
            "started running, please use 'airflow tasks render' for debugging the "
            "rendering of template_fields.",
            resp.data.decode('utf-8'))
def get_dag_by_deserialization(dag_id: str) -> "DAG":
    """Return the DAG deserialized from the serialized_dag table, raising if no row exists."""
    from airflow.models.serialized_dag import SerializedDagModel

    dag_model = SerializedDagModel.get(dag_id)
    if dag_model is None:
        raise AirflowException(f"Serialized DAG: {dag_id} could not be found")

    return dag_model.dag
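# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal example of calling get_dag_by_deserialization() above. It assumes a
# configured Airflow metadata database and that the placeholder dag_id
# "example_dag" has already been written to the serialized_dag table.
from airflow.exceptions import AirflowException

try:
    dag = get_dag_by_deserialization("example_dag")  # placeholder dag_id
    print(dag.dag_id, [task.task_id for task in dag.tasks])
except AirflowException as err:
    # Raised when the serialized_dag table has no row for this dag_id
    print(f"DAG is not serialized yet: {err}")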
def get_dag(self, dag_id):
    """
    Gets the DAG out of the dictionary, and refreshes it if expired

    :param dag_id: DAG Id
    :type dag_id: str
    """
    # Avoid circular import
    from airflow.models.dag import DagModel

    # Only read DAGs from DB if this dagbag is read_dags_from_db.
    if self.read_dags_from_db:
        # Import here so that serialized dag is only imported when serialization is enabled
        from airflow.models.serialized_dag import SerializedDagModel
        if dag_id not in self.dags:
            # Load from DB if not (yet) in the bag
            row = SerializedDagModel.get(dag_id)
            if not row:
                return None

            dag = row.dag
            for subdag in dag.subdags:
                self.dags[subdag.dag_id] = subdag
            self.dags[dag.dag_id] = dag

        return self.dags.get(dag_id)

    # If asking for a known subdag, we want to refresh the parent
    dag = None
    root_dag_id = dag_id
    if dag_id in self.dags:
        dag = self.dags[dag_id]
        if dag.is_subdag:
            root_dag_id = dag.parent_dag.dag_id

    # If DAG Model is absent, we can't check last_expired property. Is the DAG not yet synchronized?
    orm_dag = DagModel.get_current(root_dag_id)
    if not orm_dag:
        return self.dags.get(dag_id)

    # If the dag corresponding to root_dag_id is absent or expired
    is_missing = root_dag_id not in self.dags
    is_expired = (orm_dag.last_expired and dag.last_loaded < orm_dag.last_expired)
    if is_missing or is_expired:
        # Reprocess source file
        found_dags = self.process_file(
            filepath=correct_maybe_zipped(orm_dag.fileloc), only_if_updated=False)

        # If the source file no longer exports `dag_id`, delete it from self.dags
        if found_dags and dag_id in [found_dag.dag_id for found_dag in found_dags]:
            return self.dags[dag_id]
        elif dag_id in self.dags:
            del self.dags[dag_id]
    return self.dags.get(dag_id)
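# --- Usage sketch (illustrative, not part of the original source) ---
# Exercising the DB-backed branch of the get_dag() variant above. This assumes
# an Airflow version whose DagBag constructor accepts read_dags_from_db=True
# (matching the attribute used above) and that the placeholder dag_id
# "example_dag" has already been serialized by the scheduler/DAG processor.
from airflow.models.dagbag import DagBag

db_dagbag = DagBag(read_dags_from_db=True)  # never parses DAG files itself
dag = db_dagbag.get_dag("example_dag")      # in the variant above: None if no serialized row exists
if dag is not None:
    print(dag.dag_id, len(dag.tasks))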
def _add_dag_from_db(self, dag_id: str):
    """Add DAG to DagBag from DB"""
    from airflow.models.serialized_dag import SerializedDagModel
    row = SerializedDagModel.get(dag_id)
    if not row:
        raise ValueError(f"DAG '{dag_id}' not found in serialized_dag table")

    dag = row.dag
    for subdag in dag.subdags:
        self.dags[subdag.dag_id] = subdag
    self.dags[dag.dag_id] = dag
    self.dags_last_fetched[dag.dag_id] = timezone.utcnow()
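# --- Caller sketch (an assumption, not the library's actual get_dag implementation) ---
# Shows how a DB-backed lookup could combine _add_dag_from_db() above with the
# dags_last_fetched timestamps it records, so the serialized_dag table is not
# re-read on every call. The helper name and the 30-second refresh interval are
# made-up placeholders.
import datetime
from airflow.utils import timezone

REFRESH_INTERVAL = datetime.timedelta(seconds=30)  # placeholder value

def get_dag_from_db(dagbag, dag_id: str):
    last_fetched = dagbag.dags_last_fetched.get(dag_id)
    stale = last_fetched and timezone.utcnow() - last_fetched > REFRESH_INTERVAL
    if dag_id not in dagbag.dags or stale:
        dagbag._add_dag_from_db(dag_id)  # raises ValueError if the DAG was never serialized
    return dagbag.dags.get(dag_id)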
def init_dag_and_dag_run(
        dag_file: str, dag_id: str, execution_date: datetime.datetime
) -> Tuple[SerializedDagModel, DagRun]:
    """Parse ``dag_file``, write the DAG to the serialized_dag table, and create a RUNNING dag run."""
    dags = DagBag(dag_folder=dag_file).dags
    dag = dags[dag_id]
    SerializedDagModel.write_dag(dag)
    serialized_dag = SerializedDagModel.get(dag.dag_id)
    dag_run = dag.create_dagrun(run_type=DagRunType.MANUAL,
                                execution_date=execution_date,
                                state=State.RUNNING)
    return serialized_dag, dag_run
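# --- Usage sketch (illustrative; the path and dag_id are placeholders borrowed from the DagTrigger test above) ---
# How a test might call init_dag_and_dag_run() above to obtain both the
# serialized DAG row and a RUNNING dag run for a fixed execution date.
from airflow.utils import timezone
from airflow.utils.state import State

serialized_dag, dag_run = init_dag_and_dag_run(
    dag_file="../../dags/test_scheduler_dags.py",
    dag_id="test_task_start_date_scheduling",
    execution_date=timezone.utcnow(),
)
assert serialized_dag.dag_id == dag_run.dag_id
assert dag_run.get_state() == State.RUNNING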
def _add_dag_from_db(self, dag_id: str, session: Session):
    """Add DAG to DagBag from DB"""
    from airflow.models.serialized_dag import SerializedDagModel
    row = SerializedDagModel.get(dag_id, session)
    if not row:
        raise SerializedDagNotFound(f"DAG '{dag_id}' not found in serialized_dag table")

    row.load_op_links = self.load_op_links
    dag = row.dag
    for subdag in dag.subdags:
        self.dags[subdag.dag_id] = subdag
    self.dags[dag.dag_id] = dag
    self.dags_last_fetched[dag.dag_id] = timezone.utcnow()
    self.dags_hash[dag.dag_id] = row.dag_hash
def test_rendered_view_for_unexecuted_tis(self, get_dag_function):
    """
    Test that the Rendered View is able to show rendered values
    even for TIs that have not yet executed
    """
    get_dag_function.return_value = SerializedDagModel.get(self.dag.dag_id).dag

    self.assertEqual(self.task1.bash_command, '{{ task_instance_key_str }}')

    url = ('/admin/airflow/rendered?task_id=task1&dag_id=task1&execution_date={}'
           .format(self.percent_encode(self.default_date)))

    resp = self.app.get(url, follow_redirects=True)
    self.assertIn("testdag__task1__20200301", resp.data.decode('utf-8'))
def test_rendered_view(self, get_dag_function):
    """
    Test that the Rendered View contains the values from RenderedTaskInstanceFields
    """
    get_dag_function.return_value = SerializedDagModel.get(self.dag.dag_id).dag

    self.assertEqual(self.task1.bash_command, '{{ task_instance_key_str }}')
    ti = TaskInstance(self.task1, self.default_date)

    with create_session() as session:
        session.add(RTIF(ti))

    url = ('/admin/airflow/rendered?task_id=task1&dag_id=testdag&execution_date={}'
           .format(self.percent_encode(self.default_date)))

    resp = self.app.get(url, follow_redirects=True)
    self.assertIn("testdag__task1__20200301", resp.data.decode('utf-8'))
def __init__(self,
             mailbox: Mailbox,
             dag_run_id: DagRunId,
             max_num_event: int,
             poll_timeout: int = 0) -> None:
    """
    :param mailbox: where the EventHandleResult is sent to.
    :type mailbox: Mailbox
    :param dag_run_id: the run id of the dag run
    :type dag_run_id: DagRunId
    :param max_num_event: max number of events that can be handled before exit
    :type max_num_event: int
    :param poll_timeout: poll timeout in seconds for an event before exit
    :type poll_timeout: int
    """
    super().__init__()
    self._mailbox = mailbox
    self._dag_run_id = dag_run_id
    self._event_queue = Queue()
    self._max_num_event = max_num_event
    self._poll_timeout = poll_timeout

    dag_runs = DagRun.find(dag_id=dag_run_id.dag_id, run_id=dag_run_id.run_id)
    if len(dag_runs) < 1:
        raise RuntimeError("no dag_run found with dag_run_id: {}".format(dag_run_id))
    elif len(dag_runs) > 1:
        raise RuntimeError("more than one dag_run found with dag_run_id: {}".format(dag_run_id))
    self._dag_run = dag_runs[0]

    dag_id = self._dag_run.dag_id
    self._serialized_dag_model = SerializedDagModel.get(dag_id)
    if self._serialized_dag_model is None:
        raise RuntimeError("no serialized dag is found with dag_id: {}".format(dag_id))
def get_dag(self, dag_id, from_file_only=False):
    """
    Gets the DAG out of the dictionary, and refreshes it if expired

    :param dag_id: DAG Id
    :type dag_id: str
    :param from_file_only: returns a DAG loaded from file.
    :type from_file_only: bool
    """
    # Avoid circular import
    from airflow.models.dag import DagModel

    # Only read DAGs from DB if this dagbag is store_serialized_dags.
    # from_file_only is an exception, currently it is for rendering templates
    # in UI only. Because functions are gone in serialized DAGs, DAGs must be
    # imported from files.
    # FIXME: this exception should be removed in future, then webserver can be
    # decoupled from DAG files.
    if self.store_serialized_dags and not from_file_only:
        # Import here so that serialized dag is only imported when serialization is enabled
        from airflow.models.serialized_dag import SerializedDagModel
        if dag_id not in self.dags:
            # Load from DB if not (yet) in the bag
            row = SerializedDagModel.get(dag_id)
            if not row:
                return None

            dag = row.dag
            for subdag in dag.subdags:
                self.dags[subdag.dag_id] = subdag
            self.dags[dag.dag_id] = dag

        return self.dags.get(dag_id)

    # If asking for a known subdag, we want to refresh the parent
    dag = None
    root_dag_id = dag_id
    if dag_id in self.dags:
        dag = self.dags[dag_id]
        if dag.is_subdag:
            root_dag_id = dag.parent_dag.dag_id

    # Needs to load from file for a store_serialized_dags dagbag.
    enforce_from_file = False
    if self.store_serialized_dags and dag is not None:
        from airflow.serialization.serialized_dag import SerializedDAG
        enforce_from_file = isinstance(dag, SerializedDAG)

    # If the dag corresponding to root_dag_id is absent or expired
    orm_dag = DagModel.get_current(root_dag_id)
    if (orm_dag and (
            root_dag_id not in self.dags or
            (orm_dag.last_expired and dag.last_loaded < orm_dag.last_expired)
    )) or enforce_from_file:
        # Reprocess source file
        found_dags = self.process_file(
            filepath=correct_maybe_zipped(orm_dag.fileloc), only_if_updated=False)

        # If the source file no longer exports `dag_id`, delete it from self.dags
        if found_dags and dag_id in [found_dag.dag_id for found_dag in found_dags]:
            return self.dags[dag_id]
        elif dag_id in self.dags:
            del self.dags[dag_id]
    return self.dags.get(dag_id)
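# --- Usage sketch (illustrative, not part of the original source) ---
# The from_file_only escape hatch above exists because user-defined macros and
# filters are lost during serialization, so the webserver must re-parse the DAG
# file before rendering templates. "example_dag" is a placeholder id, and this
# assumes a 1.10.x-style DagBag whose constructor accepts store_serialized_dags.
from airflow.models.dagbag import DagBag

serialized_dagbag = DagBag(store_serialized_dags=True)
dag_from_file = serialized_dagbag.get_dag("example_dag", from_file_only=True)  # bypasses the serialized copy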