def get_dag(self, dag_id):
    """
    Fetch ``dag_id`` from ``self.dags``, re-processing its source file first
    when the stored ``DagModel`` marks it as expired or it is not loaded yet.

    When ``dag_id`` is a known subdag, the ``DagModel`` that is consulted is
    the one of its parent DAG.

    :param dag_id: id of the DAG to fetch
    :return: ``self.dags.get(dag_id)`` after any refresh, so ``None`` when the
        DAG is not (or no longer) present
    """
    # Decide whose DagModel row governs the refresh: the DAG itself, or the
    # parent when the requested DAG is an already-loaded subdag.
    root_dag_id = dag_id
    if dag_id in self.dags:
        dag = self.dags[dag_id]
        if dag.is_subdag:
            root_dag_id = dag.parent_dag.dag_id

    orm_dag = DagModel.get_current(root_dag_id)
    if not orm_dag:
        return self.dags.get(dag_id)

    # `dag` is only read once `root_dag_id` is known to be in self.dags, which
    # implies the lookup above bound it.
    is_stale = root_dag_id not in self.dags or (
        orm_dag.last_expired and dag.last_loaded < orm_dag.last_expired
    )
    if not is_stale:
        return self.dags.get(dag_id)

    # Reprocess the source file unconditionally.
    found_dags = self.process_file(filepath=orm_dag.fileloc, only_if_updated=False)
    if found_dags and any(found.dag_id == dag_id for found in found_dags):
        return self.dags[dag_id]

    # The source file no longer exports `dag_id`: drop the cached entry.
    if dag_id in self.dags:
        del self.dags[dag_id]
    return self.dags.get(dag_id)
def trigger_dag(
    dag_id: str,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> Optional[DagRun]:
    """Trigger a run of the DAG identified by ``dag_id``.

    The DAG is loaded through a ``DagBag`` built from the file location stored
    on its ``DagModel``; the actual triggering is delegated to ``_trigger_dag``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: first dag run triggered - even if more than one Dag Runs were
        triggered - or None when nothing was triggered
    :raises DagNotFound: when ``DagModel.get_current`` returns None for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    bag = DagBag(dag_folder=model.fileloc)
    triggered = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not triggered:
        return None
    return triggered[0]
def execute(self, context: Dict):
    """Trigger ``self.trigger_dag_id`` and optionally wait for the run to finish.

    The execution date is resolved once (from ``self.execution_date`` when it
    is a datetime or a string, otherwise the current UTC time) and that single
    value is used for the run id, the trigger call, the log message and the
    clear window, so they always agree.

    :param context: task context (not read by this method)
    :raises DagNotFound: when resetting an existing run and the target DAG
        cannot be found in ``DagModel`` or in the ``DagBag``
    :raises DagRunAlreadyExists: when the run exists and ``self.reset_dag_run``
        is false
    :raises AirflowException: when waiting and the run reaches one of
        ``self.failed_states``
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved value rather than self.execution_date: when the
        # attribute is unset the latter is None, and the run would get an
        # execution date that differs from the one encoded in run_id.
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
        dag = dag_bag.get_dag(self.trigger_dag_id)
        if dag is None:
            # Fail with the domain error instead of an AttributeError on None.
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found")

        # Bounded by the resolved date so a None attribute can never widen
        # the clear to an open-ended range.
        dag.clear(start_date=execution_date, end_date=execution_date)
        dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(f"{self.trigger_dag_id} failed with failed states {state}")
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                return
def get_dag_by_file_location(dag_id: str):
    """Return the DAG ``dag_id`` by loading only the file recorded for it.

    The file location is taken from the DAG's ``DagModel`` and handed to a
    fresh ``DagBag``. The benefit is that logging from other dags in the
    dagbag will not appear.

    :param dag_id: id of the DAG to load
    :return: ``dagbag.dags[dag_id]`` from the single-file ``DagBag``
    :raises AirflowException: when ``DagModel.get_current`` returns None
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is not None:
        single_file_bag = DagBag(dag_folder=dag_model.fileloc)
        return single_file_bag.dags[dag_id]

    raise AirflowException(
        'dag_id could not be found: {}. Either the dag did not exist or it failed to '
        'parse.'.format(dag_id))
def get_dag_by_file_location(dag_id: str):
    """Return the DAG ``dag_id`` by loading only the file recorded for it.

    ``DagBag`` and ``DagModel`` are imported inside the function. The file
    location comes from the DAG's ``DagModel``; loading just that file means
    logging from other dags in the dagbag will not appear.

    :param dag_id: id of the DAG to load
    :return: ``dagbag.dags[dag_id]`` from the single-file ``DagBag``
    :raises AirflowException: when ``DagModel.get_current`` returns None
    """
    from airflow.models import DagBag, DagModel

    dag_model = DagModel.get_current(dag_id)
    if dag_model is not None:
        single_file_bag = DagBag(dag_folder=dag_model.fileloc)
        return single_file_bag.dags[dag_id]

    raise AirflowException(
        f"Dag {dag_id!r} could not be found; either it does not exist or it failed to parse."
    )
def index(self):
    """Render the plugin index page with the DAGs found by a fresh ``DagBag``.

    Each DAG is listed with an ``is_active`` flag, which is the negation of
    ``is_paused`` on its ``DagModel``. A DAG for which ``DagModel.get_current``
    returns None is listed as inactive.

    :return: the result of ``self.render`` for ``rest_api_plugin/index.html``
    """
    logging.info("REST_API.index() called")
    dagbag = DagBag()
    dags = []
    for dag_id in dagbag.dags:
        orm_dag = DagModel.get_current(dag_id)
        # A DAG may be in the DagBag with no DagModel available for it;
        # report it as inactive rather than failing on `None.is_paused`.
        is_active = orm_dag is not None and not orm_dag.is_paused
        dags.append({"dag_id": dag_id, "is_active": is_active})

    return self.render(
        "rest_api_plugin/index.html",
        dags=dags,
        airflow_webserver_base_url=airflow_webserver_base_url,
        url_dict=url_dict,
    )
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Verify that the DAG exists and, when ``task_id`` is given, that the task does too.

    :param dag_id: id of the DAG to look up
    :param task_id: optional id of a task that must be part of the DAG
    :return: the object returned by ``DagBag.get_dag`` for ``dag_id``
        (NOTE(review): the annotation says ``DagModel`` - confirm which is intended)
    :raises DagNotFound: when there is no ``DagModel`` for ``dag_id`` or the
        ``DagBag`` yields no DAG for it
    :raises TaskNotFound: when ``task_id`` is given and the DAG has no such task
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound(f"Dag id {dag_id} not found in DagModel")

    dag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True).get_dag(dag_id)
    if not dag:
        raise DagNotFound(f"Dag id {dag_id} not found")

    if task_id and not dag.has_task(task_id):
        raise TaskNotFound(f'Task {task_id} not found in dag {dag_id}')

    return dag
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Verify that the DAG exists and, when ``task_id`` is given, that the task does too.

    :param dag_id: id of the DAG to look up
    :param task_id: optional id of a task that must be part of the DAG
    :return: the object returned by ``DagBag.get_dag`` for ``dag_id``
        (NOTE(review): the annotation says ``DagModel`` - confirm which is intended)
    :raises DagNotFound: when there is no ``DagModel`` for ``dag_id`` or the
        id is missing from ``dagbag.dags`` after the lookup
    :raises TaskNotFound: when ``task_id`` is given and the DAG has no such task
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dag_file = dag_model.fileloc
    use_serialized = conf.getboolean('core', 'store_serialized_dags')
    dagbag = DagBag(dag_folder=dag_file, store_serialized_dags=use_serialized)

    # Call get_dag before inspecting dagbag.dags: it prefetches the dag if it
    # is stored serialized.
    dag = dagbag.get_dag(dag_id)
    if dag_id not in dagbag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    if task_id and not dag.has_task(task_id):
        raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))

    return dag
def execute(self, context: Dict):
    """Trigger ``self.trigger_dag_id``, clearing the existing run when allowed.

    The execution date is resolved once (from ``self.execution_date`` when it
    is a datetime or a string, otherwise the current UTC time) and that single
    value is used for the run id, the trigger call, the log message and the
    clear window, so they always agree.

    :param context: task context (not read by this method)
    :raises DagNotFound: when resetting an existing run and the target DAG
        cannot be found in ``DagModel`` or in the ``DagBag``
    :raises DagRunAlreadyExists: when the run exists and ``self.reset_dag_run``
        is false
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved value rather than self.execution_date: when the
        # attribute is unset the latter is None, and the run would get an
        # execution date that differs from the one encoded in run_id.
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(
                f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(
            dag_folder=dag_model.fileloc,
            store_serialized_dags=settings.STORE_SERIALIZED_DAGS)
        dag = dag_bag.get_dag(self.trigger_dag_id)
        if dag is None:
            # Fail with the domain error instead of an AttributeError on None.
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found")

        # Bounded by the resolved date so a None attribute can never widen
        # the clear to an open-ended range.
        dag.clear(start_date=execution_date, end_date=execution_date)
def index(self):
    """Render the plugin index page listing the DAGs of ``self.get_dagbag()``.

    Each DAG is listed with an ``is_active`` flag: the negation of
    ``is_paused`` on its ``DagModel``, or False when no ``DagModel`` is found.

    :return: the result of ``self.render`` for ``rest_api_plugin/index.html``
    """
    logging.info("REST_API.index() called")

    # Gather what the page displays about each available dag.
    dags = []
    for dag_id in self.get_dagbag().dags:
        orm_dag = DagModel.get_current(dag_id)
        is_active = orm_dag is not None and not orm_dag.is_paused
        dags.append({"dag_id": dag_id, "is_active": is_active})

    template_context = dict(
        dags=dags,
        airflow_webserver_base_url=airflow_webserver_base_url,
        rest_api_endpoint=rest_api_endpoint,
        apis_metadata=apis_metadata,
        airflow_version=airflow_version,
    )
    return self.render("rest_api_plugin/index.html", **template_context)
def index(self):
    """Render the plugin index page listing the DAGs of ``self.get_dagbag()``.

    Each DAG is listed with an ``is_active`` flag: the negation of
    ``is_paused`` on its ``DagModel``, or False when no ``DagModel`` is found.

    :return: the result of ``self.render`` for ``rest_api_plugin/index.html``
    """
    logging.info("REST_API.index() called")

    def describe(dag_id):
        # One page entry per dag; a missing DagModel counts as inactive.
        orm_dag = DagModel.get_current(dag_id)
        return {
            "dag_id": dag_id,
            "is_active": False if orm_dag is None else not orm_dag.is_paused,
        }

    dags = [describe(dag_id) for dag_id in self.get_dagbag().dags]

    return self.render(
        "rest_api_plugin/index.html",
        dags=dags,
        airflow_webserver_base_url=airflow_webserver_base_url,
        rest_api_endpoint=rest_api_endpoint,
        apis=apis,
        airflow_version=airflow_version,
        rest_api_plugin_version=rest_api_plugin_version,
    )
def trigger_dag(
        dag_id,  # type: str
        run_id=None,  # type: Optional[str]
        conf=None,  # type: Optional[Union[dict, str]]
        execution_date=None,  # type: Optional[datetime]
        replace_microseconds=True,  # type: bool
):
    """Trigger a run of the DAG identified by ``dag_id``.

    The DAG is loaded through a ``DagBag`` built from the file location stored
    on its ``DagModel``, with ``store_serialized_dags`` read from the ``core``
    section of the Airflow configuration; the triggering itself is delegated
    to ``_trigger_dag``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: first dag run triggered - even if more than one Dag Runs were
        triggered - or None when nothing was triggered
    :raises DagNotFound: when ``DagModel.get_current`` returns None for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Imported under an alias because the ``conf`` parameter of this function
    # already names the dag run configuration.
    from airflow.configuration import conf as airflow_conf

    bag = DagBag(
        dag_folder=model.fileloc,
        store_serialized_dags=airflow_conf.getboolean('core', 'store_serialized_dags'),
    )
    triggered = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not triggered:
        return None
    return triggered[0]
def trigger_dag(self, dag_id, run_id, conf):
    """Trigger ``dag_id`` from its own file, retrying the file load once.

    The file path comes from the DAG's ``DagModel``; if that lookup fails for
    any reason, ``<DAGS_FOLDER>/<dag_id>.py`` is used instead. When the first
    ``DagBag`` load yields no DAGs, the import errors are logged and the load
    is attempted a second time after a 3 second pause.

    :param dag_id: id of the DAG to trigger
    :param run_id: id to give the dag run
    :param conf: configuration passed through to ``_trigger_dag``
    :return: the first triggered dag run, or None when nothing was triggered
    """
    try:
        dag_file = DagModel.get_current(dag_id).fileloc
    except Exception:
        # Best-effort fallback: assume the conventional file name.
        dag_file = path.join(DAGS_FOLDER, dag_id + ".py")

    bag = DagBag(dag_folder=dag_file)
    if not bag.dags:
        logging.info("Failed to import dag due to the following errors")
        logging.info(bag.import_errors)
        logging.info("Sleep for 3 seconds and give it a second try")
        sleep(3)
        bag = DagBag(dag_folder=dag_file)

    triggered = trigger_dag._trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=None,
        replace_microseconds=False,
    )

    if not triggered:
        return None
    return triggered[0]
def trigger_dag(
        dag_id,
        run_id=None,
        conf=None,
        execution_date=None,
        replace_microseconds=True,
):
    """Trigger a run of the DAG identified by ``dag_id``.

    The DAG is loaded through a ``DagBag`` built from the file location stored
    on its ``DagModel``; the triggering itself is delegated to ``_trigger_dag``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration passed through to ``_trigger_dag``
    :param execution_date: date of execution
    :param replace_microseconds: passed through to ``_trigger_dag``
    :return: the first triggered dag run, or None when nothing was triggered
    :raises DagNotFound: when ``DagModel.get_current`` returns None for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    bag = DagBag(dag_folder=model.fileloc)
    triggered = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not triggered:
        return None
    return triggered[0]
def trigger_dag(self):
    """
    Trigger execution of the DAG named by ``self.dag_id``.

    The DAG's file location is read from its ``DagModel`` and used to build a
    ``DagBag`` (``store_serialized_dags`` comes from the ``core`` config
    section). That bag and an empty ``DagRun`` are handed to
    ``self._trigger_dag``, whose result is not returned.

    :raises DagNotFound: when ``DagModel.get_current`` returns None for
        ``self.dag_id``
    """
    model = DagModel.get_current(self.dag_id)
    if model is None:
        raise DagNotFound(f"Dag id {self.dag_id} not found in DagModel")

    use_serialized = conf.getboolean("core", "store_serialized_dags")
    bag = DagBag(dag_folder=model.fileloc, store_serialized_dags=use_serialized)

    # Empty dag run for _trigger_dag to work with.
    empty_run = DagRun()
    self._trigger_dag(dag_id=self.dag_id, dag_bag=bag, dag_run=empty_run)
def execute(self, context: Context):
    """Trigger ``self.trigger_dag_id``, publish the run via XCom, optionally wait.

    The execution date is parsed from ``self.execution_date`` (datetime or
    string) or defaults to the current UTC time. The run id is
    ``self.trigger_run_id`` when set, otherwise it is generated from the
    parsed date. If the run already exists and ``self.reset_dag_run`` is set,
    the target DAG is cleared for that date and the existing run is reused.

    :param context: task context; ``context['task_instance']`` receives the
        XCom pushes
    :raises DagNotFound: when resetting and the target has no ``DagModel``
    :raises DagRunAlreadyExists: when the run exists and resetting is disabled
    :raises RuntimeError: when no dag run is available after triggering
    :raises AirflowException: when waiting and the run reaches one of
        ``self.failed_states``
    """
    requested_date = self.execution_date
    if isinstance(requested_date, datetime.datetime):
        parsed_execution_date = requested_date
    elif isinstance(requested_date, str):
        parsed_execution_date = timezone.parse(requested_date)
    else:
        parsed_execution_date = timezone.utcnow()

    run_id = self.trigger_run_id or DagRun.generate_run_id(DagRunType.MANUAL, parsed_execution_date)

    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=parsed_execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if not self.reset_dag_run:
            raise e

        self.log.info("Clearing %s on %s", self.trigger_dag_id, parsed_execution_date)

        # Fetch the target dag object, clear it, then pick up the existing run.
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(
                f"Dag id {self.trigger_dag_id} not found in DagModel")

        target_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
        dag = target_bag.get_dag(self.trigger_dag_id)
        dag.clear(start_date=parsed_execution_date, end_date=parsed_execution_date)
        dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

    if dag_run is None:
        raise RuntimeError("The dag_run should be set here!")

    # Publish the execution date and run id of the dag run (created or found
    # above); they are used when creating the extra link on the webserver.
    task_instance = context['task_instance']
    task_instance.xcom_push(key=XCOM_EXECUTION_DATE_ISO, value=dag_run.execution_date.isoformat())
    task_instance.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if not self.wait_for_completion:
        return

    # Poll until the run lands in a failed or an allowed state.
    while True:
        self.log.info(
            'Waiting for %s on %s to become allowed state %s ...',
            self.trigger_dag_id,
            dag_run.execution_date,
            self.allowed_states,
        )
        time.sleep(self.poke_interval)

        dag_run.refresh_from_db()
        state = dag_run.state
        if state in self.failed_states:
            raise AirflowException(
                f"{self.trigger_dag_id} failed with failed states {state}"
            )
        if state in self.allowed_states:
            self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
            return