Exemple #1
0
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Look up a DAG by id and, optionally, verify that it contains a task.

    :param dag_id: id of the DAG to load
    :param task_id: when given, the DAG must have a task with this id
    :return: the DAG object obtained from the DagBag
    :raises DagNotFound: if ``DagModel.get_current`` returns nothing for
        ``dag_id`` or the DagBag cannot provide the DAG
    :raises TaskNotFound: if ``task_id`` is given but is not part of the DAG
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound(f"Dag id {dag_id} not found in DagModel")

    # The bag is limited to the DAG's own file and asked to read from the DB.
    bag = DagBag(dag_folder=model.fileloc, read_dags_from_db=True)
    dag = bag.get_dag(dag_id)
    if not dag:
        raise DagNotFound(f"Dag id {dag_id} not found")

    # Nothing more to verify when no task was requested or the task exists.
    if not task_id or dag.has_task(task_id):
        return dag
    raise TaskNotFound(f'Task {task_id} not found in dag {dag_id}')
Exemple #2
0
    def delete_dag(self, keep_records_in_log: bool = True, session=None):
        """Delete the database records that belong to ``self.dag_id``.

        Rows are removed from every mapped model exposing a ``dag_id``
        attribute, both for the DAG itself and for ids prefixed with
        ``<dag_id>.``. Import errors recorded for the DAG's file are removed
        as well. No commit is issued here.

        :param keep_records_in_log: when true, rows of the ``Log`` model are kept
        :param session: SQLAlchemy session used for all queries. It is used
            immediately, so a real session must be available (presumably
            injected by a decorator that is not visible here -- confirm).
        :raises DagNotFound: if no DagModel row exists for ``self.dag_id``
        """
        dag = session.query(DagModel).filter(
            DagModel.dag_id == self.dag_id).first()
        if dag is None:
            raise DagNotFound(f"Dag id {self.dag_id} not found")

        # Remove the serialized copy of the DAG explicitly rather than relying
        # on it being cleaned up elsewhere.
        if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(
                dag_id=self.dag_id, session=session):
            SerializedDagModel.remove_dag(dag_id=self.dag_id, session=session)

        # noinspection PyUnresolvedReferences,PyProtectedMember
        for model in models.base.Base._decl_class_registry.values():
            # Registry entries without a dag_id attribute are not DAG-scoped.
            if not hasattr(model, "dag_id"):
                continue
            if keep_records_in_log and model.__name__ == "Log":
                continue
            cond = or_(model.dag_id == self.dag_id,
                       model.dag_id.like(self.dag_id + ".%"))
            session.query(model).filter(cond).delete(
                synchronize_session="fetch")

        # Delete entries in Import Errors table for a deleted DAG
        # This handles the case when the dag_id is changed in the file
        session.query(models.ImportError).filter(
            models.ImportError.filename == dag.fileloc).delete(
                synchronize_session="fetch")
Exemple #3
0
def delete_dag(dag_id: str, keep_records_in_log: bool = True, session=None) -> int:
    """
    Delete the database records of a DAG whose file has already been removed.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: total number of rows deleted across all models
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    :raises DagFileExists: if the DAG's ``fileloc`` still exists on disk
    """
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    if dag.fileloc and os.path.exists(dag.fileloc):
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first: {}".format(dag_id, dag.fileloc))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if not hasattr(model, "dag_id"):
            continue
        if keep_records_in_log and model.__name__ == 'Log':
            continue
        # Match the DAG itself and every id prefixed with "<dag_id>.".
        cond = or_(model.dag_id == dag_id, model.dag_id.like(dag_id + ".%"))
        count += session.query(model).filter(cond).delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>"; also remove the
        # rows of the parent's task that represents this sub-DAG.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this list: it is not a task-level model
        # and filtering it on ``task_id`` would fail.
        # NOTE(review): confirm DagRun has no task_id column in this codebase.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(model.dag_id == parent_dag_id,
                                                 model.task_id == task_id).delete()

    return count
def delete_dag(dag_id):
    """Delete all database records of a DAG that is no longer in the DagBag.

    A new session is taken from ``settings.Session`` and committed once all
    deletions have been issued.

    :param dag_id: the dag_id of the DAG to delete
    :return: total number of rows deleted across all models
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    :raises DagFileExists: if a freshly built DagBag still contains the DAG
    """
    session = settings.Session()

    DM = models.DagModel
    dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Refuse to delete while the DAG is still loaded from a file.
    dagbag = models.DagBag()
    if dag_id in dagbag.dags:
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first.".format(dag_id))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for m in models.Base._decl_class_registry.values():
        if hasattr(m, "dag_id"):
            # Match the DAG itself and every id prefixed with "<dag_id>.".
            cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
            count += session.query(m).filter(cond).delete(
                synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>".
        p, c = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this list: it is not a task-level model
        # and filtering it on ``task_id`` would fail.
        # NOTE(review): confirm DagRun has no task_id column in this codebase.
        for m in models.TaskFail, models.TaskInstance:
            count += session.query(m).filter(m.dag_id == p,
                                             m.task_id == c).delete()

    session.commit()

    return count
Exemple #5
0
def trigger_dag(
    dag_id: str,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> Optional[DagRun]:
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag``, or None when it
        returns nothing
    :raises DagNotFound: if ``DagModel.get_current`` returns nothing for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Only the DAG's own file is loaded into the bag.
    bag = DagBag(dag_folder=model.fileloc)
    run_finder = DagRun()
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=run_finder,
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
def get_task_instance(dag_id, task_id, execution_date):
    """Return the task instance for the given DAG, task and execution date.

    :param dag_id: id of the DAG, looked up in a freshly built DagBag
    :param task_id: id of the task inside that DAG
    :param execution_date: execution date identifying the DAG run
    :raises DagNotFound: if the DagBag does not contain ``dag_id``
    :raises TaskNotFound: if the DAG has no task ``task_id``
    :raises DagRunNotFound: if the DAG has no run for ``execution_date``
    :raises TaskInstanceNotFound: if the run has no instance of the task
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # The task must be part of the DAG.
    dag = bag.get_dag(dag_id)
    if not dag.has_task(task_id):
        raise TaskNotFound(
            'Task {} not found in dag {}'.format(task_id, dag_id))

    # A DAG run must exist for the requested date.
    run = dag.get_dagrun(execution_date=execution_date)
    if not run:
        raise DagRunNotFound(
            'Dag Run for date {} not found in dag {}'.format(execution_date, dag_id))

    # Finally, the run must hold an instance of the task.
    instance = run.get_task_instance(task_id)
    if not instance:
        raise TaskInstanceNotFound(
            'Task {} instance for date {} not found'.format(task_id, execution_date))

    return instance
Exemple #7
0
    def execute(self, context: Dict):
        """Trigger the target DAG and optionally wait for the run to finish.

        ``self.execution_date`` may be a datetime, a string (parsed with
        ``timezone.parse`` and stored back on the operator) or anything else,
        in which case the current UTC time is used. The resolved date is used
        consistently for the run id, for ``trigger_dag`` and for ``dag.clear``,
        so the run id and the execution date always describe the same instant.

        :param context: task context (not used by this method)
        :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is false
        :raises DagNotFound: if the target DAG has no current DagModel row
            while resetting an existing run
        :raises AirflowException: if the awaited run ends in one of
            ``self.failed_states``
        """
        if isinstance(self.execution_date, datetime.datetime):
            execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            execution_date = timezone.parse(self.execution_date)
            self.execution_date = execution_date
        else:
            execution_date = timezone.utcnow()

        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
        try:
            # Pass the resolved date, not self.execution_date: the latter may be
            # None, which would make trigger_dag pick a different timestamp
            # than the one encoded in run_id.
            dag_run = trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists:
            if not self.reset_dag_run:
                raise

            self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
            dag = dag_bag.get_dag(self.trigger_dag_id)

            # Clear exactly the resolved date; a None bound would not restrict
            # the range to this run.
            dag.clear(start_date=execution_date, end_date=execution_date)

            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

        if self.wait_for_completion:
            # wait for dag to complete
            while True:
                self.log.info(
                    'Waiting for %s on %s to become allowed state %s ...',
                    self.trigger_dag_id,
                    dag_run.execution_date,
                    self.allowed_states,
                )
                time.sleep(self.poke_interval)

                dag_run.refresh_from_db()
                state = dag_run.state
                if state in self.failed_states:
                    raise AirflowException(f"{self.trigger_dag_id} failed with failed states {state}")
                if state in self.allowed_states:
                    self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                    return
Exemple #8
0
def _trigger_dag(
        dag_id,
        dag_bag,
        dag_run,
        run_id,
        conf,
        execution_date,
        replace_microseconds,
):
    """Create DAG runs for ``dag_id`` and all of its sub-DAGs.

    :param dag_id: DAG ID, must be present in ``dag_bag.dags``
    :param dag_bag: bag the DAG is fetched from
    :param dag_run: object whose ``find`` is used to detect an existing run
    :param run_id: ID of the dag_run; derived from the execution date when empty
    :param conf: run configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``datetime.datetime.now()`` when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of the created dag runs
    :raises DagNotFound: if ``dag_id`` is not in the bag
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Get the DAG object for dag_id.
    dag = dag_bag.get_dag(dag_id)

    # Determine the execution date.
    if not execution_date:
        execution_date = datetime.datetime.now()

    # Only the type is checked here; timezone awareness is NOT verified.
    assert isinstance(execution_date, datetime.datetime)

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    # Derive the run id; by default it is tied to the execution date.
    # The same execution date may still be used with several run ids.
    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    # Check whether the run already exists; (dag_id, run_id) identifies it.
    dr = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise DagRunAlreadyExists("Run id {} already exists for dag id {}".format(
            run_id,
            dag_id
        ))

    # Resolve the run configuration. A dict is used as is; anything else is
    # treated as a JSON document.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    # Walk the DAG and its sub-DAGs, creating one run per DAG.
    triggers = []
    dags_to_trigger = [dag]
    while dags_to_trigger:
        dag = dags_to_trigger.pop()
        trigger = dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if dag.subdags:
            dags_to_trigger.extend(dag.subdags)
    return triggers
Exemple #9
0
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Look up a DAG by id and, optionally, verify that it contains a task.

    :param dag_id: id of the DAG to load
    :param task_id: when given, the DAG must have a task with this id
    :return: the DAG object obtained from the DagBag
    :raises DagNotFound: if ``DagModel.get_current`` returns nothing for
        ``dag_id`` or the DagBag does not list the DAG after fetching it
    :raises TaskNotFound: if ``task_id`` is given but is not part of the DAG
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    use_serialized = conf.getboolean('core', 'store_serialized_dags')
    bag = DagBag(dag_folder=model.fileloc, store_serialized_dags=use_serialized)

    # Fetch before the membership test so a serialized DAG gets loaded first.
    dag = bag.get_dag(dag_id)
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Nothing more to verify when no task was requested or the task exists.
    if not task_id or dag.has_task(task_id):
        return dag
    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
Exemple #10
0
def delete_dag(dag_id: str,
               keep_records_in_log: bool = True,
               session=None) -> int:
    """
    Delete the database records of a DAG and of its sub-DAGs.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return count of deleted dags
    :raises AirflowException: if a task instance of the DAG is in RUNNING state
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    """
    log.info("Deleting DAG: %s", dag_id)

    # Refuse to delete while any task instance of the DAG is still running.
    ti = models.TaskInstance
    running = (
        session.query(ti.state)
        .filter(ti.dag_id == dag_id)
        .filter(ti.state == State.RUNNING)
        .first()
    )
    if running:
        raise AirflowException("TaskInstances still running")

    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound(f"Dag id {dag_id} not found")

    # deleting a DAG should also delete all of its subdags
    is_target = DagModel.dag_id == dag_id
    is_child_subdag = and_(DagModel.dag_id.like(f"{dag_id}.%"), DagModel.is_subdag)
    id_rows = session.query(DagModel.dag_id).filter(or_(is_target, is_child_subdag))
    dags_to_delete = [found_id for (found_id,) in id_rows]

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if SerializedDagModel.has_dag(dag_id=dag_id, session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    for model in get_sqla_model_classes():
        if not hasattr(model, "dag_id"):
            continue
        if keep_records_in_log and model.__name__ == 'Log':
            continue
        matching = session.query(model).filter(model.dag_id.in_(dags_to_delete))
        count += matching.delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>"; also remove
        # the rows of the parent's task that represents this sub-DAG.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        for model in (TaskFail, models.TaskInstance):
            parent_rows = session.query(model).filter(
                model.dag_id == parent_dag_id,
                model.task_id == task_id)
            count += parent_rows.delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    import_errors = session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc)
    import_errors.delete(synchronize_session='fetch')

    return count
Exemple #11
0
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Look up a DAG in a freshly built DagBag and optionally verify a task.

    :param dag_id: id of the DAG to load
    :param task_id: when given, the DAG must have a task with this id
    :return: the DAG object obtained from the DagBag
    :raises DagNotFound: if the DagBag does not contain ``dag_id``
    :raises TaskNotFound: if ``task_id`` is given but is not part of the DAG
    """
    bag = DagBag()
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = bag.get_dag(dag_id)

    # Nothing more to verify when no task was requested or the task exists.
    if not task_id or dag.has_task(task_id):
        return dag
    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
Exemple #12
0
def _trigger_dag(
    dag_id,
    dag_bag,
    dag_run,
    run_id,
    conf,
    execution_date,
    replace_microseconds,
):
    """Create DAG runs for ``dag_id`` and all of its sub-DAGs.

    :param dag_id: DAG ID, must be present in ``dag_bag.dags``
    :param dag_bag: bag the DAG is fetched from
    :param dag_run: object whose ``find`` is used to detect an existing run
    :param run_id: ID of the dag_run; derived from the execution date when empty
    :param conf: run configuration, either a dict (or dict subclass) or a
        JSON string
    :param execution_date: date of execution; current UTC time when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of the created dag runs
    :raises DagNotFound: if ``dag_id`` is not in the bag
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = dag_bag.get_dag(dag_id)

    if not execution_date:
        execution_date = timezone.utcnow()

    # NOTE: this check disappears when Python runs with -O.
    assert timezone.is_localized(execution_date)

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dr = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise DagRunAlreadyExists(
            "Run id {} already exists for dag id {}".format(run_id, dag_id))

    # isinstance (rather than an exact type comparison) lets dict subclasses
    # such as OrderedDict through instead of sending them to json.loads.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    # Walk the DAG and its sub-DAGs, creating one run per DAG.
    triggers = []
    dags_to_trigger = [dag]
    while dags_to_trigger:
        dag = dags_to_trigger.pop()
        trigger = dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if dag.subdags:
            dags_to_trigger.extend(dag.subdags)
    return triggers
Exemple #13
0
def delete_dag(session, model, dag_id):
    """Delete the ``DagModel`` and ``XCom`` rows of a DAG.

    :param session: session used for the lookup and the deletions
    :param model: mapped class used to check that ``dag_id`` exists
    :param dag_id: id of the DAG; ids prefixed with ``<dag_id>.`` are deleted too
    :raises DagNotFound: if ``model`` has no row for ``dag_id``
    """
    existing = session.query(model).filter(model.dag_id == dag_id).first()
    if existing is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Only these two mapped classes are purged; every other one is left alone.
    purged_names = ("DagModel", "XCom")
    for mapped in Base._decl_class_registry.values():
        if not hasattr(mapped, "dag_id") or mapped.__name__ not in purged_names:
            continue
        matches = or_(mapped.dag_id == dag_id,
                      mapped.dag_id.like(dag_id + ".%"))
        session.query(mapped).filter(matches).delete(
            synchronize_session='fetch')
Exemple #14
0
def delete_dag(dag_id: str,
               keep_records_in_log: bool = True,
               session=None) -> int:
    """
    Delete the database records of a DAG, including its serialized copy.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: total number of rows deleted across the mapped models (import
        error rows are not counted)
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    """
    logger = LoggingMixin()
    logger.log.info("Deleting DAG: %s", dag_id)
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(dag_id=dag_id,
                                                            session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if not hasattr(model, "dag_id"):
            continue
        if keep_records_in_log and model.__name__ == 'Log':
            continue
        # Match the DAG itself and every id prefixed with "<dag_id>.".
        cond = or_(model.dag_id == dag_id,
                   model.dag_id.like(dag_id + ".%"))
        count += session.query(model).filter(cond).delete(
            synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>"; also remove the
        # rows of the parent's task that represents this sub-DAG.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this list: it is not a task-level model
        # and filtering it on ``task_id`` would fail.
        # NOTE(review): confirm DagRun has no task_id column in this codebase.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(
                model.dag_id == parent_dag_id,
                model.task_id == task_id).delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc).delete(
            synchronize_session='fetch')

    return count
Exemple #15
0
def get_task(dag_id, task_id):
    """Return the task object identified by the given dag_id and task_id.

    :param dag_id: id of the DAG, looked up in a freshly built DagBag
    :param task_id: id of the task inside that DAG
    :raises DagNotFound: if the DagBag does not contain ``dag_id``
    :raises TaskNotFound: if the DAG has no task ``task_id``
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # The task must be part of the DAG before it can be fetched.
    dag = bag.get_dag(dag_id)
    if dag.has_task(task_id):
        return dag.get_task(task_id)

    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
Exemple #16
0
    def execute(self, context: Dict):
        """Trigger the target DAG, clearing an existing run when configured.

        ``self.execution_date`` may be a datetime, a string (parsed with
        ``timezone.parse`` and stored back on the operator) or anything else,
        in which case the current UTC time is used. The resolved date is used
        consistently for the run id, for ``trigger_dag`` and for ``dag.clear``,
        so the run id and the execution date always describe the same instant.

        :param context: task context (not used by this method)
        :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is false
        :raises DagNotFound: if the target DAG has no current DagModel row
            while resetting an existing run
        """
        if isinstance(self.execution_date, datetime.datetime):
            execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            execution_date = timezone.parse(self.execution_date)
            self.execution_date = execution_date
        else:
            execution_date = timezone.utcnow()

        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
        try:
            # Pass the resolved date, not self.execution_date: the latter may be
            # None, which would make trigger_dag pick a different timestamp
            # than the one encoded in run_id.
            trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists:
            if not self.reset_dag_run:
                raise

            self.log.info("Clearing %s on %s", self.trigger_dag_id,
                          execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(
                dag_folder=dag_model.fileloc,
                store_serialized_dags=settings.STORE_SERIALIZED_DAGS)

            dag = dag_bag.get_dag(self.trigger_dag_id)

            # Clear exactly the resolved date; a None bound would not restrict
            # the range to this run.
            dag.clear(start_date=execution_date,
                      end_date=execution_date)
Exemple #17
0
def get_code(dag_id):
    """Return python code of a given dag_id.

    :param dag_id: id of the DAG whose source file should be read
    :return: content of the file at the DAG's ``fileloc``
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    :raises AirflowException: if reading the file fails with an IOError
    """
    session = settings.Session()
    try:
        DM = models.DagModel
        dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    finally:
        # Release the session even when the query itself raises.
        session.close()

    # Check DAG exists.
    if dag is None:
        error_message = "Dag id {} not found".format(dag_id)
        raise DagNotFound(error_message)

    try:
        with wwwutils.open_maybe_zipped(dag.fileloc, 'r') as f:
            code = f.read()
            return code
    except IOError as e:
        error_message = "Error {} while reading Dag id {} Code".format(
            str(e), dag_id)
        raise AirflowException(error_message)
Exemple #18
0
def delete_dag(dag_id, keep_records_in_log=True):
    """
    Delete the database records of a DAG whose file has already been removed.

    A new session is taken from ``settings.Session`` and committed once all
    deletions have been issued.

    :param dag_id: the dag_id of the DAG to delete
    :type dag_id: str
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :type keep_records_in_log: bool
    :return: total number of rows deleted across all models
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    :raises DagFileExists: if the DAG's ``fileloc`` still exists on disk
    """
    session = settings.Session()

    DM = models.DagModel
    dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Refuse to delete while the DAG file is still present. The check must be
    # positive: the error tells the user to remove an existing file.
    if dag.fileloc and os.path.exists(dag.fileloc):
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first: {}".format(
                                dag_id, dag.fileloc))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for m in models.base.Base._decl_class_registry.values():
        if hasattr(m, "dag_id"):
            if keep_records_in_log and m.__name__ == 'Log':
                continue
            # Match the DAG itself and every id prefixed with "<dag_id>.".
            cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
            count += session.query(m).filter(cond).delete(
                synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>".
        p, c = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this list: it is not a task-level model
        # and filtering it on ``task_id`` would fail.
        # NOTE(review): confirm DagRun has no task_id column in this codebase.
        for m in models.TaskFail, models.TaskInstance:
            count += session.query(m).filter(m.dag_id == p,
                                             m.task_id == c).delete()

    session.commit()

    return count
Exemple #19
0
    def _trigger_dag(self, dag_id: str, dag_bag: DagBag, dag_run: DagRun):
        """Create a RUNNING, externally triggered run of ``dag_id`` dated now.

        :param dag_id: DAG ID, must be listed in ``dag_bag.dags`` after fetching
        :param dag_bag: bag the DAG is fetched from
        :param dag_run: object whose ``find`` is used to detect an existing run
        :raises DagNotFound: if the bag does not contain ``dag_id``
        :raises DagRunAlreadyExists: if a run with the generated id exists
        """
        # Fetch before the membership test so a serialized DAG gets loaded first.
        dag = dag_bag.get_dag(dag_id)
        if dag_id not in dag_bag.dags:
            raise DagNotFound(f"Dag id {dag_id} not found")

        now = timezone.utcnow()
        run_id = f"rb_status_manual__{now.isoformat()}"

        if dag_run.find(dag_id=dag_id, run_id=run_id):
            raise DagRunAlreadyExists(
                f"Run id {run_id} already exists for dag id {dag_id}")

        run_kwargs = dict(
            run_id=run_id,
            execution_date=now,
            state=State.RUNNING,
            external_trigger=True,
        )
        dag.create_dagrun(**run_kwargs)
Exemple #20
0
def get_dag_run_state(dag_id, execution_date):
    """Return the state of the DAG run for the given dag_id and execution date.

    :param dag_id: id of the DAG, looked up in a freshly built DagBag
    :param execution_date: execution date identifying the DAG run
    :return: dict with the single key ``'state'``
    :raises DagNotFound: if the DagBag does not contain ``dag_id``
    :raises DagRunNotFound: if the DAG has no run for ``execution_date``
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = bag.get_dag(dag_id)

    # A DAG run must exist for the requested date.
    run = dag.get_dagrun(execution_date=execution_date)
    if not run:
        raise DagRunNotFound(
            'Dag Run for date {} not found in dag {}'.format(execution_date, dag_id))

    return {'state': run.get_state()}
Exemple #21
0
def trigger_dag(
        dag_id,  # type: str
        run_id=None,  # type: Optional[str]
        conf=None,  # type: Optional[Union[dict, str]]
        execution_date=None,  # type: Optional[datetime]
        replace_microseconds=True,  # type: bool
):
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag``, or None when it
        returns nothing
    :raises DagNotFound: if ``DagModel.get_current`` returns nothing for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # The ``conf`` parameter hides any outer ``conf`` name, so the Airflow
    # configuration object is imported here under a different name.
    from airflow.configuration import conf as airflow_conf
    use_serialized = airflow_conf.getboolean('core', 'store_serialized_dags')

    bag = DagBag(
        dag_folder=model.fileloc,
        store_serialized_dags=use_serialized
    )
    run_finder = DagRun()
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=run_finder,
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
Exemple #22
0
def delete_dag(dag_id, keep_records_in_log=True, session=None):
    """
    Delete the database records that belong to a DAG.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: total number of rows deleted across the mapped models (import
        error rows are not counted)
    :raises DagNotFound: if no DagModel row exists for ``dag_id``
    """
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if not hasattr(model, "dag_id"):
            continue
        if keep_records_in_log and model.__name__ == 'Log':
            continue
        # Match the DAG itself and every id prefixed with "<dag_id>.".
        cond = or_(model.dag_id == dag_id,
                   model.dag_id.like(dag_id + ".%"))
        count += session.query(model).filter(cond).delete(
            synchronize_session='fetch')

    if dag.is_subdag:
        # A sub-DAG id has the form "<parent_dag_id>.<task_id>"; also remove the
        # rows of the parent's task that represents this sub-DAG.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this list: it is not a task-level model
        # and filtering it on ``task_id`` would fail.
        # NOTE(review): confirm DagRun has no task_id column in this codebase.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(
                model.dag_id == parent_dag_id,
                model.task_id == task_id).delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc).delete(
            synchronize_session='fetch')

    return count
Exemple #23
0
    def trigger_dag(self):
        """
        Trigger a run of the DAG named by ``self.dag_id``.

        The DAG's current DagModel row supplies the file location for a
        DagBag, which is then handed to ``self._trigger_dag`` together with
        an empty DagRun.

        :raises DagNotFound: if ``DagModel.get_current`` returns nothing for
            ``self.dag_id``
        """
        model = DagModel.get_current(self.dag_id)
        if model is None:
            raise DagNotFound(f"Dag id {self.dag_id} not found in DagModel")

        bag = DagBag(
            dag_folder=model.fileloc,
            store_serialized_dags=conf.getboolean("core", "store_serialized_dags"),
        )
        empty_run = DagRun()
        self._trigger_dag(dag_id=self.dag_id, dag_bag=bag, dag_run=empty_run)
def trigger_dag(
    dag_id,
    run_id=None,
    conf=None,
    execution_date=None,
    replace_microseconds=True,
):
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag``, or None when it
        returns nothing
    :raises DagNotFound: if ``DagModel.get_current`` returns nothing for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Only the DAG's own file is loaded into the bag.
    bag = DagBag(dag_folder=model.fileloc)
    run_finder = DagRun()
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=run_finder,
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
Exemple #25
0
    def execute(self, context: Context):
        """Trigger the target DAG, publish run details and optionally wait.

        The execution date is taken from ``self.execution_date`` (datetime as
        is, string via ``timezone.parse``, otherwise current UTC time). The
        run's execution date and run id are pushed to XCom on the task
        instance found in ``context``.

        :param context: task context; ``context['task_instance']`` is used
        :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is false
        :raises DagNotFound: if the target DAG has no current DagModel row
            while resetting an existing run
        :raises RuntimeError: if no dag run is available after triggering
        :raises AirflowException: if the awaited run ends in one of
            ``self.failed_states``
        """
        requested_date = self.execution_date
        if isinstance(requested_date, datetime.datetime):
            parsed_execution_date = requested_date
        elif isinstance(requested_date, str):
            parsed_execution_date = timezone.parse(requested_date)
        else:
            parsed_execution_date = timezone.utcnow()

        # An explicit run id wins; otherwise one is generated from the date.
        run_id = self.trigger_run_id or DagRun.generate_run_id(
            DagRunType.MANUAL, parsed_execution_date)

        try:
            dag_run = trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=parsed_execution_date,
                replace_microseconds=False,
            )
        except DagRunAlreadyExists as e:
            if not self.reset_dag_run:
                raise e

            self.log.info("Clearing %s on %s", self.trigger_dag_id,
                          parsed_execution_date)

            # Load the target DAG and clear the existing run's date.
            target_model = DagModel.get_current(self.trigger_dag_id)
            if target_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            bag = DagBag(dag_folder=target_model.fileloc,
                         read_dags_from_db=True)
            dag = bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=parsed_execution_date,
                      end_date=parsed_execution_date)
            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

        if dag_run is None:
            raise RuntimeError("The dag_run should be set here!")

        # Publish the execution date and run id of the run (created or found
        # above) so they can be used when building the extra link.
        task_instance = context['task_instance']
        task_instance.xcom_push(key=XCOM_EXECUTION_DATE_ISO,
                                value=dag_run.execution_date.isoformat())
        task_instance.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

        if not self.wait_for_completion:
            return

        # Poll until the run reaches a failed or an allowed state.
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(
                    f"{self.trigger_dag_id} failed with failed states {state}"
                )
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s",
                              self.trigger_dag_id, state)
                return
Exemple #26
0
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    dag_run: DagModel,
    run_id: Optional[str],
    conf: Optional[Union[dict, str]],
    execution_date: Optional[datetime],
    replace_microseconds: bool,
) -> List[DagRun]:  # pylint: disable=too-many-arguments
    """Triggers DAG run.

    Creates one run for the DAG and one for every entry of its ``subdags``,
    all sharing the same run id, execution date and configuration.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; only its ``find`` method is used here, to
        look up runs that already exist
    :param run_id: ID of the dag_run; generated with
        ``DagRun.generate_run_id`` when empty
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags`` after the fetch
    :raises ValueError: if the execution date is not localized, or is earlier
        than the ``start_date`` found in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if a run with this run id already exists
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    execution_date = execution_date if execution_date else timezone.utcnow()

    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                f"The execution_date [{execution_date.isoformat()}] should be >= start_date "
                f"[{min_dag_start_date.isoformat()}] from DAG's default_args")

    run_id = run_id or DagRun.generate_run_id(DagRunType.MANUAL,
                                              execution_date)

    # ``find`` returns a list of matching runs, so the message is built from
    # ``run_id`` itself; reading ``.run_id`` off the result would raise
    # AttributeError and hide the real DagRunAlreadyExists error.
    existing_runs = dag_run.find(dag_id=dag_id, run_id=run_id)
    if existing_runs:
        raise DagRunAlreadyExists(
            f"Run id {run_id} already exists for dag id {dag_id}")

    run_conf = None
    if conf:
        # A non-dict configuration is expected to be a JSON document.
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    dags_to_trigger = [dag] + dag.subdags
    for _dag in dags_to_trigger:
        trigger = _dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )

        triggers.append(trigger)
    return triggers
Exemple #27
0
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    dag_run: DagModel,
    run_id: Optional[str],
    conf: Optional[Union[dict, str]],
    execution_date: Optional[datetime],
    replace_microseconds: bool,
) -> List[DagRun]:  # pylint: disable=too-many-arguments
    """Triggers DAG run.

    Creates a run for the DAG and then walks its ``subdags`` with a stack,
    creating a run with the same run id, execution date and configuration
    for each DAG it visits.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; only its ``find`` method is used here, to
        look up runs that already exist
    :param run_id: ID of the dag_run; ``manual__<execution date>`` when empty
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags`` after the fetch
    :raises ValueError: if the execution date is not localized
    :raises DagRunAlreadyExists: if a run with this run id already exists
    """
    # Fetch before the membership test so that a DAG which ``get_dag`` loads
    # on demand is present in ``dag_bag.dags`` when it is checked.
    # NOTE(review): assumes get_dag returns None for an unknown id instead of
    # raising - confirm against the DagBag implementation in use.
    dag = dag_bag.get_dag(dag_id)

    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    execution_date = execution_date if execution_date else timezone.utcnow()

    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if not run_id:
        run_id = f"manual__{execution_date.isoformat()}"

    if dag_run.find(dag_id=dag_id, run_id=run_id):
        raise DagRunAlreadyExists(
            f"Run id {run_id} already exists for dag id {dag_id}")

    run_conf = None
    if conf:
        # A non-dict configuration is expected to be a JSON document.
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # Stack-based walk: the most recently added DAG is triggered first.
    # NOTE(review): if ``subdags`` already lists nested subdags, this walk
    # would reach them more than once - confirm what ``subdags`` returns.
    dags_to_trigger = [dag]
    while dags_to_trigger:
        current_dag = dags_to_trigger.pop()
        trigger = current_dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if current_dag.subdags:
            dags_to_trigger.extend(current_dag.subdags)
    return triggers
Exemple #28
0
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> List[Optional[DagRun]]:
    """Triggers DAG run.

    A queued run is created for the DAG and for every entry of its
    ``subdags``; all of them share the run id, execution date and
    configuration.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param run_id: ID of the dag_run; generated with
        ``DagRun.generate_run_id`` when empty
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags`` after the fetch
    :raises ValueError: if the execution date is not localized, or is earlier
        than the ``start_date`` found in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if ``DagRun.find_duplicate`` reports a match
    """
    # Fetching first pulls in a DAG that is stored serialized.
    dag = dag_bag.get_dag(dag_id)
    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    if not execution_date:
        execution_date = timezone.utcnow()
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")
    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    # Reject execution dates that precede the DAG-wide start date, if one is set.
    if dag.default_args and 'start_date' in dag.default_args:
        earliest_start = dag.default_args["start_date"]
        if earliest_start and execution_date < earliest_start:
            raise ValueError(
                f"The execution_date [{execution_date.isoformat()}] should be >= start_date "
                f"[{earliest_start.isoformat()}] from DAG's default_args")

    if not run_id:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    duplicate = DagRun.find_duplicate(
        dag_id=dag_id,
        execution_date=execution_date,
        run_id=run_id,
    )
    if duplicate:
        raise DagRunAlreadyExists(
            f"A Dag Run already exists for dag id {dag_id} at {execution_date} with run id {run_id}"
        )

    # Normalise the configuration: nothing, a ready-made dict, or a JSON string.
    if not conf:
        run_conf = None
    elif isinstance(conf, dict):
        run_conf = conf
    else:
        run_conf = json.loads(conf)

    # The parent DAG comes first, followed by its subdags.
    return [
        target.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.QUEUED,
            conf=run_conf,
            external_trigger=True,
            dag_hash=dag_bag.dags_hash.get(dag_id),
        )
        for target in [dag] + dag.subdags
    ]
def _trigger_dag(
        dag_id,  # type: str
        dag_bag,  # type: DagBag
        dag_run,  # type: DagModel
        run_id,  # type: Optional[str]
        conf,  # type: Optional[Union[dict, str]]
        execution_date,  # type: Optional[datetime]
        replace_microseconds,  # type: bool
):  # pylint: disable=too-many-arguments
    # type: (...) -> List[DagRun]
    """Triggers DAG run.

    Creates a run for the DAG and then walks its ``subdags`` with a stack,
    creating a run with the same run id, execution date and configuration
    for each DAG it visits.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; only its ``find`` method is used here, to
        look up runs that already exist
    :param run_id: ID of the dag_run; ``manual__<execution date>`` when empty
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when empty
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags`` after the fetch
    :raises ValueError: if the execution date is not localized, or is earlier
        than the ``start_date`` found in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if a run with this run id already exists
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    execution_date = execution_date if execution_date else timezone.utcnow()

    # Explicit raise instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let a naive datetime through unchecked.
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                "The execution_date [{0}] should be >= start_date [{1}] from DAG's default_args"
                .format(execution_date.isoformat(),
                        min_dag_start_date.isoformat()))

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    if dag_run.find(dag_id=dag_id, run_id=run_id):
        raise DagRunAlreadyExists(
            "Run id {} already exists for dag id {}".format(run_id, dag_id))

    run_conf = None
    if conf:
        # A non-dict configuration is expected to be a JSON document.
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # Stack-based walk: the most recently added DAG is triggered first.
    # NOTE(review): if ``subdags`` already lists nested subdags, this walk
    # would reach them more than once - confirm what ``subdags`` returns.
    dags_to_trigger = [dag]
    while dags_to_trigger:
        current_dag = dags_to_trigger.pop()
        trigger = current_dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if current_dag.subdags:
            dags_to_trigger.extend(current_dag.subdags)
    return triggers