Beispiel #1
0
    def get_many(
        cls,
        execution_date: Optional[datetime.datetime] = None,
        key: Optional[str] = None,
        task_ids: Optional[Union[str, Iterable[str]]] = None,
        dag_ids: Optional[Union[str, Iterable[str]]] = None,
        map_indexes: Union[int, Iterable[int], None] = None,
        include_prior_dates: bool = False,
        limit: Optional[int] = None,
        session: Session = NEW_SESSION,
        *,
        run_id: Optional[str] = None,
    ) -> Query:
        """:sphinx-autoapi-skip:"""
        # Builds (without executing) a query over ``cls`` joined to its dag run.
        #
        # Exactly one of ``execution_date`` / ``run_id`` selects the run; passing
        # neither or both raises ValueError, and ``execution_date`` additionally
        # emits a PendingDeprecationWarning. ``key`` is only applied when truthy.
        # ``task_ids``, ``dag_ids`` and ``map_indexes`` each accept a single value
        # (equality match) or a container (IN match); ``None`` skips the filter.
        # Rows are ordered by run execution date, then timestamp, both descending,
        # and a truthy ``limit`` caps the row count.
        from airflow.models.dagrun import DagRun

        by_date = execution_date is not None
        if not exactly_one(by_date, run_id is not None):
            raise ValueError(
                f"Exactly one of run_id or execution_date must be passed. "
                f"Passed execution_date={execution_date}, run_id={run_id}"
            )
        if by_date:
            warnings.warn(
                "Passing 'execution_date' to 'XCom.get_many()' is deprecated. Use 'run_id' instead.",
                PendingDeprecationWarning,
                stacklevel=3,
            )

        selection = session.query(cls).join(cls.dag_run)
        if key:
            selection = selection.filter(cls.key == key)

        # The three "single value or container" filters share one shape, so they
        # are applied from a table of (column name, requested value) pairs.
        for column_name, wanted in (
            ("task_id", task_ids),
            ("dag_id", dag_ids),
            ("map_index", map_indexes),
        ):
            if is_container(wanted):
                selection = selection.filter(getattr(cls, column_name).in_(wanted))
            elif wanted is not None:
                selection = selection.filter(getattr(cls, column_name) == wanted)

        if include_prior_dates and by_date:
            selection = selection.filter(DagRun.execution_date <= execution_date)
        elif include_prior_dates:
            # Only a run_id is known: look up that run's execution date in a
            # subquery and use it as the upper bound.
            run_dates = session.query(DagRun.execution_date).filter(DagRun.run_id == run_id).subquery()
            selection = selection.filter(cls.execution_date <= run_dates.c.execution_date)
        elif by_date:
            selection = selection.filter(DagRun.execution_date == execution_date)
        else:
            selection = selection.filter(cls.run_id == run_id)

        selection = selection.order_by(DagRun.execution_date.desc(), cls.timestamp.desc())
        return selection.limit(limit) if limit else selection
Beispiel #2
0
    def find(
        cls,
        dag_id: Optional[Union[str, List[str]]] = None,
        run_id: Optional[Iterable[str]] = None,
        execution_date: Optional[Union[datetime, Iterable[datetime]]] = None,
        state: Optional[DagRunState] = None,
        external_trigger: Optional[bool] = None,
        no_backfills: bool = False,
        run_type: Optional[DagRunType] = None,
        session: Session = NEW_SESSION,
        execution_start_date: Optional[datetime] = None,
        execution_end_date: Optional[datetime] = None,
    ) -> List["DagRun"]:
        """
        Return the dag runs that match the given search criteria.

        Criteria left at their defaults are not applied. The result is the
        list produced by running the query, ordered by ``execution_date``.

        :param dag_id: a single dag_id, or a list of dag_ids, to search in
        :param run_id: a container of run ids; a single non-container value
            is matched by equality
        :param execution_date: a single execution date, or a container of them
        :param state: the state of the dag run
        :param external_trigger: whether this dag run is externally triggered
        :param no_backfills: exclude runs of type ``DagRunType.BACKFILL_JOB``
            when True. Defaults to False
        :param run_type: type of DagRun
        :param session: database session
        :param execution_start_date: lower bound on the execution date
        :param execution_end_date: upper bound on the execution date
        """
        if isinstance(dag_id, str):
            wanted_dags = [dag_id]
        else:
            wanted_dags = dag_id

        runs = session.query(cls)
        if wanted_dags:
            runs = runs.filter(cls.dag_id.in_(wanted_dags))

        if is_container(run_id):
            runs = runs.filter(cls.run_id.in_(run_id))
        elif run_id is not None:
            runs = runs.filter(cls.run_id == run_id)

        if is_container(execution_date):
            runs = runs.filter(cls.execution_date.in_(execution_date))
        elif execution_date is not None:
            runs = runs.filter(cls.execution_date == execution_date)

        # Date window: both bounds -> BETWEEN, otherwise a one-sided comparison.
        if execution_start_date:
            if execution_end_date:
                runs = runs.filter(
                    cls.execution_date.between(execution_start_date, execution_end_date)
                )
            else:
                runs = runs.filter(cls.execution_date >= execution_start_date)
        elif execution_end_date:
            runs = runs.filter(cls.execution_date <= execution_end_date)

        if state:
            runs = runs.filter(cls.state == state)
        if external_trigger is not None:
            runs = runs.filter(cls.external_trigger == external_trigger)
        if run_type:
            runs = runs.filter(cls.run_type == run_type)
        if no_backfills:
            runs = runs.filter(cls.run_type != DagRunType.BACKFILL_JOB)

        ordered = runs.order_by(cls.execution_date)
        return ordered.all()
    def test_is_container(self):
        """Check ``helpers.is_container`` against strings, lists and an int."""
        # Each entry pairs a candidate with the assertion that must hold for
        # ``helpers.is_container(candidate)``; the order of checks is fixed.
        expectations = (
            ("a string is not a container", self.assertFalse),
            (["a", "list", "is", "a", "container"], self.assertTrue),
            (['test_list'], self.assertTrue),
            ('test_str_not_iterable', self.assertFalse),
            # An object that is neither iterable nor a string.
            (10, self.assertFalse),
        )
        for candidate, expect in expectations:
            expect(helpers.is_container(candidate))
Beispiel #4
0
    def test_is_container(self):
        """Check ``helpers.is_container`` against strings, lists and an int."""
        # (candidate, whether is_container is expected to be truthy for it)
        expectations = (
            ("a string is not a container", False),
            (["a", "list", "is", "a", "container"], True),
            (['test_list'], True),
            ('test_str_not_iterable', False),
            # An object that is neither iterable nor a string.
            (10, False),
        )
        for candidate, is_expected_container in expectations:
            if is_expected_container:
                assert helpers.is_container(candidate)
            else:
                assert not helpers.is_container(candidate)
Beispiel #5
0
    def get_many(
        cls,
        execution_date: Optional[pendulum.DateTime] = None,
        key: Optional[str] = None,
        task_ids: Optional[Union[str, Iterable[str]]] = None,
        dag_ids: Optional[Union[str, Iterable[str]]] = None,
        include_prior_dates: bool = False,
        limit: Optional[int] = None,
        session: Session = NEW_SESSION,
        *,
        run_id: Optional[str] = None,
    ) -> Query:
        """:sphinx-autoapi-skip:"""
        # Builds (without executing) a query over ``cls``.
        #
        # Exactly one of ``execution_date`` / ``run_id`` must be given, otherwise
        # ValueError is raised. ``key``, ``task_ids`` and ``dag_ids`` are only
        # applied when truthy; the latter two use an IN match for containers and
        # an equality match otherwise. ``include_prior_dates`` requires an
        # ``execution_date`` (ValueError otherwise). Rows are ordered by
        # execution date, then timestamp, both descending; a truthy ``limit``
        # caps the row count.
        if not (execution_date is None) ^ (run_id is None):
            raise ValueError(
                "Exactly one of execution_date or run_id must be passed")

        filters = []

        if key:
            filters.append(cls.key == key)

        if task_ids:
            if is_container(task_ids):
                filters.append(cls.task_id.in_(task_ids))
            else:
                filters.append(cls.task_id == task_ids)

        if dag_ids:
            if is_container(dag_ids):
                filters.append(cls.dag_id.in_(dag_ids))
            else:
                filters.append(cls.dag_id == dag_ids)

        if include_prior_dates:
            if execution_date is None:
                # In theory it would be possible to build a subquery that joins to DagRun and then gets the
                # execution dates. Lets do that for 2.3
                raise ValueError(
                    "Using include_prior_dates needs an execution_date to be passed"
                )
            filters.append(cls.execution_date <= execution_date)
        elif execution_date is not None:
            filters.append(cls.execution_date == execution_date)

        query = session.query(cls).filter(*filters)

        # Test against None, matching the validation above. A truthiness test
        # would let a falsy run_id such as "" pass validation and then skip the
        # run filter entirely, leaving the query unscoped by run.
        if run_id is not None:
            from airflow.models.dagrun import DagRun

            query = query.join(cls.dag_run).filter(DagRun.run_id == run_id)

        query = query.order_by(cls.execution_date.desc(), cls.timestamp.desc())

        if limit:
            return query.limit(limit)
        return query
Beispiel #6
0
    def get_many(
        cls,
        execution_date: Optional[pendulum.DateTime] = None,
        key: Optional[str] = None,
        task_ids: Optional[Union[str, Iterable[str]]] = None,
        dag_ids: Optional[Union[str, Iterable[str]]] = None,
        include_prior_dates: bool = False,
        limit: Optional[int] = None,
        session: Session = NEW_SESSION,
        *,
        run_id: Optional[str] = None,
    ) -> Query:
        """:sphinx-autoapi-skip:"""
        # Builds (without executing) a query over ``cls``.
        #
        # Exactly one of ``execution_date`` / ``run_id`` selects the run; passing
        # neither or both raises ValueError, and ``execution_date`` additionally
        # emits a PendingDeprecationWarning. ``key`` is only applied when truthy.
        # ``task_ids`` and ``dag_ids`` accept a single value (equality match) or
        # a container (IN match); ``None`` skips the filter. Rows are ordered by
        # execution date, then timestamp, both descending; a truthy ``limit``
        # caps the row count.
        from airflow.models.dagrun import DagRun

        by_date = execution_date is not None
        if not exactly_one(by_date, run_id is not None):
            raise ValueError(
                "Exactly one of execution_date or run_id must be passed")
        if by_date:
            warnings.warn(
                "Passing 'execution_date' to 'XCom.get_many()' is deprecated. Use 'run_id' instead.",
                PendingDeprecationWarning,
                stacklevel=3,
            )

        selection = session.query(cls)
        if key:
            selection = selection.filter(cls.key == key)

        # Both "single value or container" filters share one shape, so they are
        # applied from a table of (column name, requested value) pairs.
        for column_name, wanted in (("task_id", task_ids), ("dag_id", dag_ids)):
            if is_container(wanted):
                selection = selection.filter(getattr(cls, column_name).in_(wanted))
            elif wanted is not None:
                selection = selection.filter(getattr(cls, column_name) == wanted)

        if include_prior_dates and by_date:
            selection = selection.filter(cls.execution_date <= execution_date)
        elif include_prior_dates:
            # NOTE: for IN_MEMORY_DAGRUN_ID this yields an empty result; the
            # original author noted that handling it here is not possible.
            run_dates = session.query(DagRun.execution_date).filter(
                DagRun.run_id == run_id).subquery()
            selection = selection.filter(cls.execution_date <= run_dates.c.execution_date)
        elif by_date:
            selection = selection.filter(cls.execution_date == execution_date)
        elif run_id == IN_MEMORY_DAGRUN_ID:
            # The in-memory run id is matched through the _DISTANT_FUTURE
            # execution date instead of a join on the run.
            selection = selection.filter(cls.execution_date == _DISTANT_FUTURE)
        else:
            selection = selection.join(cls.dag_run).filter(DagRun.run_id == run_id)

        selection = selection.order_by(cls.execution_date.desc(), cls.timestamp.desc())
        return selection.limit(limit) if limit else selection
Beispiel #7
0
def ensure_xcomarg_return_value(arg: Any) -> None:
    """Raise if *arg* holds an ``XComArg`` whose key is not the return key.

    An ``XComArg`` is checked directly. Any other value is ignored unless
    ``is_container`` accepts it, in which case its members (the values, for a
    mapping) are checked recursively.

    :raises ValueError: if an ``XComArg`` with a key other than
        ``XCOM_RETURN_KEY`` is found
    """
    from airflow.models.xcom_arg import XCOM_RETURN_KEY, XComArg

    if isinstance(arg, XComArg):
        if arg.key != XCOM_RETURN_KEY:
            raise ValueError(f"cannot map over XCom with custom key {arg.key!r} from {arg.operator}")
        return

    if not is_container(arg):
        return

    # Pick what to descend into: mapping values, or the iterable's own items.
    if isinstance(arg, collections.abc.Mapping):
        children = arg.values()
    elif isinstance(arg, collections.abc.Iterable):
        children = arg
    else:
        return

    for child in children:
        ensure_xcomarg_return_value(child)
    def unknown_to_array(value):
        """Coerce *value* into an array-like value, or ``None`` when empty.

        ``None`` and zero-length inputs give ``None``. A string that starts
        with ``[`` and ends with ``]`` is parsed with ``ast.literal_eval``;
        any other string is wrapped in a one-element list. A value accepted
        by ``is_container`` is returned unchanged.

        :raises ValueError: if *value* is neither a string nor a container
        """
        if value is None:
            return None
        if len(value) == 0:
            return None

        if isinstance(value, basestring):
            is_bracketed = value[0] == '[' and value[-1] == ']'
            return ast.literal_eval(value) if is_bracketed else [value]

        if is_container(value):
            return value

        raise ValueError(
            'input was not array or string or string representing an array'
        )
Beispiel #9
0
def try_xcom_pull(context,
                  task_ids,
                  dag_id=None,
                  key=XCOM_RETURN_KEY,
                  include_prior_dates=False):
    """
    Pull XComs for one or several tasks by delegating to ``try_get_one``.

    The execution date is always taken from ``context['ti']``. Each lookup
    is a call to ``try_get_one`` with that execution date, ``key``,
    ``dag_id``, ``include_prior_dates`` and one task id.

    If ``task_ids`` is a container, a tuple holding one ``try_get_one``
    result per task id is returned. Otherwise ``task_ids`` is passed as a
    single task id and that one result is returned as is.

    NOTE(review): the shape of each result is defined by ``try_get_one``,
    which is not shown here. The previous documentation described it as
    ``(True, value)`` on a match and ``(False, None)`` otherwise; confirm
    against ``try_get_one``.

    :param context: mapping that provides the current task instance under
        the ``'ti'`` key
    :param task_ids: a task id, or a container of task ids, to pull from
    :type task_ids: string or iterable of strings (representing task_ids)
    :param dag_id: DAG to pull from. If None (default), the dag_id of
        ``context['ti']`` is used.
    :type dag_id: string
    :param key: XCom key forwarded to ``try_get_one``. Defaults to
        XCOM_RETURN_KEY.
    :type key: string
    :param include_prior_dates: forwarded to ``try_get_one``; presumably
        widens the search to earlier execution dates (verify there).
    :type include_prior_dates: bool
    """
    ti = context['ti']
    if dag_id is None:
        dag_id = ti.dag_id
    execution_date = ti.execution_date

    def pull_one(task_id):
        # Every lookup shares the same date/key/dag settings.
        return try_get_one(execution_date=execution_date,
                           key=key,
                           dag_id=dag_id,
                           include_prior_dates=include_prior_dates,
                           task_id=task_id)

    if not is_container(task_ids):
        return pull_one(task_ids)
    return tuple(pull_one(t) for t in task_ids)
Beispiel #10
0
 def test_is_container(self):
     """A plain string is rejected; a list of strings is accepted."""
     is_container = helpers.is_container

     plain_string = "a string is not a container"
     self.assertFalse(is_container(plain_string))

     string_list = ["a", "list", "is", "a", "container"]
     self.assertTrue(is_container(string_list))
 def test_is_container(self):
     """A list is accepted; a string and an int are rejected."""
     # Each entry pairs a candidate with the assertion expected to hold for
     # ``helpers.is_container(candidate)``.
     expectations = (
         (['test_list'], self.assertTrue),
         ('test_str_not_iterable', self.assertFalse),
         # An object that is neither iterable nor a string.
         (10, self.assertFalse),
     )
     for candidate, expect in expectations:
         expect(helpers.is_container(candidate))
 def test_is_container(self):
     """A plain string is rejected; a list of strings is accepted."""
     not_a_container = "a string is not a container"
     a_container = ["a", "list", "is", "a", "container"]

     result_for_string = helpers.is_container(not_a_container)
     self.assertFalse(result_for_string)

     result_for_list = helpers.is_container(a_container)
     self.assertTrue(result_for_list)
Beispiel #13
0
    def get_many(cls,
                 execution_date: pendulum.DateTime,
                 key: Optional[str] = None,
                 task_ids: Optional[Union[str, Iterable[str]]] = None,
                 dag_ids: Optional[Union[str, Iterable[str]]] = None,
                 include_prior_dates: bool = False,
                 limit: Optional[int] = None,
                 session: Session = None) -> Query:
        """
        Compose a query that selects one or more rows from the xcom table.

        The query is returned without being executed. Rows are ordered by
        execution date and then timestamp, both descending.

        :param execution_date: Execution date to match
        :type execution_date: pendulum.datetime
        :param key: A key for the XCom. If truthy, only XComs with a matching
            key are selected. Pass key=None to remove the filter.
        :type key: str
        :param task_ids: Only XComs from tasks with matching ids are
            selected. Pass None to remove the filter.
        :type task_ids: str or iterable of strings (representing task_ids)
        :param dag_ids: Only XComs from DAGs with matching ids are selected.
            Pass None (default) to remove the filter.
        :type dag_ids: str or iterable of strings (representing dag_ids)
        :param include_prior_dates: If False, only XComs whose execution date
            equals ``execution_date`` are selected. If True, XComs from
            earlier dates are selected as well.
        :type include_prior_dates: bool
        :param limit: If truthy, caps the number of selected rows.
            XCom objects can be quite big and you might want to limit the
            number of rows.
        :type limit: int
        :param session: database session
        :type session: sqlalchemy.orm.session.Session
        """
        criteria = []

        if key:
            criteria.append(cls.key == key)

        # A container means "any of these" (IN); a single value means equality.
        if task_ids:
            criteria.append(
                cls.task_id.in_(task_ids) if is_container(task_ids) else cls.task_id == task_ids
            )

        if dag_ids:
            criteria.append(
                cls.dag_id.in_(dag_ids) if is_container(dag_ids) else cls.dag_id == dag_ids
            )

        if include_prior_dates:
            date_clause = cls.execution_date <= execution_date
        else:
            date_clause = cls.execution_date == execution_date
        criteria.append(date_clause)

        selection = session.query(cls).filter(and_(*criteria))
        selection = selection.order_by(cls.execution_date.desc(), cls.timestamp.desc())

        return selection.limit(limit) if limit else selection
Beispiel #14
0
    def get_many(
        cls,
        execution_date: Optional[pendulum.DateTime] = None,
        run_id: Optional[str] = None,
        key: Optional[str] = None,
        task_ids: Optional[Union[str, Iterable[str]]] = None,
        dag_ids: Optional[Union[str, Iterable[str]]] = None,
        include_prior_dates: bool = False,
        limit: Optional[int] = None,
        session: Session = None,
    ) -> Query:
        """
        Composes a query to get one or more values from the xcom table.

        ``run_id`` and ``execution_date`` are mutually exclusive, and exactly
        one of them must be passed. The query is returned without being
        executed; rows are ordered by execution date and then timestamp,
        both descending.

        :param execution_date: Execution date for the task
        :type execution_date: pendulum.datetime
        :param run_id: Dag run id for the task
        :type run_id: str
        :param key: A key for the XCom. If provided, only XComs with matching
            keys will be returned. To remove the filter, pass key=None.
        :type key: str
        :param task_ids: Only XComs from tasks with matching ids will be
            pulled. Can pass None to remove the filter.
        :type task_ids: str or iterable of strings (representing task_ids)
        :param dag_ids: If provided, only pulls XComs from DAGs with matching
            ids. Can pass None (default) to remove the filter.
        :type dag_ids: str or iterable of strings (representing dag_ids)
        :param include_prior_dates: If False, only XComs from the current
            execution_date are returned. If True, XComs from previous dates
            are returned as well. Requires ``execution_date``.
        :type include_prior_dates: bool
        :param limit: If required, limit the number of returned objects.
            XCom objects can be quite big and you might want to limit the
            number of rows.
        :type limit: int
        :param session: database session
        :type session: sqlalchemy.orm.session.Session
        :raises ValueError: if neither or both of ``execution_date`` and
            ``run_id`` are passed, or if ``include_prior_dates`` is set
            without an ``execution_date``
        """
        if not (execution_date is None) ^ (run_id is None):
            raise ValueError("Exactly one of execution_date or run_id must be passed")

        filters = []

        if key:
            filters.append(cls.key == key)

        if task_ids:
            if is_container(task_ids):
                filters.append(cls.task_id.in_(task_ids))
            else:
                filters.append(cls.task_id == task_ids)

        if dag_ids:
            if is_container(dag_ids):
                filters.append(cls.dag_id.in_(dag_ids))
            else:
                filters.append(cls.dag_id == dag_ids)

        if include_prior_dates:
            if execution_date is None:
                # In theory it would be possible to build a subquery that joins to DagRun and then gets the
                # execution dates. Lets do that for 2.3
                raise ValueError("Using include_prior_dates needs an execution_date to be passed")
            filters.append(cls.execution_date <= execution_date)
        elif execution_date is not None:
            filters.append(cls.execution_date == execution_date)

        query = session.query(cls).filter(*filters)

        # Test against None, matching the validation above. A truthiness test
        # would let a falsy run_id such as "" pass validation and then skip the
        # run filter entirely, leaving the query unscoped by run.
        if run_id is not None:
            from airflow.models.dagrun import DagRun

            query = query.join(cls.dag_run).filter(DagRun.run_id == run_id)

        query = query.order_by(cls.execution_date.desc(), cls.timestamp.desc())

        if limit:
            return query.limit(limit)
        return query
 def test_is_container(self):
     """Check ``helpers.is_container`` against the HelpersTest helper types."""
     is_container = helpers.is_container
     fixtures = HelpersTest

     self.assertTrue(is_container(fixtures.TestObjIter()))
     # NOTE(review): the next two calls pass the class objects themselves,
     # not instances; confirm this is intended.
     self.assertFalse(is_container(fixtures.TestObjNoIter))
     self.assertFalse(is_container(fixtures.TestObjIterStr))
     self.assertFalse(is_container(fixtures.TestObjStrNoIter()))