コード例 #1
0
    def _make_proxy(
        self,
        selectable,
        name=None,
        attach=True,
        name_is_truncatable=False,
        **kw
    ):
        """Build a ``ColumnClause`` that proxies this element on *selectable*.

        The proxy carries this element's ``name`` and ``type`` and lists
        ``self`` as its only entry in ``_proxies``.  When *selectable* is a
        clone, the proxy is linked to the column stored under the same key
        on the clone source.  With ``attach`` true the proxy is also
        registered in ``selectable._columns`` under its key.

        ``name``, ``name_is_truncatable`` and ``**kw`` are accepted but not
        used by this implementation.

        :return: the newly created proxy column.
        """
        proxy = ColumnClause(self.name, self.type)
        proxy._proxies = [self]

        # Mirror the clone relationship of the selectable onto the proxy.
        if selectable._is_clone_of is not None:
            source_columns = selectable._is_clone_of.columns
            proxy._is_clone_of = source_columns.get(proxy.key)

        if attach:
            selectable._columns[proxy.key] = proxy

        return proxy
コード例 #2
0
    def _make_proxy(
        self, selectable, name=None, attach=True, name_is_truncatable=False, **kw
    ):
        """Build a ``ColumnClause`` that proxies this element on *selectable*.

        The proxy is named after *selectable* when this element's name equals
        the name of its ``function``; otherwise it keeps this element's own
        name.  Its ``key`` is always this element's name.  The proxy is bound
        to *selectable* as its ``table`` with ``named_with_table`` switched
        off, and is registered in ``selectable._columns`` when ``attach`` is
        true.

        The incoming ``name``, ``name_is_truncatable`` and ``**kw`` arguments
        are accepted but not used by this implementation.

        :return: the newly created proxy column.
        """
        same_as_function = self.name == self.function.name
        proxy_name = selectable.name if same_as_function else self.name

        proxy = ColumnClause(proxy_name, self.type)
        proxy.key = self.name
        proxy._proxies = [self]

        # Mirror the clone relationship of the selectable onto the proxy.
        if selectable._is_clone_of is not None:
            source_columns = selectable._is_clone_of.columns
            proxy._is_clone_of = source_columns.get(proxy.key)

        proxy.table = selectable
        proxy.named_with_table = False

        if attach:
            selectable._columns[proxy.key] = proxy

        return proxy
コード例 #3
0
    def query(  # sqla
            self,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            extras=None,
            columns=None):
        """Query any sqla table through this common interface.

        Builds a SQLAlchemy ``select`` against ``self.table_name``, compiles
        it with literal binds for the table's database engine and runs it
        through ``pd.read_sql_query``.

        :param groupby: names of columns (keys of ``self.columns``) to group
            by; columns with an ``expression`` are selected through it.
        :param metrics: names of the metrics (from ``self.metrics``) to
            select; the first one drives the ordering, ``COUNT(*)`` is used
            for ordering when none is given.
        :param granularity: name of the datetime column; replaced by
            ``self.main_dttm_col`` when it is not in ``self.dttm_cols``.
        :param from_dttm: lower time bound (needs ``strftime``).
        :param to_dttm: upper time bound (needs ``strftime``).
        :param filter: iterable of ``(column, op, value)`` triples; only the
            ``'in'`` and ``'not in'`` operators are applied and ``value`` is
            split on commas.  ``None`` means no filter.
        :param is_timeseries: when true the (grained) timestamp is selected
            and grouped on.
        :param timeseries_limit: when truthy and ``groupby`` is given, the
            result is restricted to the top groups through a joined
            sub-query limited to that many rows.
        :param row_limit: ``LIMIT`` applied to the outer query.
        :param inner_from_dttm: lower time bound override for the sub-query.
        :param inner_to_dttm: upper time bound override for the sub-query.
        :param extras: optional dict; the keys ``'time_grain_sqla'``,
            ``'where'`` and ``'having'`` are honoured.  ``None`` means none.
        :param columns: raw select expressions used when ``groupby`` is
            empty; when given, no ``GROUP BY`` is emitted.
        :return: ``QueryResult`` with the dataframe, the elapsed time and
            the re-indented SQL text.
        :raises Exception: when a timeseries is requested but no datetime
            column could be determined.
        """
        # ``filter`` and ``extras`` default to None but were iterated /
        # dereferenced unconditionally; normalise them once up front.
        filter = filter or []  # noqa
        extras = extras or {}

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}
        qry_start_dttm = datetime.now()

        if not granularity and is_timeseries:
            raise Exception(
                "Datetime column not provided as part table configuration "
                "and is required by this type of chart")

        metrics_exprs = [
            literal_column(m.expression).label(m.metric_name)
            for m in self.metrics if m.metric_name in metrics
        ]

        if metrics:
            main_metric_expr = literal_column([
                m.expression for m in self.metrics
                if m.metric_name == metrics[0]
            ][0])
        else:
            main_metric_expr = literal_column("COUNT(*)")

        select_exprs = []
        groupby_exprs = []
        inner_select_exprs = []
        inner_groupby_exprs = []

        if groupby:
            for s in groupby:
                expr = cols[s].expression
                if expr:
                    outer = literal_column(expr).label(s)
                    inner = literal_column(expr).label('__' + s)
                else:
                    outer = column(s).label(s)
                    inner = column(s).label('__' + s)

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            select_exprs.extend(columns)
            metrics_exprs = []

        # Default to "no time restriction" so that the sub-query below does
        # not hit an unbound name when no datetime column is available.
        time_filter = []
        inner_time_filter = []

        if granularity:
            dttm_expr = cols[granularity].expression or granularity
            timestamp = literal_column(dttm_expr).label('timestamp')

            # Transforming time grain into an expression based on configuration
            time_grain_sqla = extras.get('time_grain_sqla')
            grain = None
            if time_grain_sqla:
                grain = self.database.grains_dict().get(time_grain_sqla)
            if grain is not None:
                timestamp_grain = literal_column(
                    grain.function.format(col=dttm_expr)).label('timestamp')
            else:
                # No grain requested, or the grain is unknown to this
                # database: fall back to the raw timestamp expression
                # (equivalent to the identity template '{col}').
                timestamp_grain = timestamp

            if is_timeseries:
                select_exprs += [timestamp_grain]
                groupby_exprs += [timestamp_grain]

            tf = '%Y-%m-%d %H:%M:%S.%f'
            time_filter = [
                timestamp >= from_dttm.strftime(tf),
                timestamp <= to_dttm.strftime(tf),
            ]
            inner_time_filter = copy(time_filter)
            if inner_from_dttm:
                inner_time_filter[0] = timestamp >= inner_from_dttm.strftime(
                    tf)
            if inner_to_dttm:
                inner_time_filter[1] = timestamp <= inner_to_dttm.strftime(tf)

        select_exprs += metrics_exprs
        qry = select(select_exprs)
        from_clause = table(self.table_name)
        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for col, op, eq in filter:
            col_obj = cols[col]
            if op in ('in', 'not in'):
                values = eq.split(",")
                if col_obj.expression:
                    cond = ColumnClause(col_obj.expression,
                                        is_literal=True).in_(values)
                else:
                    cond = column(col).in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        if 'where' in extras:
            where_clause_and += [text(extras['where'])]
        if 'having' in extras:
            having_clause_and += [text(extras['having'])]

        # ``time_filter`` is empty when there is no datetime column.
        qry = qry.where(and_(*(time_filter + where_clause_and)))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        qry = qry.limit(row_limit)

        if timeseries_limit and groupby:
            # Restrict the outer query to the top groups by joining on a
            # limited sub-query whose columns are prefixed with '__'.
            subq = select(inner_select_exprs)
            subq = subq.select_from(table(self.table_name))
            subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
            subq = subq.group_by(*inner_groupby_exprs)
            subq = subq.order_by(desc(main_metric_expr))
            subq = subq.limit(timeseries_limit)
            on_clause = [
                groupby_exprs[i] == column("__" + gb)
                for i, gb in enumerate(groupby)
            ]

            from_clause = from_clause.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(from_clause)

        engine = self.database.get_sqla_engine()
        sql = "{}".format(
            qry.compile(engine, compile_kwargs={"literal_binds": True}))
        df = pd.read_sql_query(sql=sql, con=engine)
        sql = sqlparse.format(sql, reindent=True)
        return QueryResult(df=df,
                           duration=datetime.now() - qry_start_dttm,
                           query=sql)
コード例 #4
0
    def query(
            self, groupby, metrics,
            granularity,
            from_dttm, to_dttm,
            limit_spec=None,
            filter=None,
            is_timeseries=True,
            timeseries_limit=15, row_limit=None,
            inner_from_dttm=None, inner_to_dttm=None,
            extras=None):
        """Query the table on its main datetime column and return a result.

        Builds a SQLAlchemy ``select`` against ``self.table_name``, compiles
        it with literal binds for the table's database engine and runs it
        through ``read_sql_query``.

        :param groupby: names of columns (keys of ``self.columns``) to group
            by; columns with an ``expression`` are selected through it.
        :param metrics: names of the metrics (from ``self.metrics``) to
            select; the first one drives the ordering, ``COUNT(*)`` is used
            for ordering when none is given.
        :param granularity: the timestamp is selected and grouped on unless
            this equals ``"all"``.
        :param from_dttm: lower time bound (needs ``isoformat``).
        :param to_dttm: upper time bound (needs ``isoformat``).
        :param limit_spec: accepted but not used here.
        :param filter: iterable of ``(column, op, value)`` triples; only the
            ``'in'`` and ``'not in'`` operators are applied and ``value`` is
            split on commas.  ``None`` means no filter.
        :param is_timeseries: accepted but not used here.
        :param timeseries_limit: when truthy and ``groupby`` is given, the
            result is restricted to the top groups through a joined
            sub-query limited to that many rows.
        :param row_limit: ``LIMIT`` applied to the outer query.
        :param inner_from_dttm: lower time bound override for the sub-query.
        :param inner_to_dttm: upper time bound override for the sub-query.
        :param extras: optional dict; only the ``'where'`` key is honoured.
        :return: ``QueryResult`` with the dataframe, the elapsed time and
            the re-indented SQL text.
        :raises Exception: when the table has no main datetime column.
        """
        import logging

        cols = {col.column_name: col for col in self.columns}
        qry_start_dttm = datetime.now()
        if not self.main_dttm_col:
            raise Exception(
                "Datetime column not provided as part table configuration")
        timestamp = literal_column(
            self.main_dttm_col).label('timestamp')
        metrics_exprs = [
            literal_column(m.expression).label(m.metric_name)
            for m in self.metrics if m.metric_name in metrics]

        if metrics:
            main_metric_expr = literal_column([
                m.expression for m in self.metrics
                if m.metric_name == metrics[0]][0])
        else:
            main_metric_expr = literal_column("COUNT(*)")

        select_exprs = []
        groupby_exprs = []
        inner_select_exprs = []
        inner_groupby_exprs = []

        for s in groupby or []:
            expr = cols[s].expression
            if expr:
                outer = ColumnClause(expr, is_literal=True).label(s)
                inner = ColumnClause(expr, is_literal=True).label('__' + s)
            else:
                outer = literal_column(s).label(s)
                inner = literal_column(s).label('__' + s)

            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)

        if granularity != "all":
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        select_exprs += metrics_exprs
        qry = select(select_exprs)
        from_clause = table(self.table_name)
        qry = qry.group_by(*groupby_exprs)

        time_filter = [
            timestamp >= from_dttm.isoformat(),
            timestamp <= to_dttm.isoformat(),
        ]
        inner_time_filter = copy(time_filter)
        if inner_from_dttm:
            inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
        if inner_to_dttm:
            inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()

        where_clause_and = []
        # ``filter`` defaults to None, which must behave like "no filter".
        for col, op, eq in (filter or []):
            col_obj = cols[col]
            if op in ('in', 'not in'):
                values = eq.split(",")
                if col_obj.expression:
                    cond = ColumnClause(
                        col_obj.expression, is_literal=True).in_(values)
                else:
                    cond = literal_column(col).in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        if extras and 'where' in extras:
            where_clause_and += [text(extras['where'])]
        qry = qry.where(and_(*(time_filter + where_clause_and)))
        qry = qry.order_by(desc(main_metric_expr))
        qry = qry.limit(row_limit)

        if timeseries_limit and groupby:
            # Restrict the outer query to the top groups by joining on a
            # limited sub-query whose columns are prefixed with '__'.
            subq = select(inner_select_exprs)
            subq = subq.select_from(table(self.table_name))
            subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
            subq = subq.group_by(*inner_groupby_exprs)
            subq = subq.order_by(desc(main_metric_expr))
            subq = subq.limit(timeseries_limit)
            on_clause = [
                groupby_exprs[i] == literal_column("__" + gb)
                for i, gb in enumerate(groupby)
            ]

            from_clause = from_clause.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(from_clause)

        engine = self.database.get_sqla_engine()
        sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
        # Emit the generated SQL through logging instead of a bare ``print``
        # statement (which wrote to stdout on every call and is a syntax
        # error on Python 3).
        logging.getLogger(__name__).debug(sql)
        df = read_sql_query(
            sql=sql,
            con=engine
        )
        sql = sqlparse.format(sql, reindent=True)
        return QueryResult(
            df=df, duration=datetime.now() - qry_start_dttm, query=sql)
コード例 #5
0
    def query(self,
              groupby,
              metrics,
              granularity,
              from_dttm,
              to_dttm,
              custom_query,
              limit_spec=None,
              filter=None,
              is_timeseries=True,
              timeseries_limit=15,
              row_limit=None,
              inner_from_dttm=None,
              inner_to_dttm=None,
              extras=None,
              columns=None):
        """Run a generated or a custom SQL query and return the result.

        Without ``custom_query`` a SQLAlchemy ``select`` is built against
        ``self.table_name`` and compiled with literal binds.  With
        ``custom_query`` that string is formatted with this method's local
        variables and executed as is.  Either way the SQL is run through
        ``read_sql_query`` on the table's database engine.

        :param groupby: names of columns (keys of ``self.columns``) to group
            by; columns with an ``expression`` are selected through it.
        :param metrics: names of the metrics (from ``self.metrics``) to
            select; the first one drives the ordering, ``COUNT(*)`` is used
            for ordering when none is given.
        :param granularity: name of the datetime column; replaced by
            ``self.main_dttm_col`` when it is not in ``self.dttm_cols``.
        :param from_dttm: lower time bound (needs ``isoformat``).
        :param to_dttm: upper time bound (needs ``isoformat``).
        :param custom_query: ``str.format`` template of a raw SQL query, or
            a falsy value to build the query from the other arguments.
        :param limit_spec: not used by the generated query.
        :param filter: iterable of ``(column, op, value)`` triples; only the
            ``'in'`` and ``'not in'`` operators are applied and ``value`` is
            split on commas.  ``None`` means no filter.
        :param is_timeseries: when true the timestamp is selected and
            grouped on.
        :param timeseries_limit: when truthy and ``groupby`` is given, the
            result is restricted to the top groups through a joined
            sub-query limited to that many rows.
        :param row_limit: ``LIMIT`` applied to the outer query.
        :param inner_from_dttm: lower time bound override for the sub-query.
        :param inner_to_dttm: upper time bound override for the sub-query.
        :param extras: optional dict; the keys ``'where'`` and ``'having'``
            are honoured.
        :param columns: raw select expressions used when ``groupby`` is
            empty; when given, no ``GROUP BY`` is emitted.
        :return: ``QueryResult`` with the dataframe, the elapsed time and
            the dedented SQL text.
        :raises Exception: when no custom query is given and the table has
            no main datetime column.
        """
        qry_start_dttm = datetime.now()

        if not custom_query:
            # For backward compatibility
            if granularity not in self.dttm_cols:
                granularity = self.main_dttm_col
            cols = {col.column_name: col for col in self.columns}
            if not self.main_dttm_col:
                raise Exception(
                    "Datetime column not provided as part table configuration")
            dttm_expr = cols[granularity].expression

            if dttm_expr:
                timestamp = ColumnClause(dttm_expr,
                                         is_literal=True).label('timestamp')
            else:
                timestamp = literal_column(granularity).label('timestamp')

            metrics_exprs = [
                literal_column(m.expression).label(m.metric_name)
                for m in self.metrics if m.metric_name in metrics
            ]

            if metrics:
                main_metric_expr = literal_column([
                    m.expression for m in self.metrics
                    if m.metric_name == metrics[0]
                ][0])
            else:
                main_metric_expr = literal_column("COUNT(*)")

            groupby_exprs = []
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []

            if groupby:
                for s in groupby:
                    expr = cols[s].expression
                    if expr:
                        outer = ColumnClause(expr, is_literal=True).label(s)
                        inner = ColumnClause(expr,
                                             is_literal=True).label('__' + s)
                    else:
                        outer = column(s).label(s)
                        inner = column(s).label('__' + s)

                    groupby_exprs.append(outer)
                    select_exprs.append(outer)
                    inner_groupby_exprs.append(inner)
                    inner_select_exprs.append(inner)
            elif columns:
                select_exprs.extend(columns)
                metrics_exprs = []

            if is_timeseries:
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            select_exprs += metrics_exprs
            qry = select(select_exprs)
            from_clause = table(self.table_name)
            if not columns:
                qry = qry.group_by(*groupby_exprs)

            time_filter = [
                timestamp >= from_dttm.isoformat(),
                timestamp <= to_dttm.isoformat(),
            ]
            inner_time_filter = copy(time_filter)
            if inner_from_dttm:
                inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
            if inner_to_dttm:
                inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()

            where_clause_and = []
            having_clause_and = []
            # ``filter`` defaults to None, which must behave like "no filter".
            for col, op, eq in (filter or []):
                col_obj = cols[col]
                if op in ('in', 'not in'):
                    values = eq.split(",")
                    if col_obj.expression:
                        cond = ColumnClause(col_obj.expression,
                                            is_literal=True).in_(values)
                    else:
                        cond = column(col).in_(values)
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
            if extras and 'where' in extras:
                where_clause_and += [text(extras['where'])]
            if extras and 'having' in extras:
                having_clause_and += [text(extras['having'])]
            qry = qry.where(and_(*(time_filter + where_clause_and)))
            qry = qry.having(and_(*having_clause_and))
            if groupby:
                qry = qry.order_by(desc(main_metric_expr))
            qry = qry.limit(row_limit)

            if timeseries_limit and groupby:
                # Restrict the outer query to the top groups by joining on a
                # limited sub-query whose columns are prefixed with '__'.
                subq = select(inner_select_exprs)
                subq = subq.select_from(table(self.table_name))
                subq = subq.where(and_(*(where_clause_and +
                                         inner_time_filter)))
                subq = subq.group_by(*inner_groupby_exprs)
                subq = subq.order_by(desc(main_metric_expr))
                subq = subq.limit(timeseries_limit)
                on_clause = [
                    groupby_exprs[i] == column("__" + gb)
                    for i, gb in enumerate(groupby)
                ]

                from_clause = from_clause.join(subq.alias(), and_(*on_clause))

            qry = qry.select_from(from_clause)

            engine = self.database.get_sqla_engine()
            sql = str(
                qry.compile(engine, compile_kwargs={"literal_binds": True}))
            df = read_sql_query(sql=sql, con=engine)
            # The dedented text used to be computed and thrown away.
            sql = textwrap.dedent(sql)

        else:
            # Legacy way of querying by building a SQL string without
            # using the sqlalchemy expression API (new approach which
            # supports all dialects).
            #
            # NOTE(review): the template is filled from ``locals()``, so the
            # parameter and local names of this method are part of the
            # template contract -- do not rename or rebind them above this
            # line.  The values are interpolated into raw SQL without any
            # escaping; only trusted templates and arguments are safe here.
            engine = self.database.get_sqla_engine()
            sql = custom_query.format(**locals())
            df = read_sql_query(sql=sql, con=engine)
            # The dedented text used to be computed and thrown away.
            sql = textwrap.dedent(sql)

        return QueryResult(df=df,
                           duration=datetime.now() - qry_start_dttm,
                           query=sql)