Example 1
def get_activity_query(user_id=None, session_id=None, test_id=None):
    # pylint: disable=no-member
    from .models import Activity, Comment, User

    _filter = functools.partial(_apply_filters, user_id=user_id, session_id=session_id, test_id=test_id)

    comments = select([
            literal_column("('comment:' || comment.id)").label('id'),
            literal_column(str(ACTION_COMMENTED)).label('action'),
            Comment.user_id.label('user_id'),
            Comment.session_id.label('session_id'),
            Comment.test_id.label('test_id'),
            Comment.timestamp.label('timestamp'),
            Comment.comment.label('text'),
            User.email.label('user_email'),
        ]).select_from(Comment.__table__.join(User, User.id == Comment.user_id))

    comments = _filter(Comment, comments)

    activity = select([
            literal_column("('activity:' || activity.id)").label('id'),
            Activity.action.label('action'),
            Activity.user_id.label('user_id'),
            Activity.session_id.label('session_id'),
            Activity.test_id.label('test_id'),
            Activity.timestamp.label('timestamp'),
            literal_column("NULL").label('text'),
            User.email.label('user_email'),
        ]).select_from(Activity.__table__.join(User, User.id == Activity.user_id))

    activity = _filter(Activity, activity)

    u = union_all(comments, activity).alias('u')

    return select([u]).order_by(u.c.timestamp)
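
A minimal, self-contained sketch of the same pattern (table and column names here are illustrative, and the legacy select([...]) calling style used throughout these examples is assumed):

from sqlalchemy.sql import column, literal_column, select, table, union_all

# Lightweight table clauses standing in for the mapped Comment/Activity tables.
comments = table("comment", column("id"), column("timestamp"))
activity = table("activity", column("id"), column("timestamp"))

c = select([
    literal_column("('comment:' || comment.id)").label("id"),
    comments.c.timestamp.label("timestamp"),
]).select_from(comments)

a = select([
    literal_column("('activity:' || activity.id)").label("id"),
    activity.c.timestamp.label("timestamp"),
]).select_from(activity)

u = union_all(c, a).alias("u")
print(select([u]).order_by(u.c.timestamp))
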
Example 2
    def __init__(self, sess, unfiltered, filt_crit, tt, window_size=WINDOW_SIZE):
        self.sess = sess
        self.unfiltered = unfiltered
        self.filt_crit = filt_crit
        self.tt = tt
        self.window_size = window_size

        self.skipped = []

        # select-only, can't be used for updates
        self.filtered_s = filtered = select(unfiltered.c).where(filt_crit).alias("filtered")

        self.selectable = (
            select(
                [
                    filtered.c.size,
                    func.count().label("inode_count"),
                    func.max(filtered.c.has_updates).label("has_updates"),
                ]
            )
            .group_by(filtered.c.size)
            .having(and_(literal_column("inode_count") > 1, literal_column("has_updates") > 0))
        )

        # This is higher than selectable.first().size, in order to also clear
        # updates without commonality.
        self.upper_bound = self.sess.query(self.unfiltered.c.size).order_by(-self.unfiltered.c.size).limit(1).scalar()
Example 3
    def load(self, request, response, subject, data):
        candidates = data['identifiers']
        if not candidates:
            return response([])

        identifiers = []
        for i, identifier in enumerate(candidates):
            identifiers.append("(%d, '%s')" % (i, str(identifier)))

        expr = select([column('rank'), column('id')],
            from_obj="(values %s) as subset(rank, id)" % ', '.join(identifiers))

        query = (self.schema.session.query(self.model)
            .join(expr.cte('__subset__'), literal_column('__subset__.id')==self.model.id)
            .order_by(literal_column('__subset__.rank')))

        resources = []
        instances = list(query.all())

        instance = (instances.pop(0) if instances else None)
        for id in candidates:
            if instance:
                if instance.id == id:
                    resources.append(self._construct_resource(request, instance, data))
                    if instances:
                        instance = instances.pop(0)
                    else:
                        instance = None
                else:
                    resources.append(None)
            else:
                resources.append(None)

        response(resources)
Example 4
def messages_in_narrow_backend(request, user_profile,
                               msg_ids = REQ(validator=check_list(check_int)),
                               narrow = REQ(converter=narrow_parameter)):
    # type: (HttpRequest, UserProfile, List[int], List[Dict[str, Any]]) -> HttpResponse

    # Note that this function will only work on messages the user
    # actually received

    # TODO: We assume that the narrow is a search.  For now this works because
    # the browser only ever calls this function for searches, since it can't
    # apply that narrow operator itself.

    query = select([column("message_id"), column("subject"), column("rendered_content")],
                   and_(column("user_profile_id") == literal(user_profile.id),
                        column("message_id").in_(msg_ids)),
                   join(table("zerver_usermessage"), table("zerver_message"),
                        literal_column("zerver_usermessage.message_id") ==
                        literal_column("zerver_message.id")))

    builder = NarrowBuilder(user_profile, column("message_id"))
    for term in narrow:
        query = builder.add_term(query, term)

    sa_conn = get_sqlalchemy_connection()
    query_result = list(sa_conn.execute(query).fetchall())

    search_fields = dict()
    for row in query_result:
        (message_id, subject, rendered_content, content_matches, subject_matches) = row
        search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                      content_matches, subject_matches)

    return json_success({"messages": search_fields})
Example 5
 def createView(self):
     # filter indexes
     catalog = self.env.catalog.index_catalog
     xmlindex_list = catalog.getIndexes(package_id='seismology',
                                        resourcetype_id='event')
     filter = ['datetime', 'latitude', 'longitude', 'depth',
               'magnitude', 'magnitude_type', 'event_type', 'np1_strike',
               'np1_dip', 'np1_rake', 'mt_mrr', 'mt_mtt', 'mt_mpp',
               'mt_mrt', 'mt_mrp', 'mt_mtp', 'localisation_method']
     xmlindex_list = [x for x in xmlindex_list if x.label in filter]
     if not xmlindex_list:
         return
     # build up query
     query, joins = catalog._createIndexView(xmlindex_list, compact=True)
     options = [
         sql.literal_column("datetime.keyval").label("end_datetime"),
         sql.literal_column("datetime.keyval").label("start_datetime"),
         sql.case(
             value=sql.literal_column("localisation_method.keyval"),
             whens={'manual': 'circle'},
             else_='square').label('gis_localisation_method'),
         sql.func.GeomFromText(
             sql.text("'POINT(' || longitude.keyval || ' ' || " + \
                      "latitude.keyval || ')', 4326")).label('geom')
     ]
     for option in options:
         query.append_column(option)
     query = query.select_from(joins)
     return util.compileStatement(query)
Example 6
def get_query(qtype = 'none', qobject = 'none'):

    if qtype != 'none' and qobject != 'none':

        # build queries for the specified subset of patients
        query = db.session.query(label('sid', qobject.c.patient_sid),
                                 label('value_d', qobject.c.double_value),
                                 label('value_s', qobject.c.string_value),
                                 label('attribute', qobject.c.attribute_value))

    elif qtype == 'count' and qobject == 'none':

        # count of patients
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'))


    else:

        # entire population
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'),
                                 literal_column("'complement'").label('attribute'),
                                 literal_column("'0'").label('value_d'),
                                 literal_column("'null'").label('value_s'))


    db.session.commit()
    db.session.close()

    return query
Example 7
    def read_many_byuser(self, request):
        """
        """

        username = request.matchdict['username']

        page = int(request.params.get("page", 1))
        pagesize = int(request.params.get("pagesize", 10))

        if self.Session.query(User).filter(User.username == username).first() == None:
            raise HTTPNotFound("Requested user does not exist.")

        items = []

        activities_sub_query = self.Session.query(Activity.activity_identifier.label("identifier"), Activity.version, Changeset.timestamp, Changeset.fk_user).\
            join(Changeset).\
            filter(or_(Activity.fk_status == 2, Activity.fk_status == 3)).subquery(name="sub_act")

        activities_query = self.Session.query(activities_sub_query, User.username).\
            join(User).filter(User.username == username).subquery(name="act")

        # All active and inactive stakeholders
        stakeholder_active = self.Session.query(Stakeholder).\
            filter(or_(Stakeholder.fk_status == 2, Stakeholder.fk_status == 3)).\
            subquery("st_active")

        # Get the five latest stakeholder by changeset
        stakeholder_sub_query = self.Session.query(stakeholder_active.c.stakeholder_identifier.label("identifier"), \
                                                   stakeholder_active.c.version, Changeset.timestamp, Changeset.fk_user).\
            join(Changeset, Changeset.id == stakeholder_active.c.fk_changeset).\
            subquery(name="sub_st")

        # Join the resulting set to the user table
        stakeholder_query = self.Session.query(stakeholder_sub_query, User.username).\
            join(User).filter(User.username == username).subquery(name="st")

        query = self.Session.query(activities_query, literal_column("\'activity\'").label("type")).\
            union(self.Session.query(stakeholder_query, literal_column("\'stakeholder\'").label("type"))).\
            order_by(desc(activities_query.c.timestamp)).order_by(desc(activities_query.c.version))

        for i in query.offset((page-1)*pagesize).limit(pagesize).all():
            items.append({
                "type": i.type,
                "author": i.username,
                "timestamp": i.timestamp,
                "version": i.version,
                "identifier": str(i.identifier)
            })
        return {
            "items": items,
            "username": username,
            "totalitems": query.count(),
            "pagesize": pagesize,
            "currentpage": page
        }

Example 8
def get_server_search_sources():
    return g.db.query(
        ExternalWFSSource.name.op('||')(literal_column("' ('")).op('||')(ExternalWFSSource.search_property).op('||')(literal_column("')'")).label('label'),
        literal_column("'wfs_'").op('||')
        (ExternalWFSSource.name).label('value')
    ).filter_by(active=True).union_all(g.db.query(
        GBIServer.title.label('label'),
        literal_column("'parcel_'").op('||')
        (ParcelSearchSource.id).label('value')
    ).filter(ParcelSearchSource.active==True).join(ParcelSearchSource.gbi_server)).all()
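
A minimal sketch of the concatenation idiom above (the column name is illustrative): the quoted string is wrapped in literal_column so it is emitted verbatim, and op('||') builds the SQL string concatenation.

from sqlalchemy.sql import column, literal_column, select

# Compiles to: SELECT 'wfs_' || name AS value
print(select([literal_column("'wfs_'").op("||")(column("name")).label("value")]))
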
Example 9
 def test_render_check_constraint_sqlexpr(self):
     c = column("c")
     five = literal_column("5")
     ten = literal_column("10")
     eq_ignore_whitespace(
         autogenerate.render._render_check_constraint(
             CheckConstraint(and_(c > five, c < ten)), self.autogen_context
         ),
         "sa.CheckConstraint('c > 5 AND c < 10')",
     )
Example 10
 def test_sqlexpr(self):
     m = MetaData()
     t = Table('t', m, Column(
         'x', Integer,
         server_default=literal_column('a') + literal_column('b'))
     )
     self.assert_compile(
         CreateTable(t),
         "CREATE TABLE t (x INTEGER DEFAULT a + b)"
     )
Example 11
 def execute(self, request, user, name):
     alliance = Alliance.load(name)
     if alliance is None:
         return HttpResponseRedirect(reverse("alliance_ranks"))
     
     ph = aliased(PlanetHistory)
     members = count().label("members")
     size = sum(ph.size).label("size")
     value = sum(ph.value).label("value")
     score = sum(ph.score).label("score")
     avg_size = size.op("/")(members).label("avg_size")
     avg_value = value.op("/")(members).label("avg_value")
     t10v = count(case(whens=((ph.value_rank <= 10 ,1),), else_=None)).label("t10v")
     t100v = count(case(whens=((ph.value_rank <= 100 ,1),), else_=None)).label("t100v")
     
     pho = aliased(PlanetHistory)
     sizeo = sum(pho.size).label("sizeo")
     valueo = sum(pho.value).label("valueo")
     scoreo = sum(pho.score).label("scoreo")
     
     Q = session.query(PlanetHistory.tick.label("tick"),
                       Alliance.id.label("id"),
                       literal_column("rank() OVER (PARTITION BY planet_history.tick ORDER BY sum(planet_history.size) DESC)").label("size_rank"),
                       literal_column("rank() OVER (PARTITION BY planet_history.tick ORDER BY sum(planet_history.value) DESC)").label("value_rank"),
                       )
     Q = Q.filter(PlanetHistory.active == True)
     Q = Q.join(PlanetHistory.current)
     Q = Q.join(Planet.intel)
     Q = Q.join(Intel.alliance)
     Q = Q.group_by(PlanetHistory.tick, Alliance.id)
     ranks = Q.subquery()
     
     Q = session.query(ph.tick, members,
                       size, value,
                       avg_size, avg_value,
                       size-sizeo, value-valueo, score-scoreo,
                       t10v, t100v,
                       )
     Q = Q.filter(ph.active == True)
     Q = Q.join(ph.current)
     Q = Q.join(Planet.intel)
     Q = Q.join(Intel.alliance)
     Q = Q.outerjoin((pho, and_(ph.id==pho.id, ph.tick-1==pho.tick),))
     Q = Q.filter(Intel.alliance == alliance)
     Q = Q.group_by(ph.tick)
     
     Q = Q.from_self().add_columns(ranks.c.size_rank, ranks.c.value_rank)
     Q = Q.outerjoin((ranks, and_(ph.tick == ranks.c.tick, alliance.id == ranks.c.id),))
     Q = Q.order_by(desc(ph.tick))
     
     history = Q.all()
     
     return render("ialliancehistory.tpl", request, alliance=alliance, members=alliance.intel_members, history=history)
Example 12
def system_utilisation_counts_by_group(grouping, systems):
    retval = defaultdict(lambda: dict((k, 0) for k in
            ['recipe', 'manual', 'idle_automated', 'idle_manual',
             'idle_broken', 'idle_removed']))
    query = systems.outerjoin(System.open_reservation)\
            .with_entities(grouping,
                func.coalesce(Reservation.type,
                func.concat('idle_', func.lower(System.status))),
                func.count(System.id))\
            .group_by(literal_column("1"), literal_column("2"))
    for group, state, count in query:
        retval[group][state] = count
    return retval
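
A minimal sketch of the ordinal GROUP BY trick used above (table and column names are illustrative): literal_column("1") is rendered verbatim, whereas column("1") would be quoted as an identifier.

from sqlalchemy.sql import column, func, literal_column, select, table

systems = table("system", column("status"), column("id"))
stmt = select([systems.c.status, func.count(systems.c.id)]).group_by(literal_column("1"))
print(stmt)
# roughly: SELECT system.status, count(system.id) AS count_1 FROM system GROUP BY 1
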
Example 13
    def testlabels2(self):
        metadata = MetaData()
        table = Table("ImATable", metadata,
            Column("col1", Integer))
        x = select([table.c.col1.label("ImATable_col1")]).alias("SomeAlias")
        assert str(select([x.c.ImATable_col1])) == '''SELECT "SomeAlias"."ImATable_col1" \nFROM (SELECT "ImATable".col1 AS "ImATable_col1" \nFROM "ImATable") AS "SomeAlias"'''

        # note that 'foo' and 'FooCol' are literals already quoted
        x = select([sql.literal_column("'foo'").label("somelabel")], from_obj=[table]).alias("AnAlias")
        x = x.select()
        assert str(x) == '''SELECT "AnAlias".somelabel \nFROM (SELECT 'foo' AS somelabel \nFROM "ImATable") AS "AnAlias"'''

        x = select([sql.literal_column("'FooCol'").label("SomeLabel")], from_obj=[table])
        x = x.select()
        assert str(x) == '''SELECT "SomeLabel" \nFROM (SELECT 'FooCol' AS "SomeLabel" \nFROM "ImATable")'''
Example 14
def polymorphic_union(table_map, typecolname, aliasname='p_union'):
    """create a UNION statement used by a polymorphic mapper.
    
    See the SQLAlchemy advanced mapping docs for an example of how this is used."""
    colnames = util.Set()
    colnamemaps = {}
    types = {}
    for key in table_map.keys():
        table = table_map[key]

        # mysql doesn't like selecting from a select; make it an alias of the select
        if isinstance(table, sql.Select):
            table = table.alias()
            table_map[key] = table

        m = {}
        for c in table.c:
            colnames.add(c.name)
            m[c.name] = c
            types[c.name] = c.type
        colnamemaps[table] = m

    def col(name, table):
        try:
            return colnamemaps[table][name]
        except KeyError:
            return sql.cast(sql.null(), types[name]).label(name)

    result = []
    for type, table in table_map.iteritems():
        if typecolname is not None:
            result.append(sql.select([col(name, table) for name in colnames] + [sql.literal_column("'%s'" % type).label(typecolname)], from_obj=[table]))
        else:
            result.append(sql.select([col(name, table) for name in colnames], from_obj=[table]))
    return sql.union_all(*result).alias(aliasname)
Example 15
def analytics_compability():
    user2set = dict([
        (user_id, map(operator.itemgetter(0), {
            "artist"            : db.session.query(Scrobble.artist).\
                                             group_by(Scrobble.artist),
            "track"             : db.session.query(func.concat(Scrobble.artist, literal_column('" – "'), Scrobble.track)).\
                                             group_by(Scrobble.artist, Scrobble.track),
        }
        [request.args.get("criterion")].\
        filter_by(user_id=user_id).\
        having(func.count(Scrobble.id) > int(request.args.get("more_than_x_scrobbles"))).\
        all()))
        for user_id in map(int, request.args.getlist("users"))
    ])

    user2username = dict(db.session.query(User.id, User.username).all())
    
    length2groups = [
        (length, filter(lambda (users, set): len(set) > 0, sorted([
            (
                ", ".join(sorted([user2username[i] for i in user2username if i in group], key=lambda username: username.lower())),
                reduce(set.intersection, map(set, [user2set[user_id] for user_id in group]))
            )
            for group in itertools.combinations(map(int, request.args.getlist("users")), length) if len(group) == length
        ], key=lambda (users, set): -len(set)))[:10])
        for length in range(2, len(user2username) + 1)
    ]
Example 16
    def adhoc_metric_to_sqla(self, metric, cols):
        """
        Turn an adhoc metric into a sqlalchemy column.

        :param dict metric: Adhoc metric definition
        :param dict cols: Columns for the current table
        :returns: The metric defined as a sqlalchemy column
        :rtype: sqlalchemy.sql.column
        """
        expression_type = metric.get('expressionType')
        db_engine_spec = self.database.db_engine_spec
        label = db_engine_spec.make_label_compatible(metric.get('label'))

        if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE']:
            column_name = metric.get('column').get('column_name')
            sqla_column = column(column_name)
            table_column = cols.get(column_name)

            if table_column:
                sqla_column = table_column.get_sqla_col()

            sqla_metric = self.sqla_aggregations[metric.get('aggregate')](sqla_column)
            sqla_metric = sqla_metric.label(label)
            return sqla_metric
        elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SQL']:
            sqla_metric = literal_column(metric.get('sqlExpression'))
            sqla_metric = sqla_metric.label(label)
            return sqla_metric
        else:
            return None
Example 17
 def query(self):
     pq = qualstat_getstatdata(column("eval_type") == "f")
     base = alias(pq)
     query = (select([
         func.array_agg(column("queryid")).label("queryids"),
         "qualid",
         cast(column("quals"), JSONB).label('quals'),
         "occurences",
         "execution_count",
         func.array_agg(column("query")).label("queries"),
         "avg_filter",
         "filter_ratio"
     ]).select_from(
         join(base, powa_databases,
              onclause=(
                  powa_databases.c.oid == literal_column("dbid"))))
         .where(powa_databases.c.datname == bindparam("database"))
         .where(column("avg_filter") > 1000)
         .where(column("filter_ratio") > 0.3)
         .group_by(column("qualid"), column("execution_count"),
                   column("occurences"),
                   cast(column("quals"), JSONB),
                  column("avg_filter"), column("filter_ratio"))
         .order_by(column("occurences").desc())
         .limit(200))
     return query
Example 18
    def visit_select(self, select, **kwargs):
        """Look for ``LIMIT`` and ``OFFSET`` in a select statement and, if
        present, try to wrap it in a subquery with a ``row_number()`` criterion.

        """
        if not getattr(select, '_mssql_visit', None) and select._offset:
            # to use ROW_NUMBER(), an ORDER BY is required.
            orderby = self.process(select._order_by_clause)
            if not orderby:
                raise exc.InvalidRequestError('MSSQL requires an order_by when '
                                              'using an offset.')

            _offset = select._offset
            _limit = select._limit
            select._mssql_visit = True
            select = select.column(
                sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" \
                % orderby).label("mssql_rn")
                                   ).order_by(None).alias()

            limitselect = sql.select([c for c in select.c if
                                        c.key!='mssql_rn'])
            limitselect.append_whereclause("mssql_rn>%d" % _offset)
            if _limit is not None:
                limitselect.append_whereclause("mssql_rn<=%d" % 
                                            (_limit + _offset))
            return self.process(limitselect, iswrapper=True, **kwargs)
        else:
            return compiler.SQLCompiler.visit_select(self, select, **kwargs)
Example 19
    def visit_select(self, select, **kwargs):
        """Look for ``LIMIT`` and ``OFFSET`` in a select statement and, if
        present, try to wrap it in a subquery with a ``row_number()`` criterion.
        """
        if self.dialect.has_window_funcs and (not getattr(select, '_mssql_visit', None)) and (select._limit is not None or select._offset is not None):
            # to use ROW_NUMBER(), an ORDER BY is required.
            orderby = self.process(select._order_by_clause)
            if not orderby:
                orderby = list(select.oid_column.proxies)[0]
                orderby = self.process(orderby)

            _offset = select._offset
            _limit = select._limit
            select._mssql_visit = True
            select = select.column(sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" % orderby).label("mssql_rn")).order_by(None).alias()

            limitselect = sql.select([c for c in select.c if c.key!='mssql_rn'])
            if _offset is not None:
                limitselect.append_whereclause("mssql_rn>=%d" % _offset)
                if _limit is not None:
                    limitselect.append_whereclause("mssql_rn<=%d" % (_limit + _offset))
            else:
                limitselect.append_whereclause("mssql_rn<=%d" % _limit)
            return self.process(limitselect, iswrapper=True, **kwargs)
        else:
            return compiler.DefaultCompiler.visit_select(self, select, **kwargs)
Example 20
def polymorphic_union(table_map, typecolname, aliasname="p_union", cast_nulls=True):
    """Create a ``UNION`` statement used by a polymorphic mapper.

    See  :ref:`concrete_inheritance` for an example of how
    this is used.

    :param table_map: mapping of polymorphic identities to
     :class:`.Table` objects.
    :param typecolname: string name of a "discriminator" column, which will be
     derived from the query, producing the polymorphic identity for each row.  If
     ``None``, no polymorphic discriminator is generated.
    :param aliasname: name of the :func:`~sqlalchemy.sql.expression.alias()`
     construct generated.
    :param cast_nulls: if True, non-existent columns, which are represented as labeled
     NULLs, will be passed into CAST.   This is a legacy behavior that is problematic
     on some backends such as Oracle - in which case it can be set to False.

    """

    colnames = util.OrderedSet()
    colnamemaps = {}
    types = {}
    for key in table_map.keys():
        table = table_map[key]

        # mysql doesn't like selecting from a select;
        # make it an alias of the select
        if isinstance(table, sql.Select):
            table = table.alias()
            table_map[key] = table

        m = {}
        for c in table.c:
            colnames.add(c.key)
            m[c.key] = c
            types[c.key] = c.type
        colnamemaps[table] = m

    def col(name, table):
        try:
            return colnamemaps[table][name]
        except KeyError:
            if cast_nulls:
                return sql.cast(sql.null(), types[name]).label(name)
            else:
                return sql.type_coerce(sql.null(), types[name]).label(name)

    result = []
    for type, table in table_map.iteritems():
        if typecolname is not None:
            result.append(
                sql.select(
                    [col(name, table) for name in colnames]
                    + [sql.literal_column(sql_util._quote_ddl_expr(type)).label(typecolname)],
                    from_obj=[table],
                )
            )
        else:
            result.append(sql.select([col(name, table) for name in colnames], from_obj=[table]))
    return sql.union_all(*result).alias(aliasname)
Example 21
 def sqla_col(self):
     name = self.column_name
     if not self.expression:
         col = column(self.column_name).label(name)
     else:
         col = literal_column(self.expression).label(name)
     return col
Example 22
    def visit_select(self, select):
        """Look for ``LIMIT`` and ``OFFSET`` in a select statement and, if
        present, try to wrap it in a subquery with a ``row_number()`` criterion.
        """

        # TODO: put a real copy-container on Select and copy, or somehow make this
        # not modify the Select statement
        if getattr(select, '_oracle_visit', False):
            # cancel out the compiled order_by on the select
            if hasattr(select, "order_by_clause"):
                self.strings[select.order_by_clause] = ""
            ansisql.ANSICompiler.visit_select(self, select)
            return

        if select.limit is not None or select.offset is not None:
            select._oracle_visit = True
            # to use ROW_NUMBER(), an ORDER BY is required.
            orderby = self.strings[select.order_by_clause]
            if not orderby:
                orderby = select.oid_column
                self.traverse(orderby)
                orderby = self.strings[orderby]
            select.append_column(sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" % orderby).label("ora_rn"))
            limitselect = sql.select([c for c in select.c if c.key!='ora_rn'])
            if select.offset is not None:
                limitselect.append_whereclause("ora_rn>%d" % select.offset)
                if select.limit is not None:
                    limitselect.append_whereclause("ora_rn<=%d" % (select.limit + select.offset))
            else:
                limitselect.append_whereclause("ora_rn<=%d" % select.limit)
            self.traverse(limitselect)
            self.strings[select] = self.strings[limitselect]
            self.froms[select] = self.froms[limitselect]
        else:
            ansisql.ANSICompiler.visit_select(self, select)
Example 23
 def get_sqla_col(self, label=None):
     db_engine_spec = self.table.database.db_engine_spec
     label = db_engine_spec.make_label_compatible(label if label else self.column_name)
     if not self.expression:
         col = column(self.column_name).label(label)
     else:
         col = literal_column(self.expression).label(label)
     return col
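
A minimal sketch of the distinction both branches above rely on: column() is quoted as an identifier, while literal_column() is emitted verbatim and can therefore carry an arbitrary SQL expression.

from sqlalchemy.sql import column, literal_column, select

print(select([column("num_a + num_b").label("total")]))
# SELECT "num_a + num_b" AS total
print(select([literal_column("num_a + num_b").label("total")]))
# SELECT num_a + num_b AS total
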
Example 24
def qualstat_get_figures(conn, database, tsfrom, tsto, queries=None, quals=None):
    condition = text("""datname = :database AND coalesce_range && tstzrange(:from, :to)""")
    if queries is not None:
        condition = and_(condition, array([int(q) for q in queries])
                         .any(literal_column("s.queryid")))
    if quals is not None:
        condition = and_(condition, array([int(q) for q in quals])
                         .any(literal_column("qnc.qualid")))

    sql = (select([
        text('most_filtering.quals'),
        text('most_filtering.query'),
        text('to_json(most_filtering) as "most filtering"'),
        text('to_json(least_filtering) as "least filtering"'),
        text('to_json(most_executed) as "most executed"'),
        text('to_json(most_used) as "most used"')])
           .select_from(
               qual_constants("most_filtering", condition)
               .alias("most_filtering")
               .join(
                   qual_constants("least_filtering", condition)
                   .alias("least_filtering"),
                   text("most_filtering.rownumber = "
                        "least_filtering.rownumber"))
               .join(qual_constants("most_executed", condition)
                     .alias("most_executed"),
                     text("most_executed.rownumber = "
                          "least_filtering.rownumber"))
               .join(qual_constants("most_used", condition)
                     .alias("most_used"),
                     text("most_used.rownumber = "
                          "least_filtering.rownumber"))))


    params = {"database": database,
              "from": tsfrom,
              "to": tsto}
    quals = conn.execute(sql, params=params)

    if quals.rowcount == 0:
        return None

    row = quals.first()

    return row
Example 25
def list_packages():
    """
    Return a list of all packages as JSON. Uses Postgres to generate all the JSON in a
    single query.

    Optional query parameters:
    collection: filter by collection name (list, literal match)
    name: filter by package name (list, literal match)

    Response format:
    [
        {
            "name": "foo",
            "collection": "f29",
            "state": "unresolved",
            "last_complete_build": {
                "task_id": 123
            }
        },
        ...
    ]
    """
    query = (
        db.query(
            Package.name.label('name'),
            Collection.name.label('collection'),
            # pylint:disable=no-member
            Package.state_string.label('state'),
            sql_if(
                Build.id != None,
                db.query(Build.task_id.label('task_id'))
                .correlate(Build)
                .as_record()
            ).label('last_complete_build')
        )
        .join(Collection)
        .outerjoin(
            Build,
            (Package.last_complete_build_id == Build.id) & Build.last_complete
        )
        .order_by(Package.name)
    )
    if 'name' in request.args:
        query = query.filter(Package.name.in_(request.args.getlist('name')))
    if 'collection' in request.args:
        query = query.filter(Collection.name.in_(request.args.getlist('collection')))

    result = (
        db.query(literal_column(
            "coalesce(array_to_json(array_agg(row_to_json(pkg_query)))::text, '[]')"
        ).label('q'))
        .select_from(query.subquery('pkg_query'))
        .scalar()
    )

    return Response(result, mimetype='application/json')
Example 26
 def get_sqla_col(self, label=None):
     label = label or self.column_name
     if not self.expression:
         db_engine_spec = self.table.database.db_engine_spec
         type_ = db_engine_spec.get_sqla_column_type(self.type)
         col = column(self.column_name, type_=type_)
     else:
         col = literal_column(self.expression)
     col = self.table.make_sqla_column_compatible(col, label)
     return col
Example 27
 def visit_extract(self, extract, **kwargs):
     field = self.extract_map.get(extract.field, extract.field)
     affinity = sql_util.determine_date_affinity(extract.expr)
     
     casts = {sqltypes.Date:'date', sqltypes.DateTime:'timestamp', sqltypes.Interval:'interval', sqltypes.Time:'time'}
     cast = casts.get(affinity, None)
     if isinstance(extract.expr, sql.ColumnElement) and cast is not None:
         expr = extract.expr.op('::')(sql.literal_column(cast))
     else:
         expr = extract.expr
     return "EXTRACT(%s FROM %s)" % (
         field, self.process(expr))
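
A minimal sketch of the cast idiom above, outside the dialect compiler (the column name is illustrative): op('::') with a literal_column operand renders a PostgreSQL-style cast.

from sqlalchemy.sql import column, literal_column

expr = column("created_at").op("::")(literal_column("date"))
print(expr)  # created_at :: date  (spacing around '::' may vary by version)
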
Example 28
def weighted_ilike(self, value, weight=1):
    """ Calls the ILIKE operator and returns either 0 or the given weight. """

    # Make sure weight is numeric and we can safely
    # pass it to the literal_column()
    assert isinstance(weight, (int, float))

    # Convert weight to a literal_column()
    weight = literal_column(str(weight))

    # Return ilike expression
    return cast(and_(self != None, self.ilike(value)), Integer) * weight
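
A hypothetical standalone sketch of the expression weighted_ilike builds (the column name is illustrative); PostgreSQL keeps the ILIKE, other dialects may compile it as lower(...) LIKE lower(...).

from sqlalchemy import Integer, and_, cast
from sqlalchemy.sql import column, literal_column

title = column("title")
expr = cast(and_(title != None, title.ilike("%sql%")), Integer) * literal_column("2")
# roughly: CAST(title IS NOT NULL AND title ILIKE :title_1 AS INTEGER) * 2
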
Example 29
 def get_rows(cls, app_id, group_id):
     q1 =  DBSession.query(osModules.id.label('module_id'),
               osModules.kode.label('module_kode'),
               osModules.nama.label('module_nama'),
               literal_column('0').label('reads'),
               literal_column('0').label('writes'),
               literal_column('0').label('inserts'),
               literal_column('0').label('deletes'),
           ).filter(osModules.app_id == app_id) \
            .filter(osModules.id.notin_(
               DBSession.query(cls.module_id).filter(cls.group_id==group_id)))
     q2 =  DBSession.query(osGroupModules.module_id.label('module_id'),
               osModules.kode.label('module_kode'),
               osModules.nama.label('module_nama'),
               osGroupModules.reads.label('reads'),
               osGroupModules.writes.label('writes'),
               osGroupModules.inserts.label('inserts'),
               osGroupModules.deletes.label('deletes'),
           ).join(osModules) \
           .filter(osGroupModules.group_id == group_id) \
           .filter(osModules.app_id == app_id)   
     return q1.union(q2).all()
Example 30
    def test_literal_column_already_with_quotes(self):
        # Lower case names
        metadata = MetaData()
        table = Table("t1", metadata, Column("col1", Integer))

        # Note that 'col1' is already quoted (literal_column)
        columns = [sql.literal_column("'col1'").label("label1")]
        x = select(columns, from_obj=[table]).alias("alias1")
        x = x.select()
        self.assert_compile(
            x,
            "SELECT "
            "alias1.label1 "
            "FROM ("
            "SELECT "
            "'col1' AS label1 "
            "FROM t1"
            ") AS alias1",
        )

        # Not lower case names
        metadata = MetaData()
        table = Table("T1", metadata, Column("Col1", Integer))

        # Note that 'Col1' is already quoted (literal_column)
        columns = [sql.literal_column("'Col1'").label("Label1")]
        x = select(columns, from_obj=[table]).alias("Alias1")
        x = x.select()
        self.assert_compile(
            x,
            "SELECT "
            '"Alias1"."Label1" '
            "FROM ("
            "SELECT "
            "'Col1' AS \"Label1\" "
            'FROM "T1"'
            ') AS "Alias1"',
        )
Example 31
def get_measurement(measurement_id, download=None):
    """Get one measurement by measurement_id,
    fetching the file from S3 or the fastpath host as needed
    Returns only the measurement without extra data from the database
    """
    if measurement_id.startswith(FASTPATH_MSM_ID_PREFIX):
        return get_one_fastpath_measurement(measurement_id, download)

    # XXX this query is slow due to filtering by report_id and input
    # It also occasionally returns multiple rows and serves only the first one
    # TODO: add timing metric
    # TODO: switch to OOID to speed up the query
    # https://github.com/ooni/pipeline/issues/48
    m = RE_MSM_ID.match(measurement_id)
    if not m:
        raise BadRequest("Invalid measurement_id")
    msm_no = int(m.group(1))

    cols = [
        literal_column("measurement.report_no"),
        literal_column("frame_off"),
        literal_column("frame_size"),
        literal_column("intra_off"),
        literal_column("intra_size"),
        literal_column("textname"),
        literal_column("report.autoclaved_no"),
        literal_column("autoclaved.filename"),
    ]
    table = (
        sql.table("measurement")
        .join(
            sql.table("report"), sql.text("measurement.report_no = report.report_no"),
        )
        .join(
            sql.table("autoclaved"),
            sql.text("autoclaved.autoclaved_no = report.autoclaved_no"),
        )
    )
    where = sql.text("measurement.msm_no = :msm_no")
    query = select(cols).where(where).select_from(table)
    query_params = dict(msm_no=msm_no)
    q = current_app.db_session.execute(query, query_params)

    msmt = q.fetchone()
    if msmt is None:
        abort(404)

    # Usual size of LZ4 frames is 256kb of decompressed text.
    # Largest size of LZ4 frame was ~55Mb compressed and ~56Mb decompressed. :-/
    range_header = "bytes={}-{}".format(
        msmt.frame_off, msmt.frame_off + msmt.frame_size - 1
    )
    filename = msmt["autoclaved.filename"]
    r = requests.get(
        urljoin(current_app.config["AUTOCLAVED_BASE_URL"], filename),
        headers={"Range": range_header, REQID_HDR: request_id()},
    )
    r.raise_for_status()
    blob = r.content
    if len(blob) != msmt.frame_size:
        raise RuntimeError("Failed to fetch LZ4 frame", len(blob), msmt.frame_size)
    blob = lz4framed.decompress(blob)[msmt.intra_off : msmt.intra_off + msmt.intra_size]
    if len(blob) != msmt.intra_size or blob[:1] != b"{" or blob[-1:] != b"}":
        raise RuntimeError(
            "Failed to decompress LZ4 frame to measurement.json",
            len(blob),
            msmt.intra_size,
            blob[:1],
            blob[-1:],
        )
    # There is no replacement of `measurement_id` with `msm_no` or anything
    # else to keep sanity. Maybe it'll happen as part of orchestration update.
    # Also, blob is not decoded intentionally to save CPU
    filename = "ooni-msmt-{}-{}".format(measurement_id, msmt.textname.replace("/", "-"))
    response = make_response(blob)
    response.headers.set("Content-Type", "application/json")
    if download is not None:
        response.headers.set("Content-Disposition", "attachment", filename=filename)
    return response
Example 32
def list_measurements(
    report_id=None,
    probe_asn=None,
    probe_cc=None,
    test_name=None,
    since=None,
    until=None,
    since_index=None,
    order_by=None,
    order="desc",
    offset=0,
    limit=100,
    failure=None,
    anomaly=None,
    confirmed=None,
    category_code=None,
):
    """Search for measurements using only the database. Provide pagination.
    """
    # TODO: list_measurements and get_measurement will be simplified and
    # made faster by OOID: https://github.com/ooni/pipeline/issues/48

    log = current_app.logger

    ## Workaround for https://github.com/ooni/probe/issues/1034
    user_agent = request.headers.get("User-Agent")
    if user_agent.startswith("okhttp"):
        bug_probe1034_response = jsonify(
            {
                "metadata": {
                    "count": 1,
                    "current_page": 1,
                    "limit": 100,
                    "next_url": None,
                    "offset": 0,
                    "pages": 1,
                    "query_time": 0.001,
                },
                "results": [{"measurement_url": ""}],
            }
        )
        return bug_probe1034_response

    ## Prepare query parameters

    input_ = request.args.get("input")
    domain = request.args.get("domain")

    if probe_asn is not None:
        if probe_asn.startswith("AS"):
            probe_asn = probe_asn[2:]
        probe_asn = int(probe_asn)

    # When the user specifies a list that includes all the possible values for
    # boolean arguments, that is logically the same of applying no filtering at
    # all.
    # TODO: treat it as an error?
    if failure is not None:
        if set(failure) == set(["true", "false"]):
            failure = None
        else:
            failure = set(failure) == set(["true"])
    if anomaly is not None:
        if set(anomaly) == set(["true", "false"]):
            anomaly = None
        else:
            anomaly = set(anomaly) == set(["true"])
    if confirmed is not None:
        if set(confirmed) == set(["true", "false"]):
            confirmed = None
        else:
            confirmed = set(confirmed) == set(["true"])

    try:
        if since is not None:
            since = parse_date(since)
    except ValueError:
        raise BadRequest("Invalid since")

    try:
        if until is not None:
            until = parse_date(until)
    except ValueError:
        raise BadRequest("Invalid until")

    if order.lower() not in ("asc", "desc"):
        raise BadRequest("Invalid order")

    INULL = ""  # Special value for input = NULL to merge rows with FULL OUTER JOIN

    ## Create measurement+report columns for the SQL query
    cols = [
        # sql.text("measurement.input_no"),
        literal_column("report.test_start_time").label("test_start_time"),
        literal_column("measurement.measurement_start_time").label(
            "measurement_start_time"
        ),
        func.concat(MSM_ID_PREFIX, "-", sql.text("measurement.msm_no")).label(
            "measurement_id"
        ),
        literal_column("measurement.report_no").label("m_report_no"),
        func.coalesce(sql.text("measurement.anomaly"), false()).label("anomaly"),
        func.coalesce(sql.text("measurement.confirmed"), false()).label("confirmed"),
        sql.text("measurement.exc IS NOT NULL AS failure"),
        func.coalesce("{}").label("scores"),
        literal_column("measurement.exc").label("exc"),
        literal_column("measurement.residual_no").label("residual_no"),
        literal_column("report.report_id").label("report_id"),
        literal_column("report.probe_cc").label("probe_cc"),
        literal_column("report.probe_asn").label("probe_asn"),
        literal_column("report.test_name").label("test_name"),
        literal_column("report.report_no").label("report_no"),
        func.coalesce(sql.text("domain_input.input"), INULL).label("input"),
    ]

    ## Create fastpath columns for query
    fpcols = [
        # func.coalesce(0).label("m_input_no"),
        # We use test_start_time here as the batch pipeline has many NULL measurement_start_times
        literal_column("measurement_start_time").label("test_start_time"),
        literal_column("measurement_start_time").label("measurement_start_time"),
        func.concat(FASTPATH_MSM_ID_PREFIX, sql.text("tid")).label("measurement_id"),
        literal_column("anomaly").label("anomaly"),
        literal_column("confirmed").label("confirmed"),
        literal_column("msm_failure").label("failure"),
        cast(sql.text("scores"), String).label("scores"),
        literal_column("report_id"),
        literal_column("probe_cc"),
        literal_column("probe_asn"),
        literal_column("test_name"),
        func.coalesce(sql.text("fastpath.input"), INULL).label("input"),
    ]

    mrwhere = []
    fpwhere = []
    query_params = {}

    # Populate WHERE clauses and query_params dict

    if since is not None:
        query_params["since"] = since
        mrwhere.append(sql.text("measurement.measurement_start_time > :since"))
        fpwhere.append(sql.text("measurement_start_time > :since"))

    if until is not None:
        query_params["until"] = until
        mrwhere.append(sql.text("measurement.measurement_start_time <= :until"))
        fpwhere.append(sql.text("measurement_start_time <= :until"))

    if report_id:
        query_params["report_id"] = report_id
        mrwhere.append(sql.text("report.report_id = :report_id"))
        fpwhere.append(sql.text("report_id = :report_id"))

    if probe_cc:
        query_params["probe_cc"] = probe_cc
        mrwhere.append(sql.text("report.probe_cc = :probe_cc"))
        fpwhere.append(sql.text("probe_cc = :probe_cc"))

    if probe_asn is not None:
        query_params["probe_asn"] = probe_asn
        mrwhere.append(sql.text("report.probe_asn = :probe_asn"))
        fpwhere.append(sql.text("probe_asn = :probe_asn"))

    if test_name is not None:
        query_params["test_name"] = test_name
        mrwhere.append(sql.text("report.test_name = :test_name"))
        fpwhere.append(sql.text("test_name = :test_name"))

    # Filter on anomaly, confirmed and failure:
    # The database stores anomaly and confirmed as boolean + NULL and stores
    # failures in different columns. This leads to many possible combinations
    # but only a subset is used.
    # On anomaly and confirmed: any value != TRUE is treated as FALSE
    # See test_list_measurements_filter_flags_fastpath

    if anomaly is True:
        mrwhere.append(sql.text("measurement.anomaly IS TRUE"))
        fpwhere.append(sql.text("fastpath.anomaly IS TRUE"))

    elif anomaly is False:
        mrwhere.append(sql.text("measurement.anomaly IS NOT TRUE"))
        fpwhere.append(sql.text("fastpath.anomaly IS NOT TRUE"))

    if confirmed is True:
        mrwhere.append(sql.text("measurement.confirmed IS TRUE"))
        fpwhere.append(sql.text("fastpath.confirmed IS TRUE"))

    elif confirmed is False:
        mrwhere.append(sql.text("measurement.confirmed IS NOT TRUE"))
        fpwhere.append(sql.text("fastpath.confirmed IS NOT TRUE"))

    if failure is True:
        # residual_no is never NULL, msm_failure is always NULL
        mrwhere.append(sql.text("measurement.exc IS NOT NULL"))
        fpwhere.append(sql.text("fastpath.msm_failure IS TRUE"))

    elif failure is False:
        # on success measurement.exc is NULL
        mrwhere.append(sql.text("measurement.exc IS NULL"))
        fpwhere.append(sql.text("fastpath.msm_failure IS NOT TRUE"))

    fpq_table = sql.table("fastpath")
    mr_table = sql.table("measurement").join(
        sql.table("report"), sql.text("measurement.report_no = report.report_no"),
    )

    if input_ or domain or category_code:
        # join in domain_input
        mr_table = mr_table.join(
            sql.table("domain_input"),
            sql.text("domain_input.input_no = measurement.input_no"),
        )
        fpq_table = fpq_table.join(
            sql.table("domain_input"), sql.text("domain_input.input = fastpath.input")
        )

        if input_:
            # input_ overrides domain and category_code
            query_params["input"] = input_
            mrwhere.append(sql.text("domain_input.input = :input"))
            fpwhere.append(sql.text("domain_input.input = :input"))

        else:
            # both domain and category_code can be set at the same time
            if domain:
                query_params["domain"] = domain
                mrwhere.append(sql.text("domain_input.domain = :domain"))
                fpwhere.append(sql.text("domain_input.domain = :domain"))

            if category_code:
                query_params["category_code"] = category_code
                mr_table = mr_table.join(
                    sql.table("citizenlab"),
                    sql.text("citizenlab.url = domain_input.input"),
                )
                fpq_table = fpq_table.join(
                    sql.table("citizenlab"),
                    sql.text("citizenlab.url = domain_input.input"),
                )
                mrwhere.append(sql.text("citizenlab.category_code = :category_code"))
                fpwhere.append(sql.text("citizenlab.category_code = :category_code"))

    else:
        mr_table = mr_table.outerjoin(
            sql.table("domain_input"),
            sql.text("domain_input.input_no = measurement.input_no"),
        )

    # We run SELECTs on the measurement-report (mr) tables and fastpath independently
    # from each other and then merge them.
    # The FULL OUTER JOIN query is using LIMIT and OFFSET based on the
    # list_measurements arguments. To speed up the two nested queries,
    # an ORDER BY + LIMIT on "limit+offset" is applied in each of them to trim
    # away rows that would be removed anyways by the outer query.
    #
    # During a merge we can find that a measurement is:
    # - only in fastpath:       get_measurement will pick the JSON msmt from the fastpath host
    # - in both selects:        pick `scores` from fastpath and the msmt from the can
    # - only in "mr":           the msmt from the can
    #
    # This implements a failover mechanism where new msmts are loaded from fastpath
    # but can fall back to the traditional pipeline.

    mr_query = (
        select(cols).where(and_(*mrwhere)).select_from(mr_table).limit(offset + limit)
    )
    fp_query = (
        select(fpcols)
        .where(and_(*fpwhere))
        .select_from(fpq_table)
        .limit(offset + limit)
    )

    if order_by is None:
        # Use test_start_time or measurement_start_time depending on other
        # filters in order to avoid heavy joins.
        # Filtering on anomaly, confirmed, msm_failure -> measurement_start_time
        # Filtering on probe_cc, probe_asn, test_name -> test_start_time
        # See test_list_measurements_slow_order_by_* tests
        if probe_cc or probe_asn or test_name:
            order_by = "test_start_time"
        elif anomaly or confirmed or failure or input_ or domain or category_code:
            order_by = "measurement_start_time"
        else:
            order_by = "measurement_start_time"

    mr_query = mr_query.order_by(text("{} {}".format(order_by, order)))
    fp_query = fp_query.order_by(text("{} {}".format(order_by, order)))

    mr_query = mr_query.alias("mr")
    fp_query = fp_query.alias("fp")

    j = fp_query.join(
        mr_query,
        sql.text("fp.input = mr.input AND fp.report_id = mr.report_id"),
        full=True,
    )

    def coal(colname):
        return func.coalesce(
            literal_column(f"fp.{colname}"), literal_column(f"mr.{colname}")
        ).label(colname)

    # Merge data from mr_table and fastpath.
    # Most of the time we prefer data from fastpath, using coal().
    # For measurement_id, we prefer mr_table. See test_list_measurements_shared
    merger = [
        coal("test_start_time"),
        coal("measurement_start_time"),
        func.coalesce(
            literal_column("mr.measurement_id"), literal_column("fp.measurement_id")
        ).label("measurement_id"),
        func.coalesce(literal_column("mr.m_report_no"), 0).label("m_report_no"),
        coal("anomaly"),
        coal("confirmed"),
        coal("failure"),
        func.coalesce(literal_column("fp.scores"), "{}").label("scores"),
        column("exc"),
        func.coalesce(literal_column("mr.residual_no"), 0).label("residual_no"),
        coal("report_id"),
        coal("probe_cc"),
        coal("probe_asn"),
        coal("test_name"),
        coal("input"),
    ]
    # Assemble the "external" query. Run a final order by followed by limit and
    # offset
    fob = text("{} {}".format(order_by, order))
    query = select(merger).select_from(j).order_by(fob).offset(offset).limit(limit)

    with sentry.configure_scope() as scope:
        # Set query (without params) in Sentry scope for the rest of the API call
        # https://github.com/getsentry/sentry-python/issues/184
        scope.set_extra("sql_query", query)

    # Run the query, generate the results list
    iter_start_time = time.time()

    try:
        q = current_app.db_session.execute(query, query_params)
        tmpresults = []
        for row in q:
            url = urljoin(
                current_app.config["BASE_URL"],
                "/api/v1/measurement/%s" % row.measurement_id,
            )
            tmpresults.append(
                {
                    "measurement_url": url,
                    "measurement_id": row.measurement_id,
                    "report_id": row.report_id,
                    "probe_cc": row.probe_cc,
                    "probe_asn": "AS{}".format(row.probe_asn),
                    "test_name": row.test_name,
                    "measurement_start_time": row.measurement_start_time,
                    "input": row.input,
                    "anomaly": row.anomaly,
                    "confirmed": row.confirmed,
                    "failure": row.failure,
                    "scores": json.loads(row.scores),
                }
            )
    except OperationalError as exc:
        if isinstance(exc.orig, QueryCanceledError):
            # Timeout due to a slow query. Generate metric and do not feed it
            # to Sentry.
            abort(504)

        raise exc

    # For each report_id / input tuple, we want at most one entry. Measurements
    # from mr_table and fastpath have already been merged by the FULL OUTER JOIN
    # but we have duplicate msmts sharing the same report_id / input.
    results = _merge_results(tmpresults)

    # Replace the special value INULL for "input" with None
    for i, r in enumerate(results):
        if r["input"] == INULL:
            results[i]["input"] = None

    pages = -1
    count = -1
    current_page = math.ceil(offset / limit) + 1

    # We got fewer results than expected, so we know the count and that we are done
    if len(results) < limit:
        count = offset + len(results)
        pages = math.ceil(count / limit)
        next_url = None
    else:
        # XXX this is too intensive. find a workaround
        # count_start_time = time.time()
        # count = q.count()
        # pages = math.ceil(count / limit)
        # current_page = math.ceil(offset / limit) + 1
        # query_time += time.time() - count_start_time
        next_args = request.args.to_dict()
        next_args["offset"] = "%s" % (offset + limit)
        next_args["limit"] = "%s" % limit
        next_url = urljoin(
            current_app.config["BASE_URL"],
            "/api/v1/measurements?%s" % urlencode(next_args),
        )

    query_time = time.time() - iter_start_time
    metadata = {
        "offset": offset,
        "limit": limit,
        "count": count,
        "pages": pages,
        "current_page": current_page,
        "next_url": next_url,
        "query_time": query_time,
    }

    return jsonify({"metadata": metadata, "results": results[:limit]})
Example 33
 def coal(colname):
     return func.coalesce(
         literal_column(f"fp.{colname}"), literal_column(f"mr.{colname}")
     ).label(colname)
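
A minimal sketch of what one merged column compiles to, assuming the fp/mr aliases from the surrounding query:

from sqlalchemy.sql import func, literal_column, select

merged = func.coalesce(literal_column("fp.anomaly"), literal_column("mr.anomaly")).label("anomaly")
print(select([merged]))  # SELECT coalesce(fp.anomaly, mr.anomaly) AS anomaly
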
Example 34
 async def update(cls, conn: SAConn, col: dict, col_upd: dict) -> bool:
     key = list(col.keys())[0]
     query = cls.__table__.update().where(
         literal_column(key) == col[key]).values(**col_upd)
     await conn.execute(query)
     return True
Example 35
    def get_sqla_query(  # sqla
            self,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            timeseries_limit_metric=None,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            orderby=None,
            extras=None,
            columns=None):
        """Querying any sqla table from this common interface"""

        template_kwargs = {
            'from_dttm': from_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'to_dttm': to_dttm,
        }
        template_processor = self.get_template_processor(**template_kwargs)

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}
        metrics_dict = {m.metric_name: m for m in self.metrics}

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        for m in metrics:
            if m not in metrics_dict:
                raise Exception(_("Metric '{}' is not valid".format(m)))
        metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
        timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
        timeseries_limit_metric_expr = None
        if timeseries_limit_metric:
            timeseries_limit_metric_expr = \
                timeseries_limit_metric.sqla_col
        if metrics:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr = literal_column("COUNT(*)").label("ccount")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.sqla_col
                inner = col.sqla_col.label(col.column_name + '__')

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].sqla_col)
            metrics_exprs = []

        if granularity:

            @compiles(ColumnClause)
            def visit_column(element, compiler, **kw):
                """Patch for sqlalchemy bug

                TODO: sqlalchemy 1.2 release should be doing this on its own.
                Patch only if the column clause is specific for DateTime
                set and granularity is selected.
                """
                text = compiler.visit_column(element, **kw)
                try:
                    if (element.is_literal
                            and hasattr(element.type, 'python_type')
                            and type(element.type) is DateTime):
                        text = text.replace('%%', '%')
                except NotImplementedError:
                    # Some elements raise NotImplementedError for python_type
                    pass
                return text

            dttm_col = cols[granularity]
            time_grain = extras.get('time_grain_sqla')

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

        select_exprs += metrics_exprs
        qry = sa.select(select_exprs)

        # Supporting arbitrary SQL statements in place of tables
        if self.sql:
            from_sql = template_processor.process_template(self.sql)
            tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')
        else:
            tbl = self.get_sqla_table()

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for flt in filter:
            if not all([flt.get(s) for s in ['col', 'op', 'val']]):
                continue
            col = flt['col']
            op = flt['op']
            eq = flt['val']
            col_obj = cols.get(col)
            if col_obj:
                if op in ('in', 'not in'):
                    values = [v.strip("'").strip('"') for v in eq]
                    if col_obj.is_num:
                        values = [utils.js_string_to_num(s) for s in values]
                    cond = col_obj.sqla_col.in_(values)
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
                elif op == '==':
                    where_clause_and.append(col_obj.sqla_col == eq)
                elif op == '!=':
                    where_clause_and.append(col_obj.sqla_col != eq)
                elif op == '>':
                    where_clause_and.append(col_obj.sqla_col > eq)
                elif op == '<':
                    where_clause_and.append(col_obj.sqla_col < eq)
                elif op == '>=':
                    where_clause_and.append(col_obj.sqla_col >= eq)
                elif op == '<=':
                    where_clause_and.append(col_obj.sqla_col <= eq)
                elif op == 'LIKE':
                    where_clause_and.append(col_obj.sqla_col.like(eq))
        if extras:
            where = extras.get('where')
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text('({})'.format(where))]
            having = extras.get('having')
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text('({})'.format(having))]
        if granularity:
            qry = qry.where(and_(*([time_filter] + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        elif orderby:
            for col, ascending in orderby:
                direction = asc if ascending else desc
                qry = qry.order_by(direction(col))

        qry = qry.limit(row_limit)

        if is_timeseries and timeseries_limit and groupby:
            # some sql dialects require order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)
            ob = inner_main_metric_expr
            if timeseries_limit_metric_expr is not None:
                ob = timeseries_limit_metric_expr
            subq = subq.order_by(desc(ob))
            subq = subq.limit(timeseries_limit)
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))

        return qry.select_from(tbl)
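
The timeseries_limit branch at the end is the subtle part: it builds an inner query of the top groups ordered by the main metric, limits it, and joins the outer query back onto the '__'-suffixed aliases. A self-contained miniature of that shape; the events table, its columns, and the limit of 5 are assumptions for illustration:

import sqlalchemy as sa
from sqlalchemy import select, func, desc

metadata = sa.MetaData()
events = sa.Table("events", metadata,
                  sa.Column("country", sa.String),
                  sa.Column("ts", sa.DateTime),
                  sa.Column("value", sa.Integer))

metric = func.sum(events.c.value)
mme_inner = metric.label("mme_inner__")

# inner query: top 5 groups by the main metric
inner = (select([events.c.country.label("country__"), mme_inner])
         .group_by(events.c.country)
         .order_by(desc(mme_inner))
         .limit(5)
         .alias("top_series"))

# outer query joined back onto the inner alias via the '__'-suffixed label
outer = (select([events.c.country, events.c.ts, metric.label("metric")])
         .select_from(events.join(inner, events.c.country == inner.c.country__))
         .group_by(events.c.country, events.c.ts))

print(outer)  # the full query, with the limited subquery in the FROM clause
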
Esempio n. 36
0
async def delete(cls, conn: SAConn, col: dict) -> bool:
    key = list(col.keys())[0]
    query = cls.__table__.delete().where(literal_column(key) == col[key])
    await conn.execute(query)
    return True
Esempio n. 37
0
    def visit_select(self, select, **kwargs):
        """Look for ``LIMIT`` and OFFSET in a select statement, and if
        so tries to wrap it in a subquery with ``rownum`` criterion.
        """

        if not getattr(select, '_oracle_visit', None):
            if not self.dialect.use_ansi:
                if self.stack and 'from' in self.stack[-1]:
                    existingfroms = self.stack[-1]['from']
                else:
                    existingfroms = None

                froms = select._get_display_froms(existingfroms)
                whereclause = self._get_nonansi_join_whereclause(froms)
                if whereclause:
                    select = select.where(whereclause)
                    select._oracle_visit = True

            if select._limit is not None or select._offset is not None:
                # See http://www.oracle.com/technology/oramag/oracle/06-sep/o56asktom.html
                #
                # Generalized form of an Oracle pagination query:
                #   select ... from (
                #     select /*+ FIRST_ROWS(N) */ ...., rownum as ora_rn from (
                #         select distinct ... where ... order by ...
                #     ) where ROWNUM <= :limit+:offset
                #   ) where ora_rn > :offset
                # Outer select and "ROWNUM as ora_rn" can be dropped if limit=0

                # TODO: use annotations instead of clone + attr set ?
                select = select._generate()
                select._oracle_visit = True

                # Wrap the middle select and add the hint
                limitselect = sql.select([c for c in select.c])
                if select._limit and self.dialect.optimize_limits:
                    limitselect = limitselect.prefix_with("/*+ FIRST_ROWS(%d) */" % select._limit)

                limitselect._oracle_visit = True
                limitselect._is_wrapper = True

                # If needed, add the limiting clause
                if select._limit is not None:
                    max_row = select._limit
                    if select._offset is not None:
                        max_row += select._offset
                    limitselect.append_whereclause(
                        sql.literal_column("ROWNUM") <= max_row)

                # If needed, add the ora_rn, and wrap again with offset.
                if select._offset is None:
                    select = limitselect
                else:
                    limitselect = limitselect.column(
                        sql.literal_column("ROWNUM").label("ora_rn"))
                    limitselect._oracle_visit = True
                    limitselect._is_wrapper = True

                    offsetselect = sql.select(
                        [c for c in limitselect.c if c.key != 'ora_rn'])
                    offsetselect._oracle_visit = True
                    offsetselect._is_wrapper = True

                    offsetselect.append_whereclause(
                        sql.literal_column("ora_rn") > select._offset)

                    select = offsetselect

        kwargs['iswrapper'] = getattr(select, '_is_wrapper', False)
        return compiler.DefaultCompiler.visit_select(self, select, **kwargs)
Esempio n. 38
0
    def get_sqla_query(  # sqla
            self,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            timeseries_limit_metric=None,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            orderby=None,
            extras=None,
            columns=None,
            form_data=None):
        """Querying any sqla table from this common interface"""

        template_kwargs = {
            'from_dttm': from_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'to_dttm': to_dttm,
            'form_data': form_data,
        }
        template_processor = self.get_template_processor(**template_kwargs)
        db_engine_spec = self.database.db_engine_spec

        if DTTM_ALIAS in groupby:
            groupby.remove(DTTM_ALIAS)
            is_timeseries = True

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        # Database spec supports join-free timeslot grouping
        time_groupby_inline = db_engine_spec.time_groupby_inline

        cols = {col.column_name: col for col in self.columns}
        metrics_dict = {m.metric_name: m for m in self.metrics}

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        for m in metrics:
            if m not in metrics_dict:
                raise Exception(_("Metric '{}' is not valid".format(m)))
        metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
        timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
        timeseries_limit_metric_expr = None
        if timeseries_limit_metric:
            timeseries_limit_metric_expr = \
                timeseries_limit_metric.sqla_col
        if metrics_exprs:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr = literal_column("COUNT(*)").label("ccount")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.sqla_col
                inner = col.sqla_col.label(col.column_name + '__')

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].sqla_col)
            metrics_exprs = []

        if granularity:
            dttm_col = cols[granularity]
            time_grain = extras.get('time_grain_sqla')
            time_filters = []

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            # Use main dttm column to support index with secondary dttm columns
            if db_engine_spec.time_secondary_columns and \
                    self.main_dttm_col in self.dttm_cols and \
                    self.main_dttm_col != dttm_col.column_name:
                time_filters.append(cols[self.main_dttm_col].get_time_filter(
                    from_dttm, to_dttm))
            time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

        select_exprs += metrics_exprs
        qry = sa.select(select_exprs)

        tbl = self.get_from_clause(template_processor, db_engine_spec)

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for flt in filter:
            if not all([flt.get(s) for s in ['col', 'op', 'val']]):
                continue
            col = flt['col']
            op = flt['op']
            eq = flt['val']
            col_obj = cols.get(col)
            if col_obj:
                if op in ('in', 'not in'):
                    values = []
                    for v in eq:
                        # For backwards compatibility and edge cases
                        # where a column data type might have changed
                        if isinstance(v, basestring):
                            v = v.strip("'").strip('"')
                            if col_obj.is_num:
                                v = utils.string_to_num(v)

                        # Removing empty strings and non numeric values
                        # targeting numeric columns
                        if v is not None:
                            values.append(v)
                    cond = col_obj.sqla_col.in_(values)
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
                else:
                    if col_obj.is_num:
                        eq = utils.string_to_num(flt['val'])
                    if op == '==':
                        where_clause_and.append(col_obj.sqla_col == eq)
                    elif op == '!=':
                        where_clause_and.append(col_obj.sqla_col != eq)
                    elif op == '>':
                        where_clause_and.append(col_obj.sqla_col > eq)
                    elif op == '<':
                        where_clause_and.append(col_obj.sqla_col < eq)
                    elif op == '>=':
                        where_clause_and.append(col_obj.sqla_col >= eq)
                    elif op == '<=':
                        where_clause_and.append(col_obj.sqla_col <= eq)
                    elif op == 'LIKE':
                        where_clause_and.append(col_obj.sqla_col.like(eq))
        if extras:
            where = extras.get('where')
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text('({})'.format(where))]
            having = extras.get('having')
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text('({})'.format(having))]
        if granularity:
            qry = qry.where(and_(*(time_filters + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        elif orderby:
            for col, ascending in orderby:
                direction = asc if ascending else desc
                qry = qry.order_by(direction(col))

        if row_limit:
            qry = qry.limit(row_limit)

        if is_timeseries and \
                timeseries_limit and groupby and not time_groupby_inline:
            # some sql dialects require order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)
            ob = inner_main_metric_expr
            if timeseries_limit_metric_expr is not None:
                ob = timeseries_limit_metric_expr
            subq = subq.order_by(desc(ob))
            subq = subq.limit(timeseries_limit)
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))

        return qry.select_from(tbl)
Esempio n. 39
0
def airspace_image(cache_key, airspace_id):
    if not mapscript_available:
        abort(404)

    # get information from cache...
    infringements = cache.get('upload_airspace_infringements_' + cache_key)
    flight_path = cache.get('upload_airspace_flight_path_' + cache_key)

    # abort if invalid cache key
    if not infringements \
       or not flight_path:
        abort(404)

    # Convert the coordinates into a list of tuples
    coordinates = [(c.location['longitude'], c.location['latitude']) for c in flight_path]
    # Create a shapely LineString object from the coordinates
    linestring = LineString(coordinates)
    # Save the new path as WKB
    locations = from_shape(linestring, srid=4326)

    highlight_locations = []
    extent_epsg4326 = [180, 85.05112878, -180, -85.05112878]

    for period in infringements[airspace_id]:
        # Convert the coordinates into a list of tuples
        coordinates = [(c['location']['longitude'], c['location']['latitude']) for c in period]

        # Create a shapely LineString object from the coordinates
        if len(coordinates) == 1:
            # a LineString must contain at least two points...
            linestring = LineString([coordinates[0], coordinates[0]])
        else:
            linestring = LineString(coordinates)

        highlight_locations.append(linestring)

        # gather extent
        (minx, miny, maxx, maxy) = linestring.bounds

        extent_epsg4326[0] = min(extent_epsg4326[0], minx)
        extent_epsg4326[1] = min(extent_epsg4326[1], miny)
        extent_epsg4326[2] = max(extent_epsg4326[2], maxx)
        extent_epsg4326[3] = max(extent_epsg4326[3], maxy)

    # Save the new path as WKB
    highlight_multilinestring = from_shape(MultiLineString(highlight_locations), srid=4326)

    # increase extent by factor 1.05
    width = abs(extent_epsg4326[0] - extent_epsg4326[2])
    height = abs(extent_epsg4326[1] - extent_epsg4326[3])

    center_x = (extent_epsg4326[0] + extent_epsg4326[2]) / 2
    center_y = (extent_epsg4326[1] + extent_epsg4326[3]) / 2

    extent_epsg4326[0] = center_x - width / 2 * 1.05
    extent_epsg4326[1] = center_y - height / 2 * 1.05
    extent_epsg4326[2] = center_x + width / 2 * 1.05
    extent_epsg4326[3] = center_y + height / 2 * 1.05

    # minimum extent should be 0.3 deg
    width = abs(extent_epsg4326[0] - extent_epsg4326[2])
    height = abs(extent_epsg4326[1] - extent_epsg4326[3])

    if width < 0.3:
        extent_epsg4326[0] = center_x - 0.15
        extent_epsg4326[2] = center_x + 0.15

    if height < 0.3:
        extent_epsg4326[1] = center_y - 0.15
        extent_epsg4326[3] = center_y + 0.15

    # convert extent from EPSG4326 to EPSG3857
    epsg4326 = pyproj.Proj(init='epsg:4326')
    epsg3857 = pyproj.Proj(init='epsg:3857')

    x1, y1 = pyproj.transform(epsg4326, epsg3857, extent_epsg4326[0], extent_epsg4326[1])
    x2, y2 = pyproj.transform(epsg4326, epsg3857, extent_epsg4326[2], extent_epsg4326[3])

    extent_epsg3857 = [x1, y1, x2, y2]

    # load basemap and set size + extent
    basemap_path = os.path.join(current_app.config.get('SKYLINES_MAPSERVER_PATH'), 'basemap.map')
    map_object = mapscript.mapObj(basemap_path)
    map_object.setSize(400, 400)
    map_object.setExtent(extent_epsg3857[0], extent_epsg3857[1], extent_epsg3857[2], extent_epsg3857[3])

    # enable airspace and airports layers
    num_layers = map_object.numlayers
    for i in range(num_layers):
        layer = map_object.getLayer(i)

        if layer.group == 'Airports':
            layer.status = mapscript.MS_ON

        if layer.group == 'Airspace':
            layer.status = mapscript.MS_ON

    # get flights layer
    flights_layer = map_object.getLayerByName('Flights')
    highlight_layer = map_object.getLayerByName('Flights_Highlight')

    # set sql query for blue flight
    one = literal_column('1 as flight_id')
    flight_query = db.session.query(locations.label('flight_geometry'), one)

    flights_layer.data = 'flight_geometry FROM (' + query_to_sql(flight_query) + ')' + \
                         ' AS foo USING UNIQUE flight_id USING SRID=4326'

    # set sql query for highlighted linestrings
    highlighted_query = db.session.query(highlight_multilinestring.label('flight_geometry'), one)

    highlight_layer.data = 'flight_geometry FROM (' + query_to_sql(highlighted_query) + ')' + \
                           ' AS foo USING UNIQUE flight_id USING SRID=4326'

    highlight_layer.status = mapscript.MS_ON

    # get osm layer and set WMS url
    osm_layer = map_object.getLayerByName('OSM')
    osm_layer.connection = current_app.config.get('SKYLINES_MAP_TILE_URL') + \
        '/service?'

    # draw map
    map_image = map_object.draw()

    # get image
    mapscript.msIO_installStdoutToBuffer()
    map_image.write()
    content = mapscript.msIO_getStdoutBufferBytes()

    # return to client
    resp = make_response(content)
    resp.headers['Content-type'] = map_image.format.mimetype
    return resp
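
As an aside, the Proj(init=...) / pyproj.transform combination used above is deprecated in pyproj 2 and later. A sketch of the equivalent extent conversion with the newer Transformer API, using an assumed sample lon/lat extent in place of the computed one:

from pyproj import Transformer

extent_epsg4326 = [7.0, 46.0, 8.0, 47.0]   # assumed sample extent (lon/lat)

transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
x1, y1 = transformer.transform(extent_epsg4326[0], extent_epsg4326[1])
x2, y2 = transformer.transform(extent_epsg4326[2], extent_epsg4326[3])

extent_epsg3857 = [x1, y1, x2, y2]
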
Esempio n. 40
0
def get_lastrowid(self):
    s = sql.select([sql.literal_column('IDENTITY_VAL_LOCAL()')])
    return self.connection.scalar(s)
Esempio n. 41
0
    def get_sqla_query(  # sqla
        self,
        groupby,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        filter=None,
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        order_desc=True,
    ) -> SqlaQuery:
        """Querying any sqla table from this common interface"""
        template_kwargs = {
            "from_dttm": from_dttm,
            "groupby": groupby,
            "metrics": metrics,
            "row_limit": row_limit,
            "to_dttm": to_dttm,
            "filter": filter,
            "columns": {col.column_name: col
                        for col in self.columns},
        }
        template_kwargs.update(self.template_params_dict)
        extra_cache_keys: List[Any] = []
        template_kwargs["extra_cache_keys"] = extra_cache_keys
        template_processor = self.get_template_processor(**template_kwargs)
        db_engine_spec = self.database.db_engine_spec
        prequeries: List[str] = []

        orderby = orderby or []

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        # Database spec supports join-free timeslot grouping
        time_groupby_inline = db_engine_spec.time_groupby_inline

        cols: Dict[str,
                   Column] = {col.column_name: col
                              for col in self.columns}
        metrics_dict: Dict[str, SqlMetric] = {
            m.metric_name: m
            for m in self.metrics
        }

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        if not groupby and not metrics and not columns:
            raise Exception(_("Empty query?"))
        metrics_exprs = []
        for m in metrics:
            if utils.is_adhoc_metric(m):
                metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols))
            elif m in metrics_dict:
                metrics_exprs.append(metrics_dict[m].get_sqla_col())
            else:
                raise Exception(
                    _("Metric '%(metric)s' does not exist", metric=m))
        if metrics_exprs:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr, label = literal_column("COUNT(*)"), "ccount"
            main_metric_expr = self.make_sqla_column_compatible(
                main_metric_expr, label)

        select_exprs: List[Column] = []
        groupby_exprs_sans_timestamp: OrderedDict = OrderedDict()

        if groupby:
            select_exprs = []
            for s in groupby:
                if s in cols:
                    outer = cols[s].get_sqla_col()
                else:
                    outer = literal_column(f"({s})")
                    outer = self.make_sqla_column_compatible(outer, s)

                groupby_exprs_sans_timestamp[outer.name] = outer
                select_exprs.append(outer)
        elif columns:
            for s in columns:
                select_exprs.append(
                    cols[s].get_sqla_col() if s in cols else self.
                    make_sqla_column_compatible(literal_column(s)))
            metrics_exprs = []

        groupby_exprs_with_timestamp = OrderedDict(
            groupby_exprs_sans_timestamp.items())
        if granularity:
            dttm_col = cols[granularity]
            time_grain = extras.get("time_grain_sqla")
            time_filters = []

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs_with_timestamp[timestamp.name] = timestamp

            # Use main dttm column to support index with secondary dttm columns
            if (db_engine_spec.time_secondary_columns
                    and self.main_dttm_col in self.dttm_cols
                    and self.main_dttm_col != dttm_col.column_name):
                time_filters.append(cols[self.main_dttm_col].get_time_filter(
                    from_dttm, to_dttm))
            time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

        select_exprs += metrics_exprs

        labels_expected = [c._df_label_expected for c in select_exprs]

        select_exprs = db_engine_spec.make_select_compatible(
            groupby_exprs_with_timestamp.values(), select_exprs)
        qry = sa.select(select_exprs)

        tbl = self.get_from_clause(template_processor)

        if not columns:
            qry = qry.group_by(*groupby_exprs_with_timestamp.values())

        where_clause_and = []
        having_clause_and: List = []
        for flt in filter:
            if not all([flt.get(s) for s in ["col", "op"]]):
                continue
            col = flt["col"]
            op = flt["op"]
            col_obj = cols.get(col)
            if col_obj:
                is_list_target = op in ("in", "not in")
                eq = self.filter_values_handler(
                    flt.get("val"),
                    target_column_is_numeric=col_obj.is_num,
                    is_list_target=is_list_target,
                )
                if op in ("in", "not in"):
                    cond = col_obj.get_sqla_col().in_(eq)
                    if "<NULL>" in eq:
                        cond = or_(cond, col_obj.get_sqla_col() == None)
                    if op == "not in":
                        cond = ~cond
                    where_clause_and.append(cond)
                else:
                    if col_obj.is_num:
                        eq = utils.string_to_num(flt["val"])
                    if op == "==":
                        where_clause_and.append(col_obj.get_sqla_col() == eq)
                    elif op == "!=":
                        where_clause_and.append(col_obj.get_sqla_col() != eq)
                    elif op == ">":
                        where_clause_and.append(col_obj.get_sqla_col() > eq)
                    elif op == "<":
                        where_clause_and.append(col_obj.get_sqla_col() < eq)
                    elif op == ">=":
                        where_clause_and.append(col_obj.get_sqla_col() >= eq)
                    elif op == "<=":
                        where_clause_and.append(col_obj.get_sqla_col() <= eq)
                    elif op == "LIKE":
                        where_clause_and.append(
                            col_obj.get_sqla_col().like(eq))
                    elif op == "IS NULL":
                        where_clause_and.append(col_obj.get_sqla_col() == None)
                    elif op == "IS NOT NULL":
                        where_clause_and.append(col_obj.get_sqla_col() != None)
        if extras:
            where = extras.get("where")
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text("({})".format(where))]
            having = extras.get("having")
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text("({})".format(having))]
        if granularity:
            qry = qry.where(and_(*(time_filters + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))

        if not orderby and not columns:
            orderby = [(main_metric_expr, not order_desc)]

        for col, ascending in orderby:
            direction = asc if ascending else desc
            if utils.is_adhoc_metric(col):
                col = self.adhoc_metric_to_sqla(col, cols)
            elif col in cols:
                col = cols[col].get_sqla_col()
            qry = qry.order_by(direction(col))

        if row_limit:
            qry = qry.limit(row_limit)

        if is_timeseries and timeseries_limit and groupby and not time_groupby_inline:
            if self.database.db_engine_spec.allows_joins:
                # some sql dialects require order by expressions
                # to also be in the select clause -- others, e.g. vertica,
                # require a unique inner alias
                inner_main_metric_expr = self.make_sqla_column_compatible(
                    main_metric_expr, "mme_inner__")
                inner_groupby_exprs = []
                inner_select_exprs = []
                for gby_name, gby_obj in groupby_exprs_sans_timestamp.items():
                    inner = self.make_sqla_column_compatible(
                        gby_obj, gby_name + "__")
                    inner_groupby_exprs.append(inner)
                    inner_select_exprs.append(inner)

                inner_select_exprs += [inner_main_metric_expr]
                subq = select(inner_select_exprs).select_from(tbl)
                inner_time_filter = dttm_col.get_time_filter(
                    inner_from_dttm or from_dttm, inner_to_dttm or to_dttm)
                subq = subq.where(
                    and_(*(where_clause_and + [inner_time_filter])))
                subq = subq.group_by(*inner_groupby_exprs)

                ob = inner_main_metric_expr
                if timeseries_limit_metric:
                    ob = self._get_timeseries_orderby(timeseries_limit_metric,
                                                      metrics_dict, cols)
                direction = desc if order_desc else asc
                subq = subq.order_by(direction(ob))
                subq = subq.limit(timeseries_limit)

                on_clause = []
                for gby_name, gby_obj in groupby_exprs_sans_timestamp.items():
                    # in this case the column name, not the alias, needs to be
                    # conditionally mutated, as it refers to the column alias in
                    # the inner query
                    col_name = db_engine_spec.make_label_compatible(gby_name +
                                                                    "__")
                    on_clause.append(gby_obj == column(col_name))

                tbl = tbl.join(subq.alias(), and_(*on_clause))
            else:
                if timeseries_limit_metric:
                    orderby = [(
                        self._get_timeseries_orderby(timeseries_limit_metric,
                                                     metrics_dict, cols),
                        False,
                    )]

                # run prequery to get top groups
                prequery_obj = {
                    "is_timeseries": False,
                    "row_limit": timeseries_limit,
                    "groupby": groupby,
                    "metrics": metrics,
                    "granularity": granularity,
                    "from_dttm": inner_from_dttm or from_dttm,
                    "to_dttm": inner_to_dttm or to_dttm,
                    "filter": filter,
                    "orderby": orderby,
                    "extras": extras,
                    "columns": columns,
                    "order_desc": True,
                }
                result = self.query(prequery_obj)
                prequeries.append(result.query)
                dimensions = [
                    c for c in result.df.columns
                    if c not in metrics and c in groupby_exprs_sans_timestamp
                ]
                top_groups = self._get_top_groups(
                    result.df, dimensions, groupby_exprs_sans_timestamp)
                qry = qry.where(top_groups)

        return SqlaQuery(
            extra_cache_keys=extra_cache_keys,
            labels_expected=labels_expected,
            sqla_query=qry.select_from(tbl),
            prequeries=prequeries,
        )
Esempio n. 42
0
def do_analyse():
    print("\r\n[do_analyse...]")
    min_to_end_stmnt = static_assessment.maximum_question_length // letters_per_min
    created_ago = datetime.datetime.now() - datetime.timedelta(
        minutes=min_to_end_stmnt)

    session = db_session()

    stmnts = session.query(Statement.id, Statement.channel_id, Statement.user_id, Statement.first_msg_id, Statement.last_msg_id).\
        filter(and_(Statement.created<created_ago, Statement.was_processed==False)).distinct().all()

    if stmnts is None or len(stmnts) == 0:
        print("[do_analyse] nothing to process.")
        return
    else:
        print("[do_analyse] to process: ", len(stmnts))

    pairs = dict()
    for stmnt in stmnts:
        stmnt_id, channel_id, user_id, first_id, last_id = stmnt
        message_text = session.query(func.string_agg(TelegramTextMessage.message,
                    aggregate_order_by(literal_column("'. '"),
                            TelegramTextMessage.created))).\
                filter(and_(TelegramTextMessage.channel_id==channel_id, TelegramTextMessage.user_id==user_id)).\
                filter(TelegramTextMessage.message_id.between(first_id, last_id)).\
                distinct().\
                all()
        pairs[stmnt_id] = message_text
    session.close()

    questions = list()
    not_question = list()
    for stmnt_id, message in pairs.items():
        if len(message) == 0:
            print("[Message len error]")
            not_question.append(stmnt_id)
            continue

        is_question = analyser.validate(''.join(message[0]))
        if is_question:
            questions.append(stmnt_id)
        else:
            not_question.append(stmnt_id)

    session = db_session()

    if len(questions) > 0:
        print("[do_analyse] questions found: ", len(questions))
        update_query = Statement.__table__.update().values(is_question=True, was_processed=True).\
            where(Statement.id.in_(questions))
        session.execute(update_query)

    if len(not_question) > 0:
        print("[do_analyse] not questions: ", len(not_question))
        update_query_2 = Statement.__table__.update().values(is_question=False, was_processed=True).\
            where(Statement.id.in_(not_question))
        session.execute(update_query_2)

    session.commit()
    session.close()
    print("[do_analyse] done.")
Esempio n. 43
0
    def get_sqla_query(  # sqla
            self,
            groupby, metrics,
            granularity,
            from_dttm, to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            timeseries_limit_metric=None,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            orderby=None,
            extras=None,
            columns=None,
            order_desc=True,
            prequeries=None,
            is_prequery=False,
        ):
        """Querying any sqla table from this common interface"""
        template_kwargs = {
            'from_dttm': from_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'to_dttm': to_dttm,
            'filter': filter,
            'columns': {col.column_name: col for col in self.columns},
        }
        template_kwargs.update(self.template_params_dict)
        template_processor = self.get_template_processor(**template_kwargs)
        db_engine_spec = self.database.db_engine_spec

        orderby = orderby or []

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        # Database spec supports join-free timeslot grouping
        time_groupby_inline = db_engine_spec.time_groupby_inline

        cols = {col.column_name: col for col in self.columns}
        metrics_dict = {m.metric_name: m for m in self.metrics}

        if not granularity and is_timeseries:
            raise Exception(_(
                'Datetime column not provided as part table configuration '
                'and is required by this type of chart'))
        if not groupby and not metrics and not columns:
            raise Exception(_('Empty query?'))
        metrics_exprs = []
        for m in metrics:
            if utils.is_adhoc_metric(m):
                metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols))
            elif m in metrics_dict:
                metrics_exprs.append(metrics_dict.get(m).get_sqla_col())
            else:
                raise Exception(_("Metric '{}' is not valid".format(m)))
        if metrics_exprs:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr = literal_column('COUNT(*)').label(
                db_engine_spec.make_label_compatible('count'))

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.get_sqla_col()
                inner = col.get_sqla_col(col.column_name + '__')

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].get_sqla_col())
            metrics_exprs = []

        if granularity:
            dttm_col = cols[granularity]
            time_grain = extras.get('time_grain_sqla')
            time_filters = []

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            # Use main dttm column to support index with secondary dttm columns
            if db_engine_spec.time_secondary_columns and \
                    self.main_dttm_col in self.dttm_cols and \
                    self.main_dttm_col != dttm_col.column_name:
                time_filters.append(cols[self.main_dttm_col].
                                    get_time_filter(from_dttm, to_dttm))
            time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

        select_exprs += metrics_exprs
        qry = sa.select(select_exprs)

        tbl = self.get_from_clause(template_processor)

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for flt in filter:
            if not all([flt.get(s) for s in ['col', 'op']]):
                continue
            col = flt['col']
            op = flt['op']
            col_obj = cols.get(col)
            if col_obj:
                is_list_target = op in ('in', 'not in')
                eq = self.filter_values_handler(
                    flt.get('val'),
                    target_column_is_numeric=col_obj.is_num,
                    is_list_target=is_list_target)
                if op in ('in', 'not in'):
                    cond = col_obj.get_sqla_col().in_(eq)
                    if '<NULL>' in eq:
                        cond = or_(cond, col_obj.get_sqla_col() == None)  # noqa
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
                else:
                    if col_obj.is_num:
                        eq = utils.string_to_num(flt['val'])
                    if op == '==':
                        where_clause_and.append(col_obj.get_sqla_col() == eq)
                    elif op == '!=':
                        where_clause_and.append(col_obj.get_sqla_col() != eq)
                    elif op == '>':
                        where_clause_and.append(col_obj.get_sqla_col() > eq)
                    elif op == '<':
                        where_clause_and.append(col_obj.get_sqla_col() < eq)
                    elif op == '>=':
                        where_clause_and.append(col_obj.get_sqla_col() >= eq)
                    elif op == '<=':
                        where_clause_and.append(col_obj.get_sqla_col() <= eq)
                    elif op == 'LIKE':
                        where_clause_and.append(col_obj.get_sqla_col().like(eq))
                    elif op == 'IS NULL':
                        where_clause_and.append(col_obj.get_sqla_col() == None)  # noqa
                    elif op == 'IS NOT NULL':
                        where_clause_and.append(
                            col_obj.get_sqla_col() != None)  # noqa
        if extras:
            where = extras.get('where')
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text('({})'.format(where))]
            having = extras.get('having')
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text('({})'.format(having))]
        if granularity:
            qry = qry.where(and_(*(time_filters + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))

        if not orderby and not columns:
            orderby = [(main_metric_expr, not order_desc)]

        for col, ascending in orderby:
            direction = asc if ascending else desc
            if utils.is_adhoc_metric(col):
                col = self.adhoc_metric_to_sqla(col, cols)
            qry = qry.order_by(direction(col))

        if row_limit:
            qry = qry.limit(row_limit)

        if is_timeseries and \
                timeseries_limit and groupby and not time_groupby_inline:
            if self.database.db_engine_spec.inner_joins:
                # some sql dialects require order by expressions
                # to also be in the select clause -- others, e.g. vertica,
                # require a unique inner alias
                inner_main_metric_expr = main_metric_expr.label('mme_inner__')
                inner_select_exprs += [inner_main_metric_expr]
                subq = select(inner_select_exprs)
                subq = subq.select_from(tbl)
                inner_time_filter = dttm_col.get_time_filter(
                    inner_from_dttm or from_dttm,
                    inner_to_dttm or to_dttm,
                )
                subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
                subq = subq.group_by(*inner_groupby_exprs)

                ob = inner_main_metric_expr
                if timeseries_limit_metric:
                    if utils.is_adhoc_metric(timeseries_limit_metric):
                        ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols)
                    elif timeseries_limit_metric in metrics_dict:
                        timeseries_limit_metric = metrics_dict.get(
                            timeseries_limit_metric,
                        )
                        ob = timeseries_limit_metric.get_sqla_col()
                    else:
                        raise Exception(_("Metric '{}' is not valid".format(m)))
                direction = desc if order_desc else asc
                subq = subq.order_by(direction(ob))
                subq = subq.limit(timeseries_limit)

                on_clause = []
                for i, gb in enumerate(groupby):
                    on_clause.append(
                        groupby_exprs[i] == column(gb + '__'))

                tbl = tbl.join(subq.alias(), and_(*on_clause))
            else:
                # run subquery to get top groups
                subquery_obj = {
                    'prequeries': prequeries,
                    'is_prequery': True,
                    'is_timeseries': False,
                    'row_limit': timeseries_limit,
                    'groupby': groupby,
                    'metrics': metrics,
                    'granularity': granularity,
                    'from_dttm': inner_from_dttm or from_dttm,
                    'to_dttm': inner_to_dttm or to_dttm,
                    'filter': filter,
                    'orderby': orderby,
                    'extras': extras,
                    'columns': columns,
                    'order_desc': True,
                }
                result = self.query(subquery_obj)
                cols = {col.column_name: col for col in self.columns}
                dimensions = [
                    c for c in result.df.columns
                    if c not in metrics and c in cols
                ]
                top_groups = self._get_top_groups(result.df, dimensions)
                qry = qry.where(top_groups)

        return qry.select_from(tbl)
Esempio n. 44
0
    def query(self,
              groupby,
              metrics,
              granularity,
              from_dttm,
              to_dttm,
              custom_query,
              limit_spec=None,
              filter=None,
              is_timeseries=True,
              timeseries_limit=15,
              row_limit=None,
              inner_from_dttm=None,
              inner_to_dttm=None,
              extras=None,
              columns=None):

        qry_start_dttm = datetime.now()

        if not custom_query:
            # For backward compatibility
            if granularity not in self.dttm_cols:
                granularity = self.main_dttm_col
            cols = {col.column_name: col for col in self.columns}
            if not self.main_dttm_col:
                raise Exception(
                    "Datetime column not provided as part table configuration")
            dttm_expr = cols[granularity].expression

            if dttm_expr:
                timestamp = ColumnClause(dttm_expr,
                                         is_literal=True).label('timestamp')
            else:
                timestamp = literal_column(granularity).label('timestamp')

            metrics_exprs = [
                literal_column(m.expression).label(m.metric_name)
                for m in self.metrics if m.metric_name in metrics
            ]

            if metrics:
                main_metric_expr = literal_column([
                    m.expression for m in self.metrics
                    if m.metric_name == metrics[0]
                ][0])
            else:
                main_metric_expr = literal_column("COUNT(*)")

            groupby_exprs = []
            select_exprs = []

            if groupby:
                inner_select_exprs = []
                inner_groupby_exprs = []
                for s in groupby:
                    col = cols[s]
                    expr = col.expression
                    if expr:
                        outer = ColumnClause(expr, is_literal=True).label(s)
                        inner = ColumnClause(expr,
                                             is_literal=True).label('__' + s)
                    else:
                        outer = column(s).label(s)
                        inner = column(s).label('__' + s)

                    groupby_exprs.append(outer)
                    select_exprs.append(outer)
                    inner_groupby_exprs.append(inner)
                    inner_select_exprs.append(inner)
            elif columns:
                for s in columns:
                    select_exprs.append(s)
                metrics_exprs = []

            if is_timeseries:
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            select_exprs += metrics_exprs
            qry = select(select_exprs)
            from_clause = table(self.table_name)
            if not columns:
                qry = qry.group_by(*groupby_exprs)

            time_filter = [
                timestamp >= from_dttm.isoformat(),
                timestamp <= to_dttm.isoformat(),
            ]
            inner_time_filter = copy(time_filter)
            if inner_from_dttm:
                inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
            if inner_to_dttm:
                inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()
            where_clause_and = []
            having_clause_and = []
            for col, op, eq in filter:
                col_obj = cols[col]
                if op in ('in', 'not in'):
                    values = eq.split(",")
                    if col_obj.expression:
                        cond = ColumnClause(col_obj.expression,
                                            is_literal=True).in_(values)
                    else:
                        cond = column(col).in_(values)
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
            if extras and 'where' in extras:
                where_clause_and += [text(extras['where'])]
            if extras and 'having' in extras:
                having_clause_and += [text(extras['having'])]
            qry = qry.where(and_(*(time_filter + where_clause_and)))
            qry = qry.having(and_(*having_clause_and))
            if groupby:
                qry = qry.order_by(desc(main_metric_expr))
            qry = qry.limit(row_limit)

            if timeseries_limit and groupby:
                subq = select(inner_select_exprs)
                subq = subq.select_from(table(self.table_name))
                subq = subq.where(and_(*(where_clause_and +
                                         inner_time_filter)))
                subq = subq.group_by(*inner_groupby_exprs)
                subq = subq.order_by(desc(main_metric_expr))
                subq = subq.limit(timeseries_limit)
                on_clause = []
                for i, gb in enumerate(groupby):
                    on_clause.append(groupby_exprs[i] == column("__" + gb))

                from_clause = from_clause.join(subq.alias(), and_(*on_clause))

            qry = qry.select_from(from_clause)

            engine = self.database.get_sqla_engine()
            sql = str(
                qry.compile(engine, compile_kwargs={"literal_binds": True}))
            df = read_sql_query(sql=sql, con=engine)
            sql = textwrap.dedent(sql)

        else:
            """
            Legacy way of querying by building a SQL string without
            using the sqlalchemy expression API (new approach which supports
            all dialects)
            """
            engine = self.database.get_sqla_engine()
            sql = custom_query.format(**locals())
            df = read_sql_query(sql=sql, con=engine)
            sql = textwrap.dedent(sql)

        return QueryResult(df=df,
                           duration=datetime.now() - qry_start_dttm,
                           query=sql)
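
The compile(engine, compile_kwargs={"literal_binds": True}) step above is what turns the expression into a plain SQL string that read_sql_query can execute directly: bound parameters are inlined as literals. A small self-contained illustration with an assumed throwaway table:

import sqlalchemy as sa

metadata = sa.MetaData()
t = sa.Table("t", metadata, sa.Column("x", sa.Integer))

qry = sa.select([t.c.x]).where(t.c.x > 5)
print(qry.compile(compile_kwargs={"literal_binds": True}))
# roughly: SELECT t.x FROM t WHERE t.x > 5
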
Esempio n. 45
0
def get_sqla_col(self, label=None):
    label = label or self.metric_name
    sqla_col = literal_column(self.expression)
    return self.table.make_sqla_column_compatible(sqla_col, label)
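
For a metric whose expression is, say, SUM(num), the helper above boils down to wrapping the expression in a literal_column and labelling it (make_sqla_column_compatible is essentially a dialect-aware .label()). A tiny sketch with assumed names:

from sqlalchemy import select
from sqlalchemy.sql import literal_column

print(select([literal_column("SUM(num)").label("my_metric")]))
# SELECT SUM(num) AS my_metric
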
Esempio n. 46
0
    def query(self,
              groupby,
              metrics,
              granularity,
              from_dttm,
              to_dttm,
              limit_spec=None,
              filter=None,
              is_timeseries=True,
              timeseries_limit=15,
              row_limit=None):

        qry_start_dttm = datetime.now()
        timestamp = literal_column(
            self.main_datetime_column.column_name).label('timestamp')
        metrics_exprs = [
            literal_column(m.expression).label(m.metric_name)
            for m in self.metrics if m.metric_name in metrics
        ]

        if metrics:
            main_metric_expr = literal_column([
                m.expression for m in self.metrics
                if m.metric_name == metrics[0]
            ][0])
        else:
            main_metric_expr = literal_column("COUNT(*)")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = [literal_column(s) for s in groupby]
            groupby_exprs = [literal_column(s) for s in groupby]
            inner_groupby_exprs = [
                literal_column(s).label('__' + s) for s in groupby
            ]
        if granularity != "all":
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        select_exprs += metrics_exprs
        qry = select(select_exprs)
        from_clause = table(self.table_name)
        qry = qry.group_by(*groupby_exprs)

        where_clause_and = [
            timestamp >= from_dttm.isoformat(),
            timestamp < to_dttm.isoformat(),
        ]
        for col, op, eq in filter:
            if op in ('in', 'not in'):
                values = eq.split(",")
                cond = literal_column(col).in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        qry = qry.where(and_(*where_clause_and))
        qry = qry.order_by(desc(main_metric_expr))
        qry = qry.limit(row_limit)

        if timeseries_limit and groupby:
            subq = select(inner_groupby_exprs)
            subq = subq.select_from(table(self.table_name))
            subq = subq.where(and_(*where_clause_and))
            subq = subq.group_by(*inner_groupby_exprs)
            subq = subq.order_by(desc(main_metric_expr))
            subq = subq.limit(timeseries_limit)
            on_clause = []
            for gb in groupby:
                on_clause.append(
                    literal_column(gb) == literal_column("__" + gb))

            from_clause = from_clause.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(from_clause)

        engine = self.database.get_sqla_engine()
        sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
        df = read_sql_query(sql=sql, con=engine)
        sql = sqlparse.format(sql, reindent=True)
        return QueryResult(df=df,
                           duration=datetime.now() - qry_start_dttm,
                           query=sql)
Esempio n. 47
0
 async def select_filter_by(cls, conn: SAConn, col: dict) -> List[RowProxy]:
     key = list(col.keys())[0]
     query = cls.__table__.select().where(literal_column(key) == col[key])
     cursor = await conn.execute(query)
     items = await cursor.fetchall()
     return items
Esempio n. 48
0
    def query(  # sqla
            self,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            extras=None,
            columns=None):
        """Querying any sqla table from this common interface"""
        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}
        qry_start_dttm = datetime.now()

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))

        metrics_exprs = [
            m.sqla_col for m in self.metrics if m.metric_name in metrics
        ]

        if metrics:
            main_metric_expr = [
                m.sqla_col for m in self.metrics if m.metric_name == metrics[0]
            ][0]
        else:
            main_metric_expr = literal_column("COUNT(*)").label("ccount")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.sqla_col
                inner = col.sqla_col.label('__' + col.column_name)

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].sqla_col)
            metrics_exprs = []

        if granularity:
            dttm_expr = cols[granularity].sqla_col.label('timestamp')
            timestamp = dttm_expr

            # Transforming time grain into an expression based on configuration
            time_grain_sqla = extras.get('time_grain_sqla')
            if time_grain_sqla:
                udf = self.database.grains_dict().get(time_grain_sqla, '{col}')
                timestamp_grain = literal_column(
                    udf.function.format(col=dttm_expr)).label('timestamp')
            else:
                timestamp_grain = timestamp

            if is_timeseries:
                select_exprs += [timestamp_grain]
                groupby_exprs += [timestamp_grain]

            tf = '%Y-%m-%d %H:%M:%S.%f'
            time_filter = [
                timestamp >= text(self.database.dttm_converter(from_dttm)),
                timestamp <= text(self.database.dttm_converter(to_dttm)),
            ]
            inner_time_filter = copy(time_filter)
            if inner_from_dttm:
                inner_time_filter[0] = timestamp >= text(
                    self.database.dttm_converter(inner_from_dttm))
            if inner_to_dttm:
                inner_time_filter[1] = timestamp <= text(
                    self.database.dttm_converter(inner_to_dttm))
        else:
            inner_time_filter = []

        select_exprs += metrics_exprs
        qry = select(select_exprs)

        tbl = table(self.table_name)
        if self.schema:
            tbl.schema = self.schema

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for col, op, eq in filter:
            col_obj = cols[col]
            if op in ('in', 'not in'):
                values = eq.split(",")
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        if extras and 'where' in extras:
            where_clause_and += [text(extras['where'])]
        if extras and 'having' in extras:
            having_clause_and += [text(extras['having'])]
        if granularity:
            qry = qry.where(and_(*(time_filter + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        qry = qry.limit(row_limit)

        if timeseries_limit and groupby:
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
            subq = subq.group_by(*inner_groupby_exprs)
            subq = subq.order_by(desc(main_metric_expr))
            subq = subq.limit(timeseries_limit)
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(groupby_exprs[i] == column("__" + gb))

            tbl = tbl.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(tbl)

        engine = self.database.get_sqla_engine()
        sql = "{}".format(
            qry.compile(
                engine,
                compile_kwargs={"literal_binds": True},
            ), )
        print(sql)
        df = pd.read_sql_query(sql=sql, con=engine)
        sql = sqlparse.format(sql, reindent=True)
        return QueryResult(df=df,
                           duration=datetime.now() - qry_start_dttm,
                           query=sql)
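The time-grain handling above turns a configured template into a labeled literal_column. A small sketch of that substitution, assuming a hypothetical DATE_TRUNC grain template (the template string and column name are not taken from the snippet):

from sqlalchemy import column, literal_column, select

# hypothetical entry such as a grains_dict() might return; '{col}' is
# replaced with the datetime expression when the query is built
grain_template = "DATE_TRUNC('day', {col})"
dttm_expr = column("created_at")

timestamp_grain = literal_column(
    grain_template.format(col=dttm_expr)).label('timestamp')
print(select([timestamp_grain]))
# SELECT DATE_TRUNC('day', created_at) AS timestamp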
Esempio n. 49
0
 def visit_select(self, select):
     # 'orderby' is assumed to hold the ORDER BY expression text built
     # earlier by the enclosing compiler
     select.append_column(
         sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" %
                            orderby).label("ora_rn"))
Esempio n. 50
0
 def test_literal_column_default_no_label(self):
     self._run_test(default=literal_column("1", type_=self.MyInteger))
Esempio n. 51
0
    CoreData.negativeTestsViral,
    CoreData.positiveCasesViral,
    CoreData.deathConfirmed,
    CoreData.deathProbable,
    CoreData.probableCases,
    CoreData.totalTestEncountersViral,
    CoreData.totalTestsPeopleAntibody,
    CoreData.positiveTestsPeopleAntibody,
    CoreData.negativeTestsPeopleAntibody,
    CoreData.totalTestsPeopleAntigen,
    CoreData.positiveTestsPeopleAntigen,
    CoreData.negativeTestsPeopleAntigen,
    CoreData.totalTestsAntigen,
    CoreData.positiveTestsAntigen,
    CoreData.negativeTestsAntigen,

    # Fake Column
    literal_column("''").label('_posNeg'),
    Literal("totalTestResults"),
]


def select(columns):
    return [
        CSVColumn(
            label=COLUMNS_DISPLAY_NAMES.get(c.name)
            if c.name in COLUMNS_DISPLAY_NAMES else c.name,
            model_column=c.name if c.name in COLUMNS_DISPLAY_NAMES else None,
            blank=c in COLUMNS_DISPLAY_NAMES) for c in columns
    ]
Esempio n. 52
0
 def get_sqla_col(self, label=None):
     label = label if label else self.metric_name
     label = self.table.get_label(label)
     return literal_column(self.expression).label(label)
Esempio n. 53
0
    def visit_select(self, select, **kwargs):
        """Look for ``LIMIT`` and OFFSET in a select statement, and if
        so tries to wrap it in a subquery with ``rownum`` criterion.
        """

        if not getattr(select, '_oracle_visit', None):
            if not self.dialect.use_ansi:
                froms = self._display_froms_for_select(
                                    select, kwargs.get('asfrom', False))
                whereclause = self._get_nonansi_join_whereclause(froms)
                if whereclause is not None:
                    select = select.where(whereclause)
                    select._oracle_visit = True

            limit_clause = select._limit_clause
            offset_clause = select._offset_clause
            if limit_clause is not None or offset_clause is not None:
                # See http://www.oracle.com/technology/oramag/oracle/06-sep/o56asktom.html
                #
                # Generalized form of an Oracle pagination query:
                #   select ... from (
                #     select /*+ FIRST_ROWS(N) */ ...., rownum as ora_rn from (
                #         select distinct ... where ... order by ...
                #     ) where ROWNUM <= :limit+:offset
                #   ) where ora_rn > :offset
                # Outer select and "ROWNUM as ora_rn" can be dropped if limit=0

                # TODO: use annotations instead of clone + attr set ?
                select = select._generate()
                select._oracle_visit = True

                # Wrap the middle select and add the hint
                limitselect = sql.select([c for c in select.c])
                if limit_clause is not None and \
                    self.dialect.optimize_limits and \
                        select._simple_int_limit:
                    limitselect = limitselect.prefix_with(
                                            "/*+ FIRST_ROWS(%d) */" %
                                            select._limit)

                limitselect._oracle_visit = True
                limitselect._is_wrapper = True

                # If needed, add the limiting clause
                if limit_clause is not None:
                    if not self.dialect.use_binds_for_limits:
                        # use simple int limits, will raise an exception
                        # if the limit isn't specified this way
                        max_row = select._limit

                        if offset_clause is not None:
                            max_row += select._offset
                        max_row = sql.literal_column("%d" % max_row)
                    else:
                        max_row = limit_clause
                        if offset_clause is not None:
                            max_row = max_row + offset_clause
                    limitselect.append_whereclause(
                            sql.literal_column("ROWNUM") <= max_row)

                # If needed, add the ora_rn, and wrap again with offset.
                if offset_clause is None:
                    limitselect._for_update_arg = select._for_update_arg
                    select = limitselect
                else:
                    limitselect = limitselect.column(
                            sql.literal_column("ROWNUM").label("ora_rn"))
                    limitselect._oracle_visit = True
                    limitselect._is_wrapper = True

                    offsetselect = sql.select(
                            [c for c in limitselect.c if c.key != 'ora_rn'])
                    offsetselect._oracle_visit = True
                    offsetselect._is_wrapper = True

                    if not self.dialect.use_binds_for_limits:
                        offset_clause = sql.literal_column(
                                                "%d" % select._offset)
                    offsetselect.append_whereclause(
                            sql.literal_column("ora_rn") > offset_clause)

                    offsetselect._for_update_arg = select._for_update_arg
                    select = offsetselect

        kwargs['iswrapper'] = getattr(select, '_is_wrapper', False)
        return compiler.SQLCompiler.visit_select(self, select, **kwargs)
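The comment block in visit_select describes Oracle's nested ROWNUM pagination. A hand-built sketch of the same shape using only public constructs, assuming a hypothetical users table and a limit of 10 with an offset of 20:

from sqlalchemy import select, table, column, literal_column

users = table("users", column("id"), column("name"))
limit, offset = 10, 20

ordered = select([users.c.id, users.c.name]).order_by(users.c.id).alias("ordered")

# middle select: keep rows up to limit + offset and expose ROWNUM as ora_rn
limited = select(
    list(ordered.c) + [literal_column("ROWNUM").label("ora_rn")]
).where(literal_column("ROWNUM") <= limit + offset).alias("limited")

# outer select: discard the first `offset` rows
paged = select([limited.c.id, limited.c.name]).where(limited.c.ora_rn > offset)
print(paged)

The dialect code above performs the same wrapping automatically, adding the FIRST_ROWS hint and FOR UPDATE handling along the way.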
Esempio n. 54
0
def get_old_messages_backend(request,
                             user_profile,
                             anchor=REQ(converter=int),
                             num_before=REQ(converter=to_non_negative_int),
                             num_after=REQ(converter=to_non_negative_int),
                             narrow=REQ('narrow',
                                        converter=narrow_parameter,
                                        default=None),
                             use_first_unread_anchor=REQ(
                                 default=False, converter=ujson.loads),
                             apply_markdown=REQ(default=True,
                                                converter=ujson.loads)):
    # type: (HttpRequest, UserProfile, int, int, int, Optional[List[Dict[str, Any]]], bool, bool) -> HttpResponse
    include_history = ok_to_include_history(narrow, user_profile.realm)

    if include_history and not use_first_unread_anchor:
        query = select([column("id").label("message_id")], None,
                       "zerver_message")
        inner_msg_id_col = literal_column("zerver_message.id")
    elif narrow is None:
        query = select(
            [column("message_id"), column("flags")],
            column("user_profile_id") == literal(user_profile.id),
            "zerver_usermessage")
        inner_msg_id_col = column("message_id")
    else:
        # TODO: Don't do this join if we're not doing a search
        query = select(
            [column("message_id"), column("flags")],
            column("user_profile_id") == literal(user_profile.id),
            join(
                "zerver_usermessage", "zerver_message",
                literal_column("zerver_usermessage.message_id") ==
                literal_column("zerver_message.id")))
        inner_msg_id_col = column("message_id")

    num_extra_messages = 1
    is_search = False

    if narrow is not None:
        # Add some metadata to our logging data for narrows
        verbose_operators = []
        for term in narrow:
            if term['operator'] == "is":
                verbose_operators.append("is:" + term['operand'])
            else:
                verbose_operators.append(term['operator'])
        request._log_data['extra'] = "[%s]" % (",".join(verbose_operators), )

        # Build the query for the narrow
        num_extra_messages = 0
        builder = NarrowBuilder(user_profile, inner_msg_id_col)
        for term in narrow:
            if term['operator'] == 'search' and not is_search:
                query = query.column("subject").column("rendered_content")
                is_search = True
            query = builder.add_term(query, term)

    # We add 1 to the number of messages requested if no narrow was
    # specified to ensure that the resulting list always contains the
    # anchor message.  If a narrow was specified, the anchor message
    # might not match the narrow anyway.
    if num_after != 0:
        num_after += num_extra_messages
    else:
        num_before += num_extra_messages

    sa_conn = get_sqlalchemy_connection()
    if use_first_unread_anchor:
        condition = column("flags").op("&")(UserMessage.flags.read.mask) == 0

        # We exclude messages on muted topics when finding the first unread
        # message in this narrow
        muting_conditions = exclude_muting_conditions(user_profile, narrow)
        if muting_conditions:
            condition = and_(condition, *muting_conditions)

        first_unread_query = query.where(condition)
        first_unread_query = first_unread_query.order_by(
            inner_msg_id_col.asc()).limit(1)
        first_unread_result = list(
            sa_conn.execute(first_unread_query).fetchall())
        if len(first_unread_result) > 0:
            anchor = first_unread_result[0][0]
        else:
            # no unread message was found; use an anchor larger than any
            # message id so the queries below return the newest messages
            anchor = 10000000000000000

    before_query = None
    after_query = None
    if num_before != 0:
        before_anchor = anchor
        if num_after != 0:
            # Don't include the anchor in both the before query and the after query
            before_anchor = anchor - 1
        before_query = query.where(inner_msg_id_col <= before_anchor) \
                            .order_by(inner_msg_id_col.desc()).limit(num_before)
    if num_after != 0:
        after_query = query.where(inner_msg_id_col >= anchor) \
                           .order_by(inner_msg_id_col.asc()).limit(num_after)

    if num_before == 0 and num_after == 0:
        # This can happen when a narrow is specified.
        after_query = query.where(inner_msg_id_col == anchor)

    if before_query is not None:
        if after_query is not None:
            query = union_all(before_query.self_group(),
                              after_query.self_group())
        else:
            query = before_query
    else:
        query = after_query
    main_query = alias(query)
    query = select(main_query.c, None,
                   main_query).order_by(column("message_id").asc())
    # This is a hack to tag the query we use for testing
    query = query.prefix_with("/* get_old_messages */")
    query_result = list(sa_conn.execute(query).fetchall())

    # The following is a little messy, but ensures that the code paths
    # are similar regardless of the value of include_history.  The
    # 'user_messages' dictionary maps each message to the user's
    # UserMessage object for that message, which we will attach to the
    # rendered message dict before returning it.  We attempt to
    # bulk-fetch rendered message dicts from remote cache using the
    # 'messages' list.
    search_fields = dict()  # type: Dict[int, Dict[str, text_type]]
    message_ids = []  # type: List[int]
    user_message_flags = {}  # type: Dict[int, List[str]]
    if include_history:
        message_ids = [row[0] for row in query_result]

        # TODO: This could be done with an outer join instead of two queries
        user_message_flags = dict(
            (user_message.message_id, user_message.flags_list())
            for user_message in UserMessage.objects.filter(
                user_profile=user_profile, message__id__in=message_ids))
        for row in query_result:
            message_id = row[0]
            if user_message_flags.get(message_id) is None:
                user_message_flags[message_id] = ["read", "historical"]
            if is_search:
                (_, subject, rendered_content, content_matches,
                 subject_matches) = row
                search_fields[message_id] = get_search_fields(
                    rendered_content, subject, content_matches,
                    subject_matches)
    else:
        for row in query_result:
            message_id = row[0]
            flags = row[1]
            user_message_flags[message_id] = parse_usermessage_flags(flags)

            message_ids.append(message_id)

            if is_search:
                (_, _, subject, rendered_content, content_matches,
                 subject_matches) = row
                search_fields[message_id] = get_search_fields(
                    rendered_content, subject, content_matches,
                    subject_matches)

    cache_transformer = lambda row: Message.build_dict_from_raw_db_row(
        row, apply_markdown)
    id_fetcher = lambda row: row['id']

    message_dicts = generic_bulk_cached_fetch(
        lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),
        Message.get_raw_db_rows,
        message_ids,
        id_fetcher=id_fetcher,
        cache_transformer=cache_transformer,
        extractor=extract_message_dict,
        setter=stringify_message_dict)

    message_list = []
    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict.update({"flags": user_message_flags[message_id]})
        msg_dict.update(search_fields.get(message_id, {}))
        message_list.append(msg_dict)

    statsd.incr('loaded_old_messages', len(message_list))
    ret = {'messages': message_list, "result": "success", "msg": ""}
    return json_success(ret)
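The anchor handling above issues one query for messages at or before the anchor and one for messages after it, then recombines them. A minimal sketch of that union shape, with a hypothetical table and hard-coded anchor and limits:

from sqlalchemy import select, table, column, union_all, alias

messages = table("zerver_message", column("id"))
anchor, num_before, num_after = 1000, 50, 50

before_query = (select([messages.c.id])
                .where(messages.c.id <= anchor - 1)
                .order_by(messages.c.id.desc()).limit(num_before))
after_query = (select([messages.c.id])
               .where(messages.c.id >= anchor)
               .order_by(messages.c.id.asc()).limit(num_after))

# self_group() parenthesizes each half so its ORDER BY/LIMIT survive UNION ALL
combined = alias(union_all(before_query.self_group(), after_query.self_group()))
final = select(combined.c).order_by(column("id").asc())
print(final)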
Esempio n. 55
0
def list_files(
    probe_asn=None,
    probe_cc=None,
    test_name=None,
    since=None,
    until=None,
    since_index=None,
    order_by="index",
    order="desc",
    offset=0,
    limit=100,
):
    log = current_app.logger

    if probe_asn is not None:
        if probe_asn.startswith("AS"):
            probe_asn = probe_asn[2:]
        probe_asn = int(probe_asn)

    try:
        if since is not None:
            since = parse_date(since)
    except ValueError:
        raise BadRequest("Invalid since")

    try:
        if until is not None:
            until = parse_date(until)
    except ValueError:
        raise BadRequest("Invalid until")

    if since_index is not None:
        since_index = int(since_index)
        report_no = max(0, since_index - REPORT_INDEX_OFFSET)

    if order_by in ("index", "idx"):
        order_by = "report_no"

    cols = [
        literal_column("textname"),
        literal_column("test_start_time"),
        literal_column("probe_cc"),
        literal_column("probe_asn"),
        literal_column("report_no"),
        literal_column("test_name"),
    ]
    where = []
    query_params = {}

    # XXX maybe all of this can go into some sort of function.
    if probe_cc:
        where.append(sql.text("probe_cc = :probe_cc"))
        query_params["probe_cc"] = probe_cc

    if probe_asn:
        where.append(sql.text("probe_asn = :probe_asn"))
        query_params["probe_asn"] = probe_asn

    if test_name:
        where.append(sql.text("test_name = :test_name"))
        query_params["test_name"] = test_name

    if since:
        where.append(sql.text("test_start_time > :since"))
        query_params["since"] = since

    if until:
        where.append(sql.text("test_start_time <= :until"))
        query_params["until"] = until

    if since_index:
        where.append(sql.text("report_no > :report_no"))
        query_params["report_no"] = report_no

    query = select(cols).where(and_(*where)).select_from("report")
    count = -1
    pages = -1
    current_page = math.ceil(offset / limit) + 1

    query = query.order_by(text("{} {}".format(order_by, order)))
    query = query.limit(limit).offset(offset)

    results = []

    log.debug(query)
    q = current_app.db_session.execute(query, query_params)
    for row in q:
        download_url = urljoin(
            current_app.config["BASE_URL"], "/files/download/%s" % row.textname
        )
        results.append(
            {
                "download_url": download_url,
                "probe_cc": row.probe_cc,
                "probe_asn": "AS{}".format(row.probe_asn),
                "test_name": row.test_name,
                "index": int(row.report_no) + REPORT_INDEX_OFFSET,
                "test_start_time": row.test_start_time,
            }
        )
    # We got less results than what we expected, we know the count and that we are done
    if len(results) < limit:
        count = offset + len(results)
        pages = math.ceil(count / limit)
        next_url = None
    else:
        next_args = request.args.to_dict()
        next_args["offset"] = "%s" % (offset + limit)
        next_args["limit"] = "%s" % limit
        next_url = urljoin(
            current_app.config["BASE_URL"], "/api/v1/files?%s" % urlencode(next_args)
        )

    metadata = {
        "offset": offset,
        "limit": limit,
        "count": count,
        "pages": pages,
        "current_page": current_page,
        "next_url": next_url,
    }

    return jsonify({"metadata": metadata, "results": results})
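The short-page check near the end shows how the endpoint infers the total count and page count without a separate COUNT(*) query. The same arithmetic as a standalone sketch (the function name and sample values are illustrative only):

import math

def page_metadata(results, offset, limit):
    if len(results) < limit:        # short page: we reached the end of the set
        count = offset + len(results)
        pages = math.ceil(count / limit)
        next_offset = None
    else:                           # full page: the total is still unknown
        count = pages = -1
        next_offset = offset + limit
    return {"offset": offset, "limit": limit, "count": count,
            "pages": pages, "next_offset": next_offset}

print(page_metadata(list(range(30)), offset=100, limit=100))
# {'offset': 100, 'limit': 100, 'count': 130, 'pages': 2, 'next_offset': None}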
Esempio n. 56
0
    def get_sqla_query(  # sqla
        self,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        columns=None,
        groupby=None,
        filter=None,
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        order_desc=True,
    ) -> SqlaQuery:
        """Querying any sqla table from this common interface"""
        template_kwargs = {
            "from_dttm": from_dttm,
            "groupby": groupby,
            "metrics": metrics,
            "row_limit": row_limit,
            "to_dttm": to_dttm,
            "filter": filter,
            "columns": {col.column_name: col
                        for col in self.columns},
        }
        is_sip_38 = is_feature_enabled("SIP_38_VIZ_REARCHITECTURE")
        template_kwargs.update(self.template_params_dict)
        extra_cache_keys: List[Any] = []
        template_kwargs["extra_cache_keys"] = extra_cache_keys
        template_processor = self.get_template_processor(**template_kwargs)
        db_engine_spec = self.database.db_engine_spec
        prequeries: List[str] = []

        orderby = orderby or []

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        # Database spec supports join-free timeslot grouping
        time_groupby_inline = db_engine_spec.time_groupby_inline

        cols: Dict[str,
                   Column] = {col.column_name: col
                              for col in self.columns}
        metrics_dict: Dict[str, SqlMetric] = {
            m.metric_name: m
            for m in self.metrics
        }

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        if (not metrics and not columns
                and (is_sip_38 or (not is_sip_38 and not groupby))):
            raise Exception(_("Empty query?"))
        metrics_exprs: List[ColumnElement] = []
        for m in metrics:
            if utils.is_adhoc_metric(m):
                metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols))
            elif m in metrics_dict:
                metrics_exprs.append(metrics_dict[m].get_sqla_col())
            else:
                raise Exception(
                    _("Metric '%(metric)s' does not exist", metric=m))
        if metrics_exprs:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr, label = literal_column("COUNT(*)"), "ccount"
            main_metric_expr = self.make_sqla_column_compatible(
                main_metric_expr, label)

        select_exprs: List[Column] = []
        groupby_exprs_sans_timestamp: OrderedDict = OrderedDict()

        if (is_sip_38 and metrics and columns) or (not is_sip_38 and groupby):
            # dedup columns while preserving order
            groupby = list(dict.fromkeys(columns if is_sip_38 else groupby))

            select_exprs = []
            for s in groupby:
                if s in cols:
                    outer = cols[s].get_sqla_col()
                else:
                    outer = literal_column(f"({s})")
                    outer = self.make_sqla_column_compatible(outer, s)

                groupby_exprs_sans_timestamp[outer.name] = outer
                select_exprs.append(outer)
        elif columns:
            for s in columns:
                select_exprs.append(
                    cols[s].get_sqla_col() if s in cols
                    else self.make_sqla_column_compatible(literal_column(s)))
            metrics_exprs = []

        time_range_endpoints = extras.get("time_range_endpoints")
        groupby_exprs_with_timestamp = OrderedDict(
            groupby_exprs_sans_timestamp.items())
        if granularity:
            dttm_col = cols[granularity]
            time_grain = extras.get("time_grain_sqla")
            time_filters = []

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs_with_timestamp[timestamp.name] = timestamp

            # Use main dttm column to support index with secondary dttm columns.
            if (db_engine_spec.time_secondary_columns
                    and self.main_dttm_col in self.dttm_cols
                    and self.main_dttm_col != dttm_col.column_name):
                time_filters.append(cols[self.main_dttm_col].get_time_filter(
                    from_dttm, to_dttm, time_range_endpoints))
            time_filters.append(
                dttm_col.get_time_filter(from_dttm, to_dttm,
                                         time_range_endpoints))

        select_exprs += metrics_exprs

        labels_expected = [c._df_label_expected for c in select_exprs]

        select_exprs = db_engine_spec.make_select_compatible(
            groupby_exprs_with_timestamp.values(), select_exprs)
        qry = sa.select(select_exprs)

        tbl = self.get_from_clause(template_processor)

        if (is_sip_38 and metrics) or (not is_sip_38 and not columns):
            qry = qry.group_by(*groupby_exprs_with_timestamp.values())

        where_clause_and = []
        having_clause_and: List = []
        for flt in filter:
            if not all([flt.get(s) for s in ["col", "op"]]):
                continue
            col = flt["col"]
            op = flt["op"].upper()
            col_obj = cols.get(col)
            if col_obj:
                is_list_target = op in (
                    utils.FilterOperator.IN.value,
                    utils.FilterOperator.NOT_IN.value,
                )
                eq = self.filter_values_handler(
                    values=flt.get("val"),
                    target_column_is_numeric=col_obj.is_numeric,
                    is_list_target=is_list_target,
                )
                if op in (
                        utils.FilterOperator.IN.value,
                        utils.FilterOperator.NOT_IN.value,
                ):
                    cond = col_obj.get_sqla_col().in_(eq)
                    if isinstance(eq, str) and NULL_STRING in eq:
                        cond = or_(cond, col_obj.get_sqla_col() == None)
                    if op == utils.FilterOperator.NOT_IN.value:
                        cond = ~cond
                    where_clause_and.append(cond)
                else:
                    if col_obj.is_numeric:
                        eq = utils.cast_to_num(flt["val"])
                    if op == utils.FilterOperator.EQUALS.value:
                        where_clause_and.append(col_obj.get_sqla_col() == eq)
                    elif op == utils.FilterOperator.NOT_EQUALS.value:
                        where_clause_and.append(col_obj.get_sqla_col() != eq)
                    elif op == utils.FilterOperator.GREATER_THAN.value:
                        where_clause_and.append(col_obj.get_sqla_col() > eq)
                    elif op == utils.FilterOperator.LESS_THAN.value:
                        where_clause_and.append(col_obj.get_sqla_col() < eq)
                    elif op == utils.FilterOperator.GREATER_THAN_OR_EQUALS.value:
                        where_clause_and.append(col_obj.get_sqla_col() >= eq)
                    elif op == utils.FilterOperator.LESS_THAN_OR_EQUALS.value:
                        where_clause_and.append(col_obj.get_sqla_col() <= eq)
                    elif op == utils.FilterOperator.LIKE.value:
                        where_clause_and.append(
                            col_obj.get_sqla_col().like(eq))
                    elif op == utils.FilterOperator.IS_NULL.value:
                        where_clause_and.append(col_obj.get_sqla_col() == None)
                    elif op == utils.FilterOperator.IS_NOT_NULL.value:
                        where_clause_and.append(col_obj.get_sqla_col() != None)
                    else:
                        raise Exception(
                            _("Invalid filter operation type: %(op)s", op=op))
        if config["ENABLE_ROW_LEVEL_SECURITY"]:
            where_clause_and += self._get_sqla_row_level_filters(
                template_processor)
        if extras:
            where = extras.get("where")
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text("({})".format(where))]
            having = extras.get("having")
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text("({})".format(having))]
        if granularity:
            qry = qry.where(and_(*(time_filters + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))

        if not orderby and ((is_sip_38 and metrics) or
                            (not is_sip_38 and not columns)):
            orderby = [(main_metric_expr, not order_desc)]

        # To ensure correct handling of the ORDER BY labeling we need to reference the
        # metric instance if defined in the SELECT clause.
        metrics_exprs_by_label = {m._label: m for m in metrics_exprs}

        for col, ascending in orderby:
            direction = asc if ascending else desc
            if utils.is_adhoc_metric(col):
                col = self.adhoc_metric_to_sqla(col, cols)
            elif col in cols:
                col = cols[col].get_sqla_col()

            if isinstance(col, Label) and col._label in metrics_exprs_by_label:
                col = metrics_exprs_by_label[col._label]

            qry = qry.order_by(direction(col))

        if row_limit:
            qry = qry.limit(row_limit)

        if (is_timeseries and timeseries_limit and not time_groupby_inline
                and ((is_sip_38 and columns) or (not is_sip_38 and groupby))):
            if self.database.db_engine_spec.allows_joins:
                # some sql dialects require for order by expressions
                # to also be in the select clause -- others, e.g. vertica,
                # require a unique inner alias
                inner_main_metric_expr = self.make_sqla_column_compatible(
                    main_metric_expr, "mme_inner__")
                inner_groupby_exprs = []
                inner_select_exprs = []
                for gby_name, gby_obj in groupby_exprs_sans_timestamp.items():
                    inner = self.make_sqla_column_compatible(
                        gby_obj, gby_name + "__")
                    inner_groupby_exprs.append(inner)
                    inner_select_exprs.append(inner)

                inner_select_exprs += [inner_main_metric_expr]
                subq = select(inner_select_exprs).select_from(tbl)
                inner_time_filter = dttm_col.get_time_filter(
                    inner_from_dttm or from_dttm,
                    inner_to_dttm or to_dttm,
                    time_range_endpoints,
                )
                subq = subq.where(
                    and_(*(where_clause_and + [inner_time_filter])))
                subq = subq.group_by(*inner_groupby_exprs)

                ob = inner_main_metric_expr
                if timeseries_limit_metric:
                    ob = self._get_timeseries_orderby(timeseries_limit_metric,
                                                      metrics_dict, cols)
                direction = desc if order_desc else asc
                subq = subq.order_by(direction(ob))
                subq = subq.limit(timeseries_limit)

                on_clause = []
                for gby_name, gby_obj in groupby_exprs_sans_timestamp.items():
                    # in this case the column name, not the alias, needs to be
                    # conditionally mutated, as it refers to the column alias in
                    # the inner query
                    col_name = db_engine_spec.make_label_compatible(gby_name +
                                                                    "__")
                    on_clause.append(gby_obj == column(col_name))

                tbl = tbl.join(subq.alias(), and_(*on_clause))
            else:
                if timeseries_limit_metric:
                    orderby = [(
                        self._get_timeseries_orderby(timeseries_limit_metric,
                                                     metrics_dict, cols),
                        False,
                    )]

                # run prequery to get top groups
                prequery_obj = {
                    "is_timeseries": False,
                    "row_limit": timeseries_limit,
                    "metrics": metrics,
                    "granularity": granularity,
                    "from_dttm": inner_from_dttm or from_dttm,
                    "to_dttm": inner_to_dttm or to_dttm,
                    "filter": filter,
                    "orderby": orderby,
                    "extras": extras,
                    "columns": columns,
                    "order_desc": True,
                }
                if not is_sip_38:
                    prequery_obj["groupby"] = groupby

                result = self.query(prequery_obj)
                prequeries.append(result.query)
                dimensions = [
                    c for c in result.df.columns
                    if c not in metrics and c in groupby_exprs_sans_timestamp
                ]
                top_groups = self._get_top_groups(
                    result.df, dimensions, groupby_exprs_sans_timestamp)
                qry = qry.where(top_groups)
        return SqlaQuery(
            extra_cache_keys=extra_cache_keys,
            labels_expected=labels_expected,
            sqla_query=qry.select_from(tbl),
            prequeries=prequeries,
        )
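The join branch of the timeseries limit above builds an inner query over the group-by dimensions, keeps the top N groups by the main metric, and joins the result back so the outer query only sees those groups. A minimal sketch of that shape, assuming a hypothetical sales table (none of these names come from the method above):

from sqlalchemy import select, table, column, func, desc

sales = table("sales", column("country"), column("amount"), column("ts"))
metric = func.sum(sales.c.amount).label("total")

# inner query: top 5 countries by the metric, dimension labeled with a suffix
inner_country = sales.c.country.label("country__")
subq = (select([inner_country, metric])
        .group_by(inner_country)
        .order_by(desc(metric))
        .limit(5)
        .alias("top_groups"))

# outer query joins back on the suffixed label so only the top groups remain
outer = (select([sales.c.country, sales.c.ts, metric])
         .select_from(sales.join(subq, sales.c.country == column("country__")))
         .group_by(sales.c.country, sales.c.ts))
print(outer)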
Esempio n. 57
0
 def sqla_col(self):
     name = self.metric_name
     return literal_column(self.expression).label(name)
Esempio n. 58
0
 def get_sqla_col(self, label=None):
     db_engine_spec = self.table.database.db_engine_spec
     label = db_engine_spec.make_label_compatible(label if label else self.metric_name)
     return literal_column(self.expression).label(label)
Esempio n. 59
0
def polymorphic_union(table_map,
                      typecolname,
                      aliasname='p_union',
                      cast_nulls=True):
    """Create a ``UNION`` statement used by a polymorphic mapper.

    See  :ref:`concrete_inheritance` for an example of how
    this is used.
    
    :param table_map: mapping of polymorphic identities to 
     :class:`.Table` objects.
    :param typecolname: string name of a "discriminator" column, which will be 
     derived from the query, producing the polymorphic identity for each row.  If
     ``None``, no polymorphic discriminator is generated.
    :param aliasname: name of the :func:`~sqlalchemy.sql.expression.alias()` 
     construct generated.
    :param cast_nulls: if True, non-existent columns, which are represented as labeled
     NULLs, will be passed into CAST.   This is a legacy behavior that is problematic
     on some backends such as Oracle - in which case it can be set to False.

    """

    colnames = util.OrderedSet()
    colnamemaps = {}
    types = {}
    for key in table_map.keys():
        table = table_map[key]

        # mysql doesn't like selecting from a select;
        # make it an alias of the select
        if isinstance(table, sql.Select):
            table = table.alias()
            table_map[key] = table

        m = {}
        for c in table.c:
            colnames.add(c.key)
            m[c.key] = c
            types[c.key] = c.type
        colnamemaps[table] = m

    def col(name, table):
        try:
            return colnamemaps[table][name]
        except KeyError:
            if cast_nulls:
                return sql.cast(sql.null(), types[name]).label(name)
            else:
                return sql.type_coerce(sql.null(), types[name]).label(name)

    result = []
    for type, table in table_map.items():
        if typecolname is not None:
            result.append(
                sql.select([col(name, table) for name in colnames] + [
                    sql.literal_column(
                        sql_util._quote_ddl_expr(type)).label(typecolname)
                ],
                           from_obj=[table]))
        else:
            result.append(
                sql.select([col(name, table) for name in colnames],
                           from_obj=[table]))
    return sql.union_all(*result).alias(aliasname)
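A hedged usage sketch for polymorphic_union, following the concrete-inheritance pattern the docstring refers to; the two tables are hypothetical:

from sqlalchemy import MetaData, Table, Column, Integer, String
from sqlalchemy.orm import polymorphic_union

metadata = MetaData()
engineers = Table("engineers", metadata,
                  Column("id", Integer, primary_key=True),
                  Column("name", String(50)),
                  Column("engineer_info", String(50)))
managers = Table("managers", metadata,
                 Column("id", Integer, primary_key=True),
                 Column("name", String(50)),
                 Column("manager_data", String(50)))

# UNION ALL of both tables; columns missing on one side become typed NULLs,
# and the 'type' column carries each dict key as the polymorphic identity
pjoin = polymorphic_union(
    {"engineer": engineers, "manager": managers}, "type", "pjoin")
print(pjoin.select())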
Esempio n. 60
0
 async def count(self) -> Optional[int]:
     col = sql.func.count(sql.literal_column("*"))
     return await self.from_self(col).scalar()