Beispiel #1
0
    def exact_aggregates(self, query):
        child_scope = Scope()

        if self.options.row_privacy:
            keycount_expr = AggFunction("COUNT", None,  AllColumns())
        else:
            key_col = self.key_col(query)
            keycount_expr = AggFunction("COUNT", "DISTINCT", Column(key_col))
            child_scope.push_name(keycount_expr.expression)

        for ne in query.select.namedExpressions:
            child_scope.push_name(ne.expression)

        keycount = NamedExpression("keycount", keycount_expr)

        select = Seq([keycount] + [ne for ne in query.select.namedExpressions])
        select = Select(None, select)

        subquery = Query(child_scope.select(), query.source, query.where, query.agg, None, None, None)
        if self.options.reservoir_sample and not self.options.row_privacy:
            subquery = self.per_key_random(subquery)
            subquery = [AliasedRelation(subquery, "per_key_random")]

            filtered = Where(BooleanCompare(Column("per_key_random.row_num"), "<=", Literal(str(self.options.max_contrib), self.options.max_contrib)))
            return Query(select, From(subquery), filtered, query.agg, None, None, None)
        else:
            subquery = self.per_key_clamped(subquery)
            subquery = [AliasedRelation(subquery, "per_key_all")]
            return Query(select, From(subquery), None, query.agg, None, None, None)
    def query(self, query):
        query = QueryParser(self.metadata).query(str(query))
        Validate().validateQuery(query, self.metadata)

        child_scope = Scope()
        # we make sure aggregates are in select scope for subqueries
        if query.agg is not None:
            for ge in query.agg.groupingExpressions:
                child_scope.push_name(ge.expression)

        select = Seq([
            self.rewrite_outer_named_expression(ne, child_scope)
            for ne in query.select.namedExpressions
        ])
        select = Select(None, select)

        subquery = Query(child_scope.select(), query.source, query.where,
                         query.agg, query.having, None, None)
        subquery = self.exact_aggregates(subquery)
        subquery = [AliasedRelation(subquery, "exact_aggregates")]

        q = Query(select, From(subquery), None, query.agg, None, query.order,
                  query.limit)

        return QueryParser(self.metadata).query(str(q))
Beispiel #3
0
    def per_key_random(self, query):
        key_col = self.key_col(query)

        select = Seq([NamedExpression(None, AllColumns()), NamedExpression("row_num", Expression("ROW_NUMBER() OVER (PARTITION BY {0} ORDER BY random())".format(key_col)))])
        select = Select(None, select)

        subquery = self.per_key_clamped(query)
        subquery = [AliasedRelation(subquery, "clamped" if self.options.clamp_columns else "not_clamped")]

        return Query(select, From(subquery), None, None, None, None, None)