def get(self):
    args = self.reqparse.parse_args()
    regex_id = args['regex_id']
    post = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
    ).filter(
        Regex.id == regex_id
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(func.coalesce(Rating.mark, 0)).desc()
    ).first()
    return post[0].to_dict(views=post[1], avgmark=float(post[2])), 200
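Several of the handlers in this collection repeat the same shape: outer-join the ratings table, COUNT the join key for a view count, and AVG a COALESCE'd mark so unrated rows average to 0 rather than NULL. A minimal, self-contained sketch of that pattern, using hypothetical Post/Vote models in place of Regex/Rating:

from sqlalchemy import Column, ForeignKey, Integer, create_engine, func
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Post(Base):
    __tablename__ = 'post'
    id = Column(Integer, primary_key=True)

class Vote(Base):
    __tablename__ = 'vote'
    id = Column(Integer, primary_key=True)
    post_id = Column(Integer, ForeignKey('post.id'))
    mark = Column(Integer)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(Post())
session.commit()

# COALESCE turns the NULL mark produced by the outer join for unrated
# posts into 0, so AVG yields 0 instead of NULL; COUNT over the join
# key stays 0 because COUNT ignores NULLs.
rows = (
    session.query(
        Post,
        func.count(Vote.post_id).label('views'),
        func.avg(func.coalesce(Vote.mark, 0)).label('avgmark'),
    )
    .outerjoin(Vote, Post.id == Vote.post_id)
    .group_by(Post.id)
    .all()
)
print(rows)  # e.g. [(<Post ...>, 0, 0.0)]

Whether an unrated row should average to 0 or to NULL is a design choice: the variants below that skip COALESCE handle the NULL in Python instead, with float(avgmark) if avgmark else 0.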
def get_average_test_runs(items=None, release=None):
    """Returns a list of `Test Runs` grouped by a `Release`."""
    avg_runs = db.session.query(
        TestRun.name,
        func.avg(TestRun.passed).label('passed'),
        func.avg(TestRun.failed).label('failed'),
        func.avg(TestRun.skipped).label('skipped'),
        func.avg(TestRun.error).label('error'),
        func.avg(TestRun.percent_passed).label('percent_passed'),
        func.avg(TestRun.percent_failed).label('percent_failed'),
        func.avg(TestRun.percent_executed).label('percent_executed'),
        func.avg(TestRun.percent_not_executed).label('percent_not_executed'),
    ).group_by(
        TestRun.release_id, TestRun.name
    ).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
    )
    avg_runs = avg_runs.filter_by(waved=False)
    if release:
        avg_runs = avg_runs.filter_by(release=release)
    if items:
        avg_runs = avg_runs.limit(items)
    return avg_runs
def get(self):
    args = self.reqparse.parse_args()
    regex_id = args['regex_id']
    post = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(Rating.mark).label('avgmark')
    ).filter(
        Regex.id == regex_id
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(Rating.mark).desc()
    ).first()
    post, views, avg_mark = post[0], post[1], post[2]
    user = User.query.get_or_404(post.author_id)
    return post.to_dict(author=user.username, views=views,
                        avg_mark=float(avg_mark) if avg_mark else 0), 200
def post(self):
    args = self.reqparse.parse_args()
    regex = args['regex']
    posts = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
    ).filter(
        Regex.expression.like(f'{regex}%')
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(func.coalesce(Rating.mark, 0)).desc()
    ).all()
    return [post.to_dict(views=views, avg_mark=float(avgmark))
            for post, views, avgmark in posts], 200
def get(self):
    args = self.reqparse.parse_args()
    user_id = args['user_id']
    views = func.count(Rating.regex_id).label('views')
    avgmark = func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
    posts = db.session.query(Regex, views, avgmark).outerjoin(
        Rating, Regex.id == Rating.regex_id).group_by(Regex.id).subquery()
    user_posts = db.session.query(Rating.mark, posts).join(
        posts, posts.c.id == Rating.regex_id).filter(
        Rating.user_id == user_id).order_by(
        desc(posts.c.views), desc(posts.c.avgmark)).all()
    # date and avgmark come back as datetime/Decimal, which are not
    # JSON-serializable as-is, so convert them before returning
    return [{
        'id': regex_id,
        'expression': regex_expression,
        'explanation': regex_explanation,
        'date': str(regex_date),
        'author_id': regex_author_id,
        'views': regex_views,
        'avg_mark': float(regex_avgmark) if regex_avgmark else 0,
        'user_mark': user_mark,
    } for user_mark, regex_id, regex_expression, regex_explanation,
        regex_date, regex_author_id, regex_views, regex_avgmark in user_posts]
def selectables(cls, bag, agg_spec):
    """ Create a list of statements from spec

    :type bag: mongosql.bag.ModelPropertyBags
    :rtype: list[sqlalchemy.sql.elements.ColumnElement]
    """
    # TODO: calculation expressions for selection: http://docs.mongodb.org/manual/meta/aggregation-quick-reference/
    selectables = []
    for comp_field, comp_expression in agg_spec.items():
        # Column reference
        if isinstance(comp_expression, basestring):
            selectables.append(bag.columns[comp_expression].label(comp_field))
            continue

        # Computed expression
        assert isinstance(comp_expression, dict), 'Aggregate: Expression should be either a column name, or an object'
        assert len(comp_expression) == 1, 'Aggregate: expression can only contain a single operator'
        operator, expression = comp_expression.popitem()

        # Expression statement
        if isinstance(expression, int) and operator == '$sum':
            # Special case for count
            expression_stmt = expression
        elif isinstance(expression, basestring):
            # Column name
            expression_stmt = bag.columns[expression]
            # Json column?
            if bag.columns.is_column_json(expression):
                # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
                expression_stmt = cast(expression_stmt, Float)
        elif isinstance(expression, dict):
            # Boolean expression
            expression_stmt = MongoCriteria.statement(bag, expression)
            # Need to cast it to int
            expression_stmt = cast(expression_stmt, Integer)
        else:
            raise AssertionError('Aggregate: expression should be either a column name, or an object')

        # Operator
        if operator == '$max':
            comp_stmt = func.max(expression_stmt)
        elif operator == '$min':
            comp_stmt = func.min(expression_stmt)
        elif operator == '$avg':
            comp_stmt = func.avg(expression_stmt)
        elif operator == '$sum':
            if isinstance(expression_stmt, int):
                # Special case for count
                comp_stmt = func.count()
                if expression_stmt != 1:
                    comp_stmt *= expression_stmt
            else:
                comp_stmt = func.sum(expression_stmt)
        else:
            raise AssertionError('Aggregate: unsupported operator "{}"'.format(operator))

        # Append
        selectables.append(comp_stmt.label(comp_field))
    return selectables
def get(self):
    args = self.reqparse.parse_args()
    limit_by, offset = args['limit_by'], args['offset']
    posts = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(Rating.mark).label('avgmark')
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(Rating.mark).desc()
    ).all()  # .limit(limit_by).offset(0 + limit_by * offset)
    return [
        post.to_dict(views=views, avg_mark=float(avgmark))
        if avgmark else post.to_dict(views=views, avg_mark=0)
        for post, views, avgmark in posts
    ], 200
def get(self):
    args = self.reqparse.parse_args()
    limit_by, offset, author_id = (
        args['limit_by'], args['offset'], args['author_id'])
    u = User.query.get_or_404(author_id)
    posts = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(func.coalesce(Rating.mark, 0)).label('avgmark')
    ).filter(
        Regex.author_id == u.id
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(func.coalesce(Rating.mark, 0)).desc()
    ).all()
    return [post.to_dict(views=views, avg_mark=float(avgmark))
            for post, views, avgmark in posts], 200
def query_aggregate(self, ctx, drills, cuts, limit=5000):
    mappings = self.sql_mappings(ctx)
    joins = self.sql_joins(ctx, None)
    pk = self.pk(ctx)

    connection = self.sqltable.connection.connection()
    engine = self.sqltable.connection._engine

    # Build query
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    q = session.query()
    #q = q.add_columns(self.sqltable.sa_table.columns['is_bot_id'].label("x"))
    #q = q.add_entity(self.sqltable.sa_table)

    # Include measures
    for measure in [m for m in mappings if isinstance(m.field, Measure)]:
        sa_column = self.sqltable.sa_table.columns[measure.sqlcolumn.name]
        q = q.add_columns(func.avg(sa_column).label(measure.field.name + "_avg"))
        q = q.add_columns(func.sum(sa_column).label(measure.field.name + "_sum"))

    q = q.add_columns(func.count(self.sqltable.sa_table).label("record_count"))

    # Drills
    for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
        # We should check the dimension-path here, with drills, and use key/lookup for drill
        if dimension.field.name in drills:
            sa_column = None
            try:
                sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
            except KeyError as e:
                raise ETLException(
                    "Unknown column in backend SQL table (table=%s, column=%s). Columns: %s" %
                    (self.sqltable.sa_table, dimension.sqlcolumn.name,
                     [c.name for c in self.sqltable.sa_table.columns]))
            q = q.add_columns(sa_column)
            q = q.group_by(sa_column)

    # Cuts
    # TODO: Filtering on any dimension attribute, not only the key
    # (i.e. filter cities with type A or icon B), but then again
    # that could be a different (nested) dimension.
    for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
        # We should check the dimension-path here, with drills
        if dimension.field.name in cuts.keys():
            sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
            cut_value = cuts[dimension.field.name]
            q = q.filter(sa_column == cut_value)

    # Limit: use the caller-supplied value rather than a hard-coded 5000
    q = q.limit(limit)

    statement = q.statement
    logger.debug("Statement: %s", str(statement).replace("\n", " "))
    rows = connection.execute(statement).fetchall()
    return rows
def average_distance_travelled(self, start_date, end_date):
    submission_subclass = CompostSalesRegister
    query = (
        DBSession.query(
            func.avg(submission_subclass.json_data[
                submission_subclass.DISTANCE_TRAVELLED].cast(Float)))
        .join(MunicipalitySubmission,
              (MunicipalitySubmission.submission_id == submission_subclass.id))
        .filter(MunicipalitySubmission.municipality == self)
        .filter(submission_subclass.status == Submission.APPROVED)
        .filter(and_(Submission.date >= start_date,
                     Submission.date <= end_date)))
    return query.first()[0]
def get(self):
    args = self.reqparse.parse_args()
    token, limit_by, offset, author_id = (
        args['token'], args['limit_by'], args['offset'], args['author_id'])
    if int(r[token]) != author_id:
        abort(403)
    u = User.query.get_or_404(author_id)
    posts = Regex.query.outerjoin(
        Rating, Regex.id == Rating.regex_id
    ).add_columns(
        func.count(Rating.regex_id).label('views'),
        func.avg(Rating.mark).label('avgmark')
    ).filter(
        Regex.author_id == u.id
    ).group_by(
        Regex.id
    ).order_by(
        func.count(Rating.regex_id).desc(),
        func.avg(Rating.mark).desc()
    ).all()
    return [
        post.to_dict(views=views, avg_mark=float(avgmark), author=u.username)
        if avgmark else post.to_dict(views=views, avg_mark=0, author=u.username)
        for post, views, avgmark in posts
    ], 200
def index():
    # How many items?
    items = request.args.get('items', 10)

    test_runs = TestRun.query.filter_by(waved=False).join(
        OperatingSystem).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
        OperatingSystem.major_version.desc()
    ).limit(items)

    rows = []
    for row in test_runs:
        rows.append([row.name, row.passed, row.failed, row.skipped])
    rows.reverse()

    # Average numbers
    avg_rows = db.session.query(
        TestRun.name,
        func.avg(TestRun.passed).label('passed'),
        func.avg(TestRun.failed).label('failed'),
        func.avg(TestRun.skipped).label('skipped'),
    ).group_by(TestRun.name).order_by(
        TestRun.timestamp.desc(),
        TestRun.name.desc(),
    ).limit(items)

    avg_data = []
    for row in avg_rows:
        avg_data.append([row.name, row.passed, row.failed, row.skipped])
    avg_data.reverse()

    return render_template(
        'index.html',
        avg_data=json.dumps(avg_data),
        data=json.dumps(rows),
        test_runs=test_runs,
    )
def api_property_search(context, request):
    street_number = request.GET.get("street_number")
    street_name = request.GET.get("street_name")
    city = request.GET.get("city")
    state = request.GET.get("state")
    zip = request.GET.get("zip")

    q = DBSession.query(Property, func.avg(Review.rating_average))
    if street_number is not None:
        q = q.filter(Property.street_number == int(street_number))
    if street_name is not None:
        q = q.filter(func.lower(Property.street_name).like("%%%s%%" % street_name.lower()))
    if city is not None:
        q = q.filter(func.lower(Property.city).like("%%%s%%" % city.lower()))
    if state is not None:
        q = q.filter(func.lower(Property.state).like("%%%s%%" % state.lower()))
    if zip is not None:
        q = q.filter(Property.zip == zip)
    q = q.outerjoin(Property.reviews)
    q = q.group_by(Property.id)
    q = q.limit(7)

    results = []
    for property, avg in q.all():
        if avg is not None:
            property.overall = int(avg)
        else:
            property.overall = None
        results.append(property)
    return results
def compile(self):
    # Json column?
    if self.is_column_json:
        # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
        column = cast(self.column, Float)
    else:
        # Use the column as-is
        column = self.column

    # Now, handle the operator, and apply it to the expression
    if self.operator == '$max':
        stmt = func.max(column)
    elif self.operator == '$min':
        stmt = func.min(column)
    elif self.operator == '$avg':
        stmt = func.avg(column)
    elif self.operator == '$sum':
        stmt = func.sum(column)
    else:
        raise InvalidQueryError('Aggregate: unsupported operator "{}"'.format(self.operator))
    return self.labeled_expression(stmt)
def get(self):
    args = self.reqparse.parse_args()
    token = args['token']
    user_id = int(r[token])
    views = func.count(Rating.regex_id).label('views')
    avgmark = func.avg(Rating.mark).label('avgmark')
    posts = db.session.query(Regex, views, avgmark).outerjoin(
        Rating, Regex.id == Rating.regex_id).group_by(Regex.id).subquery()
    user_posts = db.session.query(Rating.mark, posts).join(
        posts, posts.c.id == Rating.regex_id).filter(
        Rating.user_id == user_id).order_by(
        desc(posts.c.views), desc(posts.c.avgmark)).all()
    return [{
        'id': regex_id,
        'expression': regex_expression,
        'explanation': regex_explanation,
        'date': str(regex_date),
        'views': regex_views,
        'avg_mark': float(regex_avgmark) if regex_avgmark else 0,
        'user_mark': user_mark,
    } for user_mark, regex_id, regex_expression, regex_explanation,
        regex_date, _, regex_views, regex_avgmark in user_posts], 200
def density_of_rejects_from_sieving(self, start_date, end_date):
    query = self.get_report_query(
        MonthlyRejectsDensity, start_date, end_date,
        func.avg(Report.report_json['density'].cast(Float)))
    return query.first()[0]

def conversion_factor_mature_to_sieved(self, start_date, end_date):
    query = self.get_report_query(
        MonthlyRejectsComposition, start_date, end_date,
        func.avg(Report.report_json['conversion_factor'].cast(Float)))
    return query.first()[0]

def density_of_mature_compost(self, start_date, end_date):
    query = self.get_report_query(
        MonthlyRejectsComposition, start_date, end_date,
        func.avg(Report.report_json['density_of_mature_compost'].cast(Float)))
    return query.first()[0]

def density_of_msw(self, start_date, end_date):
    query = self.get_report_query(
        DailyWaste, start_date, end_date,
        func.avg(Report.report_json['density'].cast(Float)))
    return query.first()[0]
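The four report methods above all average a numeric value that lives inside a JSON column, casting to Float because JSON path extraction returns text. A self-contained sketch of just that cast-and-average step, with a toy Report model standing in for the real one (the real get_report_query is assumed to supply the class and date filtering):

from sqlalchemy import JSON, Column, Float, Integer, create_engine, func
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Report(Base):  # toy stand-in for the real Report model
    __tablename__ = 'report'
    id = Column(Integer, primary_key=True)
    report_json = Column(JSON)

engine = create_engine('sqlite://')  # any backend with JSON support works
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([Report(report_json={'density': 0.4}),
                 Report(report_json={'density': 0.6})])
session.commit()

# The indexed JSON expression extracts text; .cast(Float) makes it
# numeric so AVG does arithmetic on it.
avg_density = session.query(
    func.avg(Report.report_json['density'].cast(Float))
).scalar()
print(avg_density)  # 0.5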
def selectables(cls, bag, agg_spec):
    """ Create a list of statements from spec

    :type bag: mongosql.bag.ModelPropertyBags
    :rtype: list[sqlalchemy.sql.elements.ColumnElement]
    """
    # TODO: calculation expressions for selection: http://docs.mongodb.org/manual/meta/aggregation-quick-reference/
    selectables = []
    for comp_field, comp_expression in agg_spec.items():
        # Column reference
        if isinstance(comp_expression, string_types):
            selectables.append(bag.columns[comp_expression].label(comp_field))
            continue

        # Computed expression
        assert isinstance(comp_expression, dict), 'Aggregate: Expression should be either a column name, or an object'
        assert len(comp_expression) == 1, 'Aggregate: expression can only contain a single operator'
        operator, expression = comp_expression.popitem()

        # Expression statement
        if isinstance(expression, int) and operator == '$sum':
            # Special case for count
            expression_stmt = expression
        elif isinstance(expression, string_types):
            # Column name
            expression_stmt = bag.columns[expression]
            # Json column?
            if bag.columns.is_column_json(expression):
                # PostgreSQL always returns text values from it, and for aggregation we usually need numbers :)
                expression_stmt = cast(expression_stmt, Float)
        elif isinstance(expression, dict):
            # Boolean expression
            expression_stmt = MongoCriteria.statement(bag, expression)
            # Need to cast it to int
            expression_stmt = cast(expression_stmt, Integer)
        else:
            raise AssertionError('Aggregate: expression should be either a column name, or an object')

        # Operator
        if operator == '$max':
            comp_stmt = func.max(expression_stmt)
        elif operator == '$min':
            comp_stmt = func.min(expression_stmt)
        elif operator == '$avg':
            comp_stmt = func.avg(expression_stmt)
        elif operator == '$sum':
            if isinstance(expression_stmt, int):
                # Special case for count
                comp_stmt = func.count()
                if expression_stmt != 1:
                    comp_stmt *= expression_stmt
            else:
                comp_stmt = func.sum(expression_stmt)
        else:
            raise AssertionError('Aggregate: unsupported operator "{}"'.format(operator))

        # Append
        selectables.append(comp_stmt.label(comp_field))
    return selectables
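One branch worth calling out in both selectables() variants: '$sum' with an integer operand is treated as a count, and an integer other than 1 scales that count. A tiny illustration of the expressions this branch builds (the field names here are made up):

from sqlalchemy import func

# {'n': {'$sum': 1}}  -> a bare row count
n = func.count().label('n')

# {'n3': {'$sum': 3}} -> the row count scaled by the constant
n3 = (func.count() * 3).label('n3')

print(n)   # count(*)
print(n3)  # e.g. count(*) * :count_1  (the 3 is bound as a parameter)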
def init_list_analysis(list_id, list_name, count, open_rate,
                       api_key, data_center, user_email):
    # Try to pull the list stats from database
    existing_list = ListStats.query.filter_by(list_id=list_id).first()

    # Placeholder for list stats
    stats = None

    if existing_list is None:
        stats = import_analyze_store_list(list_id, count, open_rate,
                                          api_key, data_center, user_email)
    else:
        # Get list stats from database results
        # Deserialize the histogram data
        stats = {'subscribers': existing_list.subscribers,
                 'open_rate': existing_list.open_rate,
                 'hist_bin_counts': json.loads(existing_list.hist_bin_counts),
                 'subscribed_pct': existing_list.subscribed_pct,
                 'unsubscribed_pct': existing_list.unsubscribed_pct,
                 'cleaned_pct': existing_list.cleaned_pct,
                 'pending_pct': existing_list.pending_pct,
                 'high_open_rt_pct': existing_list.high_open_rt_pct,
                 'cur_yr_inactive_pct': existing_list.cur_yr_inactive_pct}

    # Log that the request occurred
    current_user = AppUser(user_email=user_email, list_id=list_id)
    db.session.add(current_user)
    db.session.commit()

    # Generate averages
    avg_stats = db.session.query(func.avg(ListStats.subscribers),
                                 func.avg(ListStats.open_rate),
                                 func.avg(ListStats.subscribed_pct),
                                 func.avg(ListStats.unsubscribed_pct),
                                 func.avg(ListStats.cleaned_pct),
                                 func.avg(ListStats.pending_pct),
                                 func.avg(ListStats.high_open_rt_pct),
                                 func.avg(ListStats.cur_yr_inactive_pct)).first()

    # Generate charts
    # Using OrderedDict (for now) as Pygal occasionally seems to break with
    # the Python 3.5 dictionary standard which preserves order by default
    # Export them as pngs to /charts
    list_size_chart = BarChart(
        'Chart A: List Size vs. Database Average (Mean)',
        OrderedDict([('Your List', [stats['subscribers']]),
                     ('Average (Mean)', [avg_stats[0]])]),
        percentage=False)
    list_size_chart.render_png(list_id + '_size')

    list_breakdown_chart = BarChart(
        'Chart B: List Composition vs. Database Average (Mean)',
        OrderedDict([('Subscribed %', [stats['subscribed_pct'], avg_stats[2]]),
                     ('Unsubscribed %', [stats['unsubscribed_pct'], avg_stats[3]]),
                     ('Cleaned %', [stats['cleaned_pct'], avg_stats[4]]),
                     ('Pending %', [stats['pending_pct'], avg_stats[5]])]),
        x_labels=('Your List', 'Average (Mean)'))
    list_breakdown_chart.render_png(list_id + '_breakdown')

    open_rate_chart = BarChart(
        'Chart C: List Open Rate vs. Database Average (Mean)',
        OrderedDict([('Your List', [stats['open_rate']]),
                     ('Average (Mean)', [avg_stats[1]])]))
    open_rate_chart.render_png(list_id + '_open_rate')

    open_rate_hist_chart = Histogram(
        'Chart D: Distribution of User Unique Open Rates',
        OrderedDict([('Your List', stats['hist_bin_counts'])]))
    open_rate_hist_chart.render_png(list_id + '_open_rate_histogram')

    high_open_rt_pct_chart = BarChart(
        'Chart E: Percentage of Subscribers with User Unique Open Rate '
        '>80% vs. Database Average (Mean)',
        OrderedDict([('Your List', [stats['high_open_rt_pct']]),
                     ('Average (Mean)', [avg_stats[6]])]))
    high_open_rt_pct_chart.render_png(list_id + '_high_open_rt')

    cur_yr_member_pct_chart = BarChart(
        'Chart F: Percentage of Subscribers who did not Open in last '
        '365 Days vs. Database Average (Mean)',
        OrderedDict([('Your List', [stats['cur_yr_inactive_pct']]),
                     ('Average (Mean)', [avg_stats[7]])]))
    cur_yr_member_pct_chart.render_png(list_id + '_cur_yr_inactive_pct')

    # Send charts as an email report
    # Due to the way Flask-Mail works, reimport app_context first
    with app.app_context():
        msg = Message('Your Email Benchmarking Report is Ready!',
                      sender='*****@*****.**',
                      recipients=[user_email],
                      html=render_template(
                          'report_email.html',
                          title='We\'ve analyzed the {} List!'.format(list_name),
                          list_id=list_id))
        mail.send(msg)
def send_report(stats, list_id, list_name, user_email_or_emails):
    """Generates charts using Plotly and emails them to the user.

    Args:
        stats: a dictionary containing analysis results for a list.
        list_id: the list's unique MailChimp id.
        list_name: the list's name.
        user_email_or_emails: a list of emails to send the report to.
    """
    # This subquery generates the most recent stats
    # for each unique list_id in the database
    # where store_aggregates is True
    subquery = ListStats.query.filter(
        ListStats.list.has(store_aggregates=True)).order_by(
        'list_id', desc('analysis_timestamp')).distinct(
        ListStats.list_id).subquery()

    # Generate aggregates within the subquery
    agg_stats = db.session.query(
        func.avg(subquery.columns.subscribers),
        func.avg(subquery.columns.subscribed_pct),
        func.avg(subquery.columns.unsubscribed_pct),
        func.avg(subquery.columns.cleaned_pct),
        func.avg(subquery.columns.pending_pct),
        func.avg(subquery.columns.open_rate),
        func.avg(subquery.columns.high_open_rt_pct),
        func.avg(subquery.columns.cur_yr_inactive_pct)).first()

    # Make sure we have no 'None' values
    agg_stats = [agg if agg else 0 for agg in agg_stats]

    # Convert subscribers average to an integer
    agg_stats[0] = int(agg_stats[0])

    # Generate epoch time (to get around image caching in webmail)
    epoch_time = str(int(time.time()))

    # Generate charts
    draw_bar(['Your List', 'Dataset Average'],
             [stats['subscribers'], agg_stats[0]],
             'Chart A: List Size',
             list_id + '_size_' + epoch_time)

    draw_stacked_horizontal_bar(
        ['Dataset Average', 'Your List'],
        [('Subscribed %', [agg_stats[1], stats['subscribed_pct']]),
         ('Unsubscribed %', [agg_stats[2], stats['unsubscribed_pct']]),
         ('Cleaned %', [agg_stats[3], stats['cleaned_pct']]),
         ('Pending %', [agg_stats[4], stats['pending_pct']])],
        'Chart B: List Composition',
        list_id + '_breakdown_' + epoch_time)

    draw_bar(['Your List', 'Dataset Average'],
             [stats['open_rate'], agg_stats[5]],
             'Chart C: List Open Rate',
             list_id + '_open_rate_' + epoch_time,
             percentage_values=True)

    histogram_legend_uri = ('https://s3-us-west-2.amazonaws.com/email-'
                            'benchmarking-imgs/open_rate_histogram_legend.png')

    draw_histogram(
        {'title': 'Open Rate by Decile',
         'vals': np.linspace(.05, .95, num=10)},
        {'title': 'Subscribers',
         'vals': stats['hist_bin_counts']},
        'Chart D: Distribution of Subscribers by Open Rate',
        histogram_legend_uri,
        list_id + '_open_rate_histogram_' + epoch_time)

    draw_donuts(
        ['Open Rate >80%', 'Open Rate <=80%'],
        [('Your List',
          [stats['high_open_rt_pct'], 1 - stats['high_open_rt_pct']]),
         ('Dataset Average',
          [agg_stats[6], 1 - agg_stats[6]])],
        'Chart E: Percentage of Subscribers with User Unique Open Rate >80%',
        list_id + '_high_open_rt_pct_' + epoch_time)

    draw_donuts(
        ['Inactive in Past 365 Days', 'Active in Past 365 Days'],
        [('Your List',
          [stats['cur_yr_inactive_pct'], 1 - stats['cur_yr_inactive_pct']]),
         ('Dataset Average',
          [agg_stats[7], 1 - agg_stats[7]])],
        'Chart F: Percentage of Subscribers who did not Open '
        'in last 365 Days',
        list_id + '_cur_yr_inactive_pct_' + epoch_time)

    # Send charts as an email report
    send_email('Your Email Benchmarking Report is Ready!',
               user_email_or_emails,
               'report-email.html',
               {'title': 'We\'ve analyzed the {} list!'.format(list_name),
                'list_id': list_id,
                'epoch_time': epoch_time},
               configuration_set_name=(
                   os.environ.get('SES_CONFIGURATION_SET') or None))
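The subquery above relies on PostgreSQL's DISTINCT ON: ordering by list_id and then by analysis_timestamp descending makes DISTINCT ON (list_id) keep exactly the newest row per list. A minimal Core sketch of the same trick that compiles without a database connection (assuming a PostgreSQL target and SQLAlchemy 1.4-style select()):

from sqlalchemy import Column, DateTime, Integer, MetaData, Table, desc, select
from sqlalchemy.dialects import postgresql

stats = Table(
    'list_stats', MetaData(),
    Column('list_id', Integer),
    Column('analysis_timestamp', DateTime),
)

# DISTINCT ON keeps the first row of each list_id group; the ORDER BY
# puts the newest analysis first, so that is the row that survives.
latest = (
    select(stats)
    .order_by(stats.c.list_id, desc(stats.c.analysis_timestamp))
    .distinct(stats.c.list_id)
)
print(latest.compile(dialect=postgresql.dialect()))
# SELECT DISTINCT ON (list_stats.list_id) ...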