def chart_query(user_id, filter_name):
    """Build the (labels, quantities) series for the water chart, bucketed
    by month, week or day, with empty buckets zero-filled."""
    filter_dict = {
        'months': ('month', '%b %Y'),
        'weeks': ('week', '%D'),
        'days': ('day', '%D'),
    }
    trunc_type, time_format = filter_dict[filter_name]
    line_chart = (db.session.query(
        func.date_trunc(trunc_type, Water.time_updated),
        func.sum(Water.ounces)).group_by(
            func.date_trunc(trunc_type, Water.time_updated)).order_by(
                func.date_trunc(trunc_type, Water.time_updated)).filter(
                    Water.user_id == user_id).all())
    prev_date_time = None
    time_parameter = []
    qty = []
    for item in line_chart:
        if prev_date_time is not None:
            # zero-fill any buckets between the previous row and this one
            cur_date = next_date(prev_date_time, trunc_type)
            while cur_date < item[0]:
                time_parameter.append(cur_date.strftime(time_format))
                qty.append(0)
                cur_date = next_date(cur_date, trunc_type)
        time_parameter.append(item[0].strftime(time_format))
        qty.append(item[1])
        prev_date_time = item[0]
    return time_parameter, qty
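# chart_query() above depends on a next_date(dt, trunc_type) helper that is
# not shown in this snippet. A minimal sketch of what it might look like --
# the name and signature are taken from the call sites, the body is an
# assumption, not the original implementation:
from datetime import timedelta

def next_date(dt, trunc_type):
    """Advance a bucket-truncated datetime by one bucket of the given size."""
    if trunc_type == 'day':
        return dt + timedelta(days=1)
    if trunc_type == 'week':
        return dt + timedelta(weeks=1)
    if trunc_type == 'month':
        # roll over to the first day of the next month
        return dt.replace(year=dt.year + dt.month // 12,
                          month=dt.month % 12 + 1,
                          day=1)
    raise ValueError('unsupported trunc_type: %s' % trunc_type)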
def get(self, short): db = backend.Backend.instance().get_session() try: short_uri = db.query(models.ShortURI)\ .filter(models.ShortURI.short == short)\ .one() hits = db.query(func.date_trunc('day', models.Hit.created), func.count())\ .filter(models.Hit.short_id == short_uri.id)\ .group_by(func.date_trunc('day', models.Hit.created))\ .limit(100) params = { "short_uri": short_uri, "hits": hits } self.jinja_render("info.html", **params) self.finish() except NoResultFound: self.set_status(404) self.jinja_render("404.html") self.finish() finally: db.close()
def fetch_monthly_billing_for_year(service_id, year): year_start_datetime, year_end_datetime = get_financial_year(year) year_start_date = convert_utc_to_bst(year_start_datetime).date() year_end_date = convert_utc_to_bst(year_end_datetime).date() today = convert_utc_to_bst(datetime.utcnow()).date() # if year end date is less than today, we are calculating for data in the past and have no need for deltas. if year_end_date >= today: yesterday = today - timedelta(days=1) for day in [yesterday, today]: data = fetch_billing_data_for_day(process_day=day, service_id=service_id) for d in data: update_fact_billing(data=d, process_day=day) email_and_letters = db.session.query( func.date_trunc('month', FactBilling.bst_date).cast(Date).label("month"), func.sum(FactBilling.notifications_sent).label("notifications_sent"), func.sum(FactBilling.notifications_sent).label("billable_units"), FactBilling.rate.label('rate'), FactBilling.notification_type.label('notification_type'), FactBilling.postage ).filter( FactBilling.service_id == service_id, FactBilling.bst_date >= year_start_date, FactBilling.bst_date <= year_end_date, FactBilling.notification_type.in_([EMAIL_TYPE, LETTER_TYPE]) ).group_by( 'month', FactBilling.rate, FactBilling.notification_type, FactBilling.postage ) sms = db.session.query( func.date_trunc('month', FactBilling.bst_date).cast(Date).label("month"), func.sum(FactBilling.notifications_sent).label("notifications_sent"), func.sum(FactBilling.billable_units * FactBilling.rate_multiplier).label("billable_units"), FactBilling.rate, FactBilling.notification_type, FactBilling.postage ).filter( FactBilling.service_id == service_id, FactBilling.bst_date >= year_start_date, FactBilling.bst_date <= year_end_date, FactBilling.notification_type == SMS_TYPE ).group_by( 'month', FactBilling.rate, FactBilling.notification_type, FactBilling.postage ) yearly_data = email_and_letters.union_all(sms).order_by( 'month', 'notification_type', 'rate' ).all() return yearly_data
def _get_route_metrics(session, args):
    # Protocol dashboard optimization:
    # If we're in 'simple' mode (only asking for a day/month bucket, with no
    # path or query string), read the corresponding materialized view instead
    # of aggregating the raw route metrics at query time.
    is_simple_args = (
        args.get('path') == "" and
        args.get('query_string') is None and
        args.get('start_time') and
        args.get('exact') is False and
        args.get('version') is None
    )
    bucket_size = args.get('bucket_size')
    if is_simple_args and bucket_size in ["day", "month"]:
        query = None
        if bucket_size == "day":
            # subtract 1 day from the start_time so that the last day is fully complete
            query = (session.query(RouteMetricsDayMatview).filter(
                RouteMetricsDayMatview.time >
                (args.get('start_time') - timedelta(days=1))))
        else:
            query = (session.query(RouteMetricsMonthMatview).filter(
                RouteMetricsMonthMatview.time > (args.get('start_time'))))

        query = (query.order_by(desc('time')).limit(args.get('limit')).all())
        metrics = list(map(_make_metrics_tuple, query))
        return metrics

    metrics_query = (session.query(
        func.date_trunc(args.get('bucket_size'),
                        RouteMetrics.timestamp).label('timestamp'),
        func.sum(RouteMetrics.count).label('count'),
        func.count(RouteMetrics.ip.distinct()).label('unique_count')).filter(
            RouteMetrics.timestamp > args.get('start_time')))
    if args.get("exact") is True:
        metrics_query = (metrics_query.filter(
            RouteMetrics.route_path == args.get("path")))
    else:
        metrics_query = (metrics_query.filter(
            RouteMetrics.route_path.like('{}%'.format(args.get("path")))))

    if args.get("query_string", None) is not None:
        metrics_query = (metrics_query.filter(
            or_(
                RouteMetrics.query_string.like('%{}'.format(
                    args.get("query_string"))),
                RouteMetrics.query_string.like('%{}&%'.format(
                    args.get("query_string"))))))

    metrics_query = (metrics_query.group_by(
        func.date_trunc(args.get('bucket_size'),
                        RouteMetrics.timestamp)).order_by(
                            desc('timestamp')).limit(args.get('limit')))

    metrics = metrics_query.all()
    metrics = [{
        'timestamp': int(time.mktime(m[0].timetuple())),
        'count': m[1],
        'unique_count': m[2],
    } for m in metrics]

    return metrics
def day_report(session, aid, date_from=None, date_to=None): q = session.query(func.date_trunc('day', Transaction.date), Destination.direction, func.sum(Transaction.amount))\ .join(Transaction.accounts)\ .filter(Destination.account == aid)\ .filter(Transaction.canceled == False) if date_from: q = q.filter(Transaction.date >= date_from) if date_to: q = q.filter(Transaction.date < date_to) result = q.group_by(func.date_trunc('day', Transaction.date), Destination.direction) data = [] kredit = debet = 0 last_data = None for r in result: if last_data is not None and last_data != r[0]: data.append((last_data, Balance(debet, kredit))) kredit = debet = 0 last_data = r[0] if r[1]: debet = r[2] else: kredit = r[2] data.append((last_data, Balance(debet, kredit))) return data
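# day_report() above returns (date, Balance) pairs, but Balance is not
# defined in this snippet. A namedtuple is one plausible (assumed) shape for
# it, matching the positional Balance(debet, kredit) calls:
from collections import namedtuple

Balance = namedtuple('Balance', ['debet', 'kredit'])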
def _get_app_name_metrics(session, app_name, args): metrics = ( session.query( func.date_trunc(args.get("bucket_size"), AppNameMetrics.timestamp).label( "timestamp" ), func.sum(AppNameMetrics.count).label("count"), func.count(AppNameMetrics.ip.distinct()).label("unique_count"), ) .filter( AppNameMetrics.application_name == app_name, AppNameMetrics.timestamp > args.get("start_time"), ) .group_by(func.date_trunc(args.get("bucket_size"), AppNameMetrics.timestamp)) .order_by(desc("timestamp")) .limit(args.get("limit")) .all() ) metrics = [ { "timestamp": int(time.mktime(m[0].timetuple())), "count": m[1], "unique_count": m[2], } for m in metrics ] return metrics
def conflicting_activities(self): if not isinstance(self.model, Period): return None session = self.request.session mindate = self.execution_start.data maxdate = self.execution_end.data if not (mindate and maxdate): return None # turn naive utc to aware utc to local timezone start = OccasionDate.start.op('AT TIME ZONE')(literal('UTC')) start = start.op('AT TIME ZONE')(OccasionDate.timezone) end = OccasionDate.end.op('AT TIME ZONE')(literal('UTC')) end = end.op('AT TIME ZONE')(OccasionDate.timezone) qd = session.query(OccasionDate) qd = qd.with_entities(OccasionDate.occasion_id) qd = qd.filter( or_( func.date_trunc('day', start) < mindate, func.date_trunc('day', start) > maxdate, func.date_trunc('day', end) < mindate, func.date_trunc('day', end) > maxdate)) q = session.query(OccasionDate).join(Occasion) q = q.with_entities(distinct(Occasion.activity_id)) q = q.filter(Occasion.period == self.model) q = q.filter(Occasion.id.in_(qd.subquery())) return tuple( session.query(Activity).filter(Activity.id.in_(q.subquery())))
def get_interval(self, start_date=None, end_date=None, product=None, interval='month'):
    query = db.session.query(func.sum(Record.quantity), Product.id,
                             func.date_trunc(interval, Record.date))
    query = query.join(Product).join(Account).filter(Account.id == self.id)
    if start_date:
        query = query.filter(Record.date >= start_date)
    if end_date:
        query = query.filter(Record.date <= end_date)
    if product:
        query = query.filter(Product.id == product.id)
    query = query.group_by(Product.id).group_by(func.date_trunc(interval, Record.date))
    try:
        ret_val = []
        products = {}
        for x in query.all():
            # cache Product lookups so each product is fetched only once
            if x[1] in products:
                prod = products[x[1]]
            else:
                prod = products[x[1]] = Product.query.filter(Product.id == x[1]).one()
            rates = prod.rate_pricing(x[0])
            ret_val.append({
                'product_id': prod.id,
                'quantity': x[0],
                'date': x[2].strftime('%Y-%m'),
                'rates': rates,
                'price': sum([r['price_at_quantity'] for r in rates])
            })
        return ret_val
    except NoResultFound:
        return []
def traffic_history_query():
    events = (select(func.sum(TrafficVolume.amount).label('amount'),
                     literal_column('day'),
                     cast(TrafficVolume.type, TEXT).label('type'))
              .select_from(
                  func.generate_series(
                      func.date_trunc('day', literal_column('arg_start')),
                      func.date_trunc('day', literal_column('arg_end')),
                      '1 day'
                  ).alias('day')
                  .outerjoin(TrafficVolume.__table__, and_(
                      func.date_trunc('day', TrafficVolume.timestamp) == literal_column('day'),
                      TrafficVolume.user_id == literal_column('arg_user_id'))
                  )
              )
              .group_by(literal_column('day'), literal_column('type'))
              ).cte()

    events_ingress = select(events).where(
        or_(events.c.type == 'Ingress', events.c.type.is_(None))).cte()
    events_egress = select(events).where(
        or_(events.c.type == 'Egress', events.c.type.is_(None))).cte()

    hist = (select(func.coalesce(events_ingress.c.day, events_egress.c.day).label('timestamp'),
                   events_ingress.c.amount.label('ingress'),
                   events_egress.c.amount.label('egress'))
            .select_from(events_ingress.join(events_egress,
                                             events_ingress.c.day == events_egress.c.day,
                                             full=True))
            .order_by(literal_column('timestamp'))
            )

    return hist
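# Note on traffic_history_query() above: arg_start, arg_end and arg_user_id
# are literal_column() placeholders rather than bind parameters -- the
# statement is meant to be embedded in a context that defines those names
# (for example the body of a SQL function). A quick way to inspect the
# generated SQL and see the placeholders (sketch):
print(traffic_history_query())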
def get_approved_names_counter(cls): auto_approved_names_counter = db.session.query( func.count(Event.id).label('approvedNamesCounter'))\ .filter(Event.action == Event.PATCH + 'Payment Completed')\ .filter(Event.userId == EventUserId.SERVICE_ACCOUNT.value)\ .filter(Event.stateCd.in_(('APPROVED','CONDITIONAL')))\ .filter(func.date_trunc('day', Event.eventDate) == func.date_trunc('day', func.now()))\ .all() return auto_approved_names_counter.pop()
def V_A(): result = db.session.query( func.date_trunc('decade', Patient.birth_datetime), func.count(Visit.person_id)).filter( Patient.person_id == Visit.person_id).group_by( func.date_trunc('decade', Patient.birth_datetime)).order_by( func.date_trunc('decade', Patient.birth_datetime)).all() print(result) return render_template('visit/age.html', results=result)
def get_approved_names_counter(cls): auto_approved_names_counter = db.session.query( func.count(Event.id).label('approvedNamesCounter')).filter( Event.action == EventAction.PUT.value, Event.userId == EventUserId.SERVICE_ACCOUNT.value, Event.stateCd == EventState.APPROVED.value, func.date_trunc('day', Event.eventDate) == func.date_trunc( 'day', func.now())).all() return auto_approved_names_counter.pop()
def api_private_reports_per_day(): q = current_app.db_session.query( func.count(func.date_trunc('day', Report.test_start_time)), func.date_trunc('day', Report.test_start_time)).group_by( func.date_trunc('day', Report.test_start_time)).order_by( func.date_trunc('day', Report.test_start_time)) result = [] for count, date in q: result.append({'count': count, 'date': date.strftime("%Y-%m-%d")}) return jsonify(result)
def query(self): q = select([func.date_trunc(self.group, LogItem.log_date).label('day'), func.count().label('visitCount'), func.count(LogItem.log_host.distinct()).label('uniqueVisitCount')], and_(LogItem.log_hebpk == self.hebPk, LogItem.log_date.between(self.minDate, self.maxDate))) q = q.group_by(func.date_trunc(self.group, LogItem.log_date)) q = q.order_by(func.date_trunc(self.group, LogItem.log_date)) return q
def query_current_year(self, session):
    self.event_name = c.EVENT_NAME_AND_YEAR

    # TODO: we're hacking the timezone info out of ESCHATON (final day of event).
    # probably not the right thing to do
    self.end_date = c.DATES['ESCHATON'].replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None)

    # return registrations where people actually paid money
    # exclude: dealers
    reg_per_day = session.query(
            func.date_trunc(literal('day'), Attendee.registered),
            func.count(func.date_trunc(literal('day'), Attendee.registered))
        ) \
        .outerjoin(Attendee.group) \
        .filter(
            (
                Attendee.group_id.isnot(None) &
                (Attendee.paid == c.PAID_BY_GROUP) &  # if they're paid by group
                (Group.tables == 0) &                 # make sure they aren't dealers
                (Group.amount_paid > 0)               # make sure they've paid something
            ) | (  # OR
                Attendee.paid == c.HAS_PAID  # if they're an attendee, make sure they're fully paid
            )
        ) \
        .group_by(func.date_trunc(literal('day'), Attendee.registered)) \
        .order_by(func.date_trunc(literal('day'), Attendee.registered)) \
        .all()

    # now, convert the query's data into the format we need.
    # SQL will skip days without registrations;
    # we need all self.num_days_to_report days to have data, even if it's zero.
    # create num_days_to_report elements in the final array
    self.registrations_per_day = self.num_days_to_report * [0]

    for reg_data in reg_per_day:
        day = reg_data[0]
        reg_count = reg_data[1]

        day_offset = self.num_days_to_report - (self.end_date - day).days
        day_index = day_offset - 1

        if day_index < 0 or day_index >= self.num_days_to_report:
            log.info(
                "Ignoring some analytics data because it's not in range of the year before c.ESCHATON. "
                "Either c.ESCHATON is set incorrectly or you have registrations starting 1 year before ESCHATON, "
                "or occurring after ESCHATON. day_index=" + str(day_index))
            continue

        self.registrations_per_day[day_index] = reg_count

    self.compute_cumulative_sum_from_registrations_per_day()
def timeseries(self, agg_unit, start, end, geom=None, column_filters=None): # Reading this blog post # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html # inspired this implementation. t = self.point_table # Special case for the 'quarter' unit of aggregation. step = '3 months' if agg_unit == 'quarter' else '1 ' + agg_unit # Create a CTE to represent every time bucket in the timeseries # with a default count of 0 day_generator = func.generate_series(func.date_trunc(agg_unit, start), func.date_trunc(agg_unit, end), step) defaults = select([sa.literal_column("0").label('count'), day_generator.label('time_bucket')]) \ .alias('defaults') where_filters = [t.c.point_date >= start, t.c.point_date <= end] if column_filters is not None: # Column filters has to be iterable here, because the '+' operator # behaves differently for SQLAlchemy conditions. Instead of # combining the conditions together, it would try to build # something like :param1 + <column_filters> as a new condition. where_filters += [column_filters] # Create a CTE that grabs the number of records contained in each time # bucket. Will only have rows for buckets with records. actuals = select([func.count(t.c.hash).label('count'), func.date_trunc(agg_unit, t.c.point_date). label('time_bucket')]) \ .where(sa.and_(*where_filters)) \ .group_by('time_bucket') # Also filter by geometry if requested if geom: contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom)) actuals = actuals.where(contains) # Need to alias to make it usable in a subexpression actuals = actuals.alias('actuals') # Outer join the default and observed values # to create the timeseries select statement. # If no observed value in a bucket, use the default. name = sa.literal_column("'{}'".format(self.dataset_name)) \ .label('dataset_name') bucket = defaults.c.time_bucket.label('time_bucket') count = func.coalesce(actuals.c.count, defaults.c.count).label('count') ts = select([name, bucket, count]). \ select_from(defaults.outerjoin(actuals, actuals.c.time_bucket == defaults.c.time_bucket)) return ts
def timeseries(self, agg_unit, start, end, geom=None, column_filters=None): # Reading this blog post # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html # inspired this implementation. t = self.point_table if agg_unit == 'quarter': step = '3 months' else: step = '1 ' + agg_unit # Create a CTE to represent every time bucket in the timeseries # with a default count of 0 day_generator = func.generate_series(func.date_trunc(agg_unit, start), func.date_trunc(agg_unit, end), step) defaults = select([sa.literal_column("0").label('count'), day_generator.label('time_bucket')])\ .alias('defaults') # Create a CTE that grabs the number of records # contained in each time bucket. # Will only have rows for buckets with records. where_filters = [t.c.point_date >= start, t.c.point_date <= end] if column_filters: where_filters += column_filters actuals = select([func.count(t.c.hash).label('count'), func.date_trunc(agg_unit, t.c.point_date). label('time_bucket')])\ .where(sa.and_(*where_filters))\ .group_by('time_bucket') # Also filter by geometry if requested if geom: contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom)) actuals = actuals.where(contains) # Need to alias to make it usable in a subexpression actuals = actuals.alias('actuals') # Outer join the default and observed values # to create the timeseries select statement. # If no observed value in a bucket, use the default. name = sa.literal_column("'{}'".format(self.dataset_name))\ .label('dataset_name') bucket = defaults.c.time_bucket.label('time_bucket') count = func.coalesce(actuals.c.count, defaults.c.count).label('count') ts = select([name, bucket, count]).\ select_from(defaults.outerjoin(actuals, actuals.c.time_bucket == defaults.c.time_bucket)) return ts
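# The timeseries() implementations above build a SELECT without executing it.
# A caller might run one roughly like this (sketch; `shape`, `engine` and the
# date range are assumed names, not part of the original code):
ts = shape.timeseries('week', start, end, geom=None, column_filters=None)
for dataset_name, time_bucket, count in engine.execute(ts):
    print(dataset_name, time_bucket, count)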
def fetch_hourly(self, page, rows, sidx, sord='asc', _search='false',
                 searchOper=None, searchField=None, searchString=None, **kw):
    '''Function called on AJAX request made by FlexGrid.
    Fetch data from DB, return the list of rows + total + current page.
    '''
    if not in_any_group('admin', 'STATS'):
        return dict(page=0, total=0, rows=[])

    try:
        page = int(page)
        rows = int(rows)
        offset = (page - 1) * rows
    except (TypeError, ValueError):
        page = 1
        rows = 24
        offset = 0

    log.info('fetch_hourly : page=%d, rows=%d, offset=%d, sidx=%s, sord=%s' % (
        page, rows, offset, sidx, sord))

    # Initialize data, in case no data is available for that time slice
    data = [{'id': x, 'cell': ['%d h 00 < %d h 00' % (x, x + 1), 0, None]}
            for x in range(24)]

    # Count calls by hour
    if db_engine == 'oracle':
        req = func.to_char(CDR.calldate, 'HH24')
    else:  # PostgreSql
        req = func.date_trunc('hour', cast(CDR.calldate, TIME))
    cdrs = DBSession.query(req, func.count(req), func.sum(CDR.billsec))

    if self.stats_type:
        # Monthly stats
        d = datetime.datetime.strptime(self.stats_type, '%m/%d/%Y')
        if db_engine == 'oracle':
            cdrs = cdrs.filter(func.trunc(CDR.calldate, 'month') ==
                               func.trunc(d, 'month'))
        else:  # PostgreSql
            cdrs = cdrs.filter(func.date_trunc('month', CDR.calldate) ==
                               func.date_trunc('month', d))

    cdrs = cdrs.group_by(req)

    for i, c in enumerate(cdrs):
        if db_engine == 'oracle':
            j = int(c[0])
        else:  # PostgreSql: c[0] comes back as a timedelta, take the hour
            j = c[0].seconds // 3600
        data[j] = {'id': j,
                   'cell': ['%d h 00 < %d h 00' % (j, j + 1), c[1], hms(c[2])]}

    return dict(page=page, total=24, rows=data[offset:offset + page * rows])
def find_appointment_availability(cls, office_id: int, timezone: str, first_date: datetime, last_date: datetime): """Find appointment availability for dates in a month""" query = db.session.query(Appointment).filter( func.date_trunc( 'day', func.timezone(timezone, Appointment.start_time)).between( func.date_trunc('day', func.timezone(timezone, first_date)), func.date_trunc('day', func.timezone(timezone, last_date)))) query = query.filter(Appointment.office_id == office_id) query = query.order_by(Appointment.start_time.asc()) return query.all()
def get(self, id): if id is None: stock = Stock.query.all() report = Stock.query.with_entities(func.date_trunc("day", Stock.exp_date).label("date"), func.count( Stock.id_).label("count")).group_by(func.date_trunc("day", Stock.exp_date)).all() return jsonify(data=stocks_schema.dump(stock), report=report) else: stock = Stock.query.filter(Stock.id_ == id).first() if not stock: return response('Not Found', f'Item with Item Code {id} is not available.', 404) return stock_schema.jsonify(stock)
def get(self, item_code): if item_code is None: items = Items.query.all() report = Items.query.with_entities(func.date_trunc("day", Items.created_at).label("date"), func.count( Items.item_code).label("count")).group_by(func.date_trunc("day", Items.created_at)).all() return jsonify(data=items_schema.dump(items), report=report) else: item = Items.query.filter(Items.item_code == item_code).first() if not item: return response('Not Found', f'Item with Item Code {item_code} is not available.', 404) return item_schema.jsonify(item)
def get_vote_activity(session): """Create a plot showing the inline usage statistics.""" creation_date = func.date_trunc("day", Vote.created_at).label("creation_date") votes = (session.query(creation_date, func.count(Vote.id).label("count")).group_by( creation_date).order_by(creation_date).all()) total_votes = [("Total votes", q[0], q[1]) for q in votes] # Grid style plt.style.use("seaborn-whitegrid") # Combine the results in a single dataframe and name the columns dataframe = pandas.DataFrame(total_votes, columns=["type", "date", "votes"]) months = mdates.MonthLocator() # every month months_fmt = mdates.DateFormatter("%Y-%m") max_value = max([vote[2] for vote in total_votes]) magnitude = get_magnitude(max_value) # Plot each result set fig, ax = plt.subplots(figsize=(30, 15), dpi=120) for key, group in dataframe.groupby(["type"]): ax = group.plot(ax=ax, kind="bar", x="date", y="votes", label=key) ax.xaxis.set_major_locator(months) ax.xaxis.set_major_formatter(months_fmt) ax.yaxis.set_ticks(np.arange(0, max_value, math.pow(10, magnitude - 1))) image = image_from_figure(fig) image.name = "vote_statistics.png" return image
def dataset(): raw_query_params = request.args.copy() agg = raw_query_params.get('agg') if not agg: agg = 'day' else: del raw_query_params['agg'] datatype = 'json' if raw_query_params.get('datatype'): datatype = raw_query_params['datatype'] del raw_query_params['datatype'] valid_query, query_clauses, resp, status_code = make_query(MasterTable,raw_query_params) if valid_query: time_agg = func.date_trunc(agg, MasterTable.c['obs_date']) base_query = session.query(time_agg, func.count(MasterTable.c['obs_date']), MasterTable.c['dataset_name']) base_query = base_query.filter(MasterTable.c['current_flag'] == True) for clause in query_clauses: base_query = base_query.filter(clause) base_query = base_query.group_by(MasterTable.c['dataset_name'])\ .group_by(time_agg)\ .order_by(time_agg) values = [o for o in base_query.all()] results = [] for value in values: d = { 'dataset_name': value[2], 'group': value[0], 'count': value[1], } results.append(d) results = sorted(results, key=itemgetter('dataset_name')) for k,g in groupby(results, key=itemgetter('dataset_name')): d = {'dataset_name': ' '.join(k.split('_')).title()} d['temporal_aggregate'] = agg d['items'] = list(g) resp['objects'].append(d) resp['meta']['status'] = 'ok' if datatype == 'json': resp = make_response(json.dumps(resp, default=dthandler), status_code) resp.headers['Content-Type'] = 'application/json' elif datatype == 'csv': if not raw_query_params.get('dataset_name'): resp = { 'meta': { 'status': 'error', 'message': 'If you want data in a CSV format, you also need to specify a dataset_name' }, 'objects': [] } else: data = resp['objects'][0] fields = data['items'][0].keys() resp = make_response(make_csv(data['items'], fields), 200) resp.headers['Content-Type'] = 'text/csv' dname = raw_query_params['dataset_name'] filedate = datetime.now().strftime('%Y-%m-%d') resp.headers['Content-Disposition'] = 'attachment; filename=%s_%s.csv' % (dname, filedate) return resp
def history__facebook(): grain = _get_grain() # Date filter date_group = func.date_trunc(grain, SnapshotOfFacebook.timestamp) # Grouped query S = SnapshotOfFacebook q = Session.query()\ .add_column( date_group )\ .add_column( func.max(S.likes) )\ .group_by(date_group)\ .order_by(date_group.desc()) response = _prepare(q.count()) q = q.offset( response['offset'] )\ .limit( response['per_page'] ) # Inner function transforms SELECT tuple into recognizable format _dictize = lambda x: { 'timestamp':x[0].isoformat(), 'likes':x[1] } results = { 'history': [ _dictize(x) for x in q ], 'likes' : Session.query(S).order_by(S.timestamp.desc()).first().likes } # Write response response['grain'] = grain response['data'] = results return response
def visit_datetime_op(self, expr): class_name = type(expr).__name__ input = self._expr_to_sqlalchemy[expr._input] if class_name in DATE_PARTS_DIC: if self._sa_engine and self._sa_engine.name == 'mysql': if class_name == 'UnixTimestamp': fun = func.unix_timestamp else: fun = getattr(func, class_name.lower()) sa_expr = fun(input).cast(types.df_type_to_sqlalchemy_type(expr.dtype)) else: sa_expr = func.date_part(DATE_PARTS_DIC[class_name], input)\ .cast(types.df_type_to_sqlalchemy_type(expr.dtype)) elif isinstance(expr, Date): if self._sa_engine and self._sa_engine.name == 'mysql': sa_expr = func.date(input).cast(types.df_type_to_sqlalchemy_type(expr.dtype)) else: sa_expr = func.date_trunc('day', input) elif isinstance(expr, WeekDay): if self._sa_engine and self._sa_engine.name == 'mysql': sa_expr = (func.dayofweek(input).cast(types.df_type_to_sqlalchemy_type(expr.dtype)) + 5) % 7 else: sa_expr = (func.date_part('dow', input).cast(types.df_type_to_sqlalchemy_type(expr.dtype)) + 6) % 7 else: raise NotImplementedError self._add(expr, sa_expr)
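# visit_datetime_op() above references a DATE_PARTS_DIC mapping that is not
# shown in this snippet. It presumably maps expression class names to
# date_part() field names, along these lines (assumption, not the original):
DATE_PARTS_DIC = {
    'Year': 'year',
    'Month': 'month',
    'Day': 'day',
    'Hour': 'hour',
    'Minute': 'minute',
    'Second': 'second',
    'UnixTimestamp': 'epoch',  # date_part('epoch', ...) on PostgreSQL
}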
def get_admin_monthly_overview() -> List:
    monthly_stats = {m: 0 for m in range(1, 13)}
    month = func.date_trunc('month', func.cast(PhishingEmail.created_at, Date))
    # Returns all phishing emails detected across all email addresses
    # in the current year
    mails_detected_yearly = db.session.query(PhishingEmail) \
        .filter(PhishingEmail.receiver_id == EmailAddress.email_id,
                PhishingEmail.created_at_year == datetime.now().year) \
        .order_by(month).all()

    for pe in mails_detected_yearly:
        monthly_stats[pe.get_created_month()] = \
            monthly_stats.get(pe.get_created_month(), 0) + 1

    monthly_stats = list(monthly_stats.values())
    return monthly_stats
def expenses(request):
    data = request.data_manager
    expenses = request.db.query(
        Expenses.retro,
        func.date_trunc('month', Expenses.registry_date).label('registry_month'),
        func.sum(Expenses.cost).label('total')
    ).group_by('registry_month').group_by(Expenses.retro)

    result = {}
    for item in [i._asdict() for i in expenses]:
        key = item['registry_month'].isoformat()[:7]
        result.setdefault(key, {"retro": 0, "new": 0, "total": 0})
        if item['retro'] is True:
            result[key]['retro'] += item['total']
        else:
            result[key]['new'] += item['total']
        result[key]['total'] += item['total']

    data['navbar_active'] = 'expenses'
    data['expenses'] = result
    return data
def find_already_inserted(conn, timestamps, station_data, variables, original_data, table='observation_quality', hourly=False): """ Get from the QA results table information about already checked observations. """ LOG.info( 'Getting information about observations that have been evaluated before...' ) s_id_ref = {s['id']: s['ref'] for s in station_data} for key in s_id_ref.keys(): oq = conn.get_table(table) q = select([oq.c.time, oq.c.variable]) q = q.where(oq.c.station_id == key) q = q.where(oq.c.time.between(min(timestamps), max(timestamps))) q = q.where(oq.c.variable.in_(variables)) if hourly: q = q.where(oq.c.time == func.date_trunc('hour', oq.c.time)) with conn.trans() as wht: result = wht.get_data(q) for row in result: val = original_data[row['time'].date()][row['time']][ row['variable']][key][0] original_data[row['time'].date()][row['time']][ row['variable']][key] = (val, True)
def gold_revenue_on(date): NON_REVENUE_STATUSES = ("declined", "chargeback", "fudge") query = (select([sa_sum(gold_table.c.pennies)]) .where(~ gold_table.c.status.in_(NON_REVENUE_STATUSES)) .where(func.date_trunc('day', gold_table.c.date) == date)) rows = ENGINE.execute(query) return rows.fetchone()[0] or 0
def get_schedule(self, date: datetime):
    """
    Returns schedule for one day
    :param date: date to show schedule for
    :return: list of dicts with keys 'id', 'movie_id', 'title',
             'datetime' and 'genre'
    """
    res = list()
    sess = self._session
    q = sess.\
        query(ShowTime, Movie).\
        filter(func.date_trunc('day', ShowTime.date) == date.date()).\
        join(Movie, Movie.id == ShowTime.movie_id).\
        order_by(ShowTime.date).all()
    for el in q:
        el = el._asdict()
        res.append({
            'id': el.get('ShowTime').id,
            'movie_id': el.get('ShowTime').movie_id,
            'title': el.get('Movie').name,
            'datetime': el.get('ShowTime').date,
            'genre': el.get('Movie').genre
        })
    return res
def date_trunc_hour(*args, **kwargs): # sqlite doesn't support date_trunc if c.SQLALCHEMY_URL.startswith('sqlite'): return func.strftime(literal('%Y-%m-%d %H:00'), *args, **kwargs) else: return func.date_trunc(literal('hour'), *args, **kwargs)
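# Example use of the date_trunc_hour() shim above in a grouped query
# (sketch; `session` and the `Email` model are assumed names):
hourly_counts = (
    session.query(date_trunc_hour(Email.sent).label('hour'),
                  func.count(Email.id))
    .group_by('hour')
    .order_by('hour')
    .all()
)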
def main(): """Load figure objects into database.""" with app.app_context(): root = os.path.join(app.config['TELEMETRY_ROOTDIRECTORY'], "ShaneAO") for filepath in glob.iglob(os.path.join(root, "*", "figures", "*", "*", "*.png")): # First, is this already in the database? c = app.session.query(Figure).filter(Figure.filepath==filepath).count() if c == 1: continue elif c > 1: # Purge them all, if we find more than one. app.session.query(Figure).filter(Figure.filepath==filepath).delete() # Set the dataset parts parts = filepath.split(os.path.sep) created = datetime.datetime.strptime(parts[-5], "%Y-%m-%d").date() sequence = int(parts[-3][1:]) telpath = parts[-2].replace(".","/") query = app.session.query(Dataset).filter(func.date_trunc("day",Dataset.created) == created) query = query.filter(Dataset.sequence == sequence) dataset = query.one_or_none() if dataset is None: click.echo("Dataset missing for '{0}'".format(filepath)) continue telemetry = dataset.telemetry[telpath] fig = Figure(filepath=filepath, telemetry=telemetry, figure_type=parts[-1].split(".")[0]) app.session.add(fig) click.echo("Added '{0}'".format(filepath)) app.session.commit()
def _generate_aggregate_selects(table, target_columns, agg_fn, agg_unit): """Return the select statements used to generate a time bucket and apply aggregation to each target column. :param table: (SQLAlchemy) reflected table object :param target_columns: (list) contains strings :param agg_fn: (function) compiles to a prepared statement :param agg_unit: (str) used by date_trunc to generate time buckets :returns: (list) containing SQLAlchemy prepared statements """ selects = [ func.date_trunc(agg_unit, table.c.datetime).label('time_bucket') ] meta_columns = ('node_id', 'datetime', 'meta_id', 'sensor') for col in table.c: if col.name in meta_columns: continue if col.name not in target_columns: continue if str(col.type).split('(')[0] != 'DOUBLE PRECISION': continue selects.append(agg_fn(col).label(col.name)) selects.append(func.count(col).label(col.name + '_count')) return selects
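# Sketch of how _generate_aggregate_selects() might be wired into a full
# statement (the table, column name, and date range here are assumptions):
selects = _generate_aggregate_selects(table, ['temperature'], func.avg, 'hour')
query = (
    select(selects)
    .where(table.c.datetime.between(start, end))
    .group_by('time_bucket')
    .order_by('time_bucket')
)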
def get_vote_activity(session): """Create a plot showing the inline usage statistics.""" creation_date = func.date_trunc('day', Vote.created_at).label('creation_date') votes = session.query(creation_date, func.count(Vote.id).label('count')) \ .group_by(creation_date) \ .order_by(creation_date) \ .all() total_votes = [('Total votes', q[0], q[1]) for q in votes] # Grid style plt.style.use('seaborn-whitegrid') # Combine the results in a single dataframe and name the columns dataframe = pandas.DataFrame(total_votes, columns=['type', 'date', 'votes']) months = mdates.MonthLocator() # every month months_fmt = mdates.DateFormatter('%Y-%m') max_number = max([vote[2] for vote in total_votes]) # Plot each result set fig, ax = plt.subplots(figsize=(30, 15), dpi=120) for key, group in dataframe.groupby(['type']): ax = group.plot(ax=ax, kind='bar', x='date', y='votes', label=key) ax.xaxis.set_major_locator(months) ax.xaxis.set_major_formatter(months_fmt) ax.yaxis.set_ticks(np.arange(0, max_number, 100)) image = image_from_figure(fig) image.name = 'vote_statistics.png' return image
def find_next_day_appointments(cls): """Find next day appointments.""" from app.models.theq import Office, PublicUser, Citizen, Timezone tomorrow = datetime.now() + timedelta(days=1) tomorrow = tomorrow.astimezone(tz.tzlocal()) query = db.session.query(Appointment, Office, Timezone, PublicUser). \ join(Citizen, Citizen.citizen_id == Appointment.citizen_id). \ join(Office, Office.office_id == Appointment.office_id). \ join(Timezone, Timezone.timezone_id == Office.timezone_id). \ outerjoin(PublicUser, PublicUser.user_id == Citizen.user_id). \ filter(func.date_trunc('day', func.timezone(Timezone.timezone_name,Appointment.start_time)) == func.date_trunc('day', tomorrow)) return query.all()
def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table('severities_histogram', sa.Column('id', sa.Integer(), nullable=False), sa.Column('workspace_id', sa.Integer(), nullable=False), sa.Column('date', sa.Date(), nullable=False), sa.Column('medium', sa.Integer(), nullable=False), sa.Column('high', sa.Integer(), nullable=False), sa.Column('critical', sa.Integer(), nullable=False), sa.Column('confirmed', sa.Integer(), nullable=False), sa.ForeignKeyConstraint(['workspace_id'], ['workspace.id'], ), sa.PrimaryKeyConstraint('id') ) op.create_index(op.f('ix_severities_histogram_workspace_id'), 'severities_histogram', ['workspace_id'], unique=False) # ### end Alembic commands ### # Init histogram bind = op.get_bind() session = sa.orm.Session(bind=bind) workspaces = session.query(Workspace).all() for workspace in workspaces: vulnerabilities = session.query(VulnerabilityGeneric) \ .with_entities(func.date_trunc('day', VulnerabilityGeneric.create_date), VulnerabilityGeneric.severity, func.count(VulnerabilityGeneric.severity), func.sum(case([(VulnerabilityGeneric.confirmed, 1)], else_=0)))\ .filter(VulnerabilityGeneric.workspace_id == workspace.id, VulnerabilityGeneric.status.notin_(['closed', 'risk-accepted']), VulnerabilityGeneric.severity.in_(['medium', 'high', 'critical']))\ .group_by(func.date_trunc('day', VulnerabilityGeneric.create_date), VulnerabilityGeneric.severity).all() for histogram_date, severity_type, severity_count, confirmed_count in vulnerabilities: severity_histogram = session.query(SeveritiesHistogram)\ .filter(SeveritiesHistogram.date == histogram_date, SeveritiesHistogram.workspace_id == workspace.id).first() if severity_histogram is None: severity_histogram = SeveritiesHistogram(date=histogram_date, workspace=workspace, medium=0, high=0, critical=0, confirmed=0) session.add(severity_histogram) session.commit() if severity_type == 'medium': severity_histogram.medium = severity_count if severity_type == 'high': severity_histogram.high = severity_count if severity_type == 'critical': severity_histogram.critical = severity_count severity_histogram.confirmed += confirmed_count session.commit()
def generateMonthlyExpense(): result = session.query( func.sum(Expense.pre_tax).label('pre_tax'), func.sum(Expense.tax_amount).label('tax'), func.date_trunc('month', Expense.date).label('month'))\ .group_by(func.date_trunc('month', Expense.date))\ .order_by('month') monthlyExpenses = [] company.monthlyExpenses = [] for row in result.all(): company.monthlyExpenses.append( MonthlyExpense(formatMonth(row.month), row.pre_tax, row.tax)) monthlyExpenses.append( MonthlyExpense(formatMonth(row.month), row.pre_tax, row.tax)) return monthlyExpenses
def build_query_to_report(self, query, aggregate_table, params): assert params in self._known_units res = params truncated_time = func.date_trunc(res, aggregate_table.c.time_step) return (query .column(label("time_slice", func.extract("epoch", truncated_time))) .group_by(truncated_time))
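# For reference, with params == 'day' the method above adds, roughly, the
# following SQL to the report query (illustration only):
#
#   SELECT EXTRACT(EPOCH FROM date_trunc('day', time_step)) AS time_slice, ...
#   GROUP BY date_trunc('day', time_step)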
def api_data_dates(): dates = ( db.session.query(func.date_trunc('day', AccessLog.created_at).label('date'), count(AccessLog.id).label('accesses')) .filter(AccessLog.user == current_user) .group_by('date').order_by('date').all() ) return flask.jsonify(dates=[(date.isoformat(), cnt) for date, cnt in dates])
def day(self): """Get field truncated to the day""" return NativeField( '{}_day'.format(self.name), 'Day of {}'.format(self.description), self.alchemy_column, alchemy_expression=cast(func.date_trunc('day', self.alchemy_expression), postgres.TIMESTAMP) )
def timeline(self): dbFacade = self.dbFacade() model = dbFacade.model conditions = self._get_base_conditions(use_resolution=True) if conditions is None: return "<graph></graph>" resolution = request.params.get('resolution', 'days') time_expression = { 'weeks': cast(func.date_trunc('week', model.BalanceChange.transaction_date), DATE), 'months': cast(func.date_trunc('month', model.BalanceChange.transaction_date), DATE) }.get(resolution, model.BalanceChange.transaction_date) timeline = dbFacade.db.execute(select([time_expression.label('time'), func.abs(func.coalesce(func.sum(model.BalanceChange.amount))).label('sum')], and_(*conditions), from_obj=[model.balance_changes_table], group_by=['time'])).fetchall() time2sums = dict([(row.time, row.sum) for row in timeline]) c.sets = [] if len(time2sums) > 0: (start_date, end_date) = h.get_dates() if resolution == 'months': for date in months_range(start_date, end_date): show = 1 sum = time2sums.get(date, 0) c.sets.append({ 'name': self._timeline_name(date), 'value': sum, 'showName': show}) elif resolution == 'weeks': for date in weeks_range(start_date, end_date): show = 1 sum = time2sums.get(date, 0) c.sets.append({ 'name': self._timeline_name(date), 'value': sum, 'showName': show}) else: for date in days_range(start_date, end_date): show = date.weekday() == 0 and 1 or 0 sum = time2sums.get(date, 0) c.sets.append({ 'name': self._timeline_name(date), 'value': sum, 'showName': show}) response.headers['Content-Type'] = 'text/xml; charset=utf-8' return render_jinja2('reports/timeline-xml.jinja')
def history__github():
    grain = _get_grain()

    # Filtered list of github IDs
    repo = request.args.get('repo', None)
    repoFilter = None
    if repo is not None:
        repo = repo.split(',')
        repoFilter = SnapshotOfGithub.repo_name.in_(repo)

    # Date filter
    date_group = func.date_trunc(grain, SnapshotOfGithub.timestamp)

    # Query: Range of dates
    q1 = Session.query()\
            .add_column( func.distinct(date_group).label('d') )\
            .order_by(date_group.desc())
    response = _prepare(q1.count())
    q1 = q1.offset( response['offset'] )\
            .limit( response['per_page'] )
    if q1.count():
        date_column = q1.subquery().columns.d
        (min_date,max_date) = Session.query(func.min(date_column), func.max(date_column)).first()
    else:
        # Impossible date range
        (min_date,max_date) = datetime.now()+timedelta(days=1),datetime.now()

    # Grouped query
    S = SnapshotOfGithub
    q = Session.query()\
            .add_column( func.sum(S.watchers) )\
            .add_column( func.max(S.forks) )\
            .add_column( func.max(S.open_issues) )\
            .add_column( func.max(S.size) )\
            .add_column( date_group )\
            .add_column( S.repo_name )\
            .group_by(date_group)\
            .group_by(S.repo_name)\
            .order_by(date_group.desc())\
            .filter( date_group>=min_date )\
            .filter( date_group<=max_date )
    # Only apply the repo filter when one was requested;
    # Query.filter(None) raises an error.
    if repoFilter is not None:
        q = q.filter( repoFilter )

    results = {}
    # Inner function transforms SELECT tuple into recognizable format
    _dictize = lambda x: {
        'watchers':x[0],
        'forks':x[1],
        'issues':x[2],
        'size':x[3],
        'timestamp':x[4].date().isoformat(),
    }
    for x in q:
        repo_name = x[5]
        results[repo_name] = results.get(repo_name, { 'repo':repo_name, 'data':[] })
        results[repo_name]['data'].append( _dictize(x) )

    response['grain'] = grain
    response['data'] = results
    response['repos'] = repo
    response['min_date'] = min_date.date().isoformat()
    response['max_date'] = max_date.date().isoformat()
    return response
def resources(self):
    # Get the oldest tracking date
    oldest_created_date = model.Session.query(
        Resource.created,
    ).order_by(Resource.created).limit(1).scalar()

    # If oldest date is none (no stats yet) we don't want to continue
    if oldest_created_date:
        # Calc difference between dates
        delta = datetime.now() - oldest_created_date
        # If we have data for more than 10 days, we'll show by month;
        # otherwise segment by day
        if delta.days > 10:
            c.date_interval = 'month'
            label_formatter = '%b %Y'
        else:
            c.date_interval = 'day'
            label_formatter = '%d/%m/%y'

        date_func = func.date_trunc(c.date_interval, Resource.created)

        q = model.Session.query(
            date_func.label('date'),
            func.count().label('count')
        )
        q = q.order_by(date_func)
        q = q.group_by(date_func)

        c.graph_options = {
            'series': {
                'lines': {'show': True},
                'points': {'show': True}
            },
            'xaxis': {
                'mode': 'time',
                'ticks': []
            },
            'yaxis': {
                'tickDecimals': 0
            }
        }

        c.graph_data = []
        total = 0
        for i, stat in enumerate(q.all()):
            total += stat.count
            c.graph_data.append([i, total])
            formatted_date = stat.date.strftime(label_formatter)
            c.graph_options['xaxis']['ticks'].append([i, formatted_date])

    return p.toolkit.render('stats/resources.html', {'title': 'Resource statistics'})
def hours_with_calls(session, start, end): start = start.strftime(_STR_TIME_FMT) end = end.strftime(_STR_TIME_FMT) hours = (session .query(distinct(func.date_trunc('hour', cast(QueueLog.time, TIMESTAMP))).label('time')) .filter(between(QueueLog.time, start, end))) for hour in hours.all(): yield hour.time
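# Example of consuming the hours_with_calls() generator above (sketch; the
# date range is made up, and _STR_TIME_FMT is assumed to format datetimes the
# way the QueueLog.time column expects):
from datetime import datetime

for hour in hours_with_calls(session,
                             datetime(2020, 1, 1, 0, 0),
                             datetime(2020, 1, 2, 0, 0)):
    print(hour)  # one timestamp per hour that saw at least one call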
def history__mailman():
    grain = _get_grain()

    # Filtered list of mailman IDs
    lists = request.args.get('list')
    listFilter = None
    if lists is not None:
        lists = lists.split(',')
        listFilter = SnapshotOfMailman.list_name.in_(lists)

    # Date filter
    date_group = func.date_trunc(grain, SnapshotOfMailman.timestamp)

    # Query: Range of dates
    q1 = Session.query()\
            .add_column( func.distinct(date_group).label('d') )\
            .order_by(date_group.desc())
    response = _prepare(q1.count())
    q1 = q1.offset( response['offset'] )\
            .limit( response['per_page'] )
    if q1.count():
        subquery = q1.subquery()
        (min_date,max_date) = Session.query(func.min(subquery.columns.d), func.max(subquery.columns.d)).first()
    else:
        # Impossible date range
        (min_date,max_date) = datetime.now()+timedelta(days=1),datetime.now()

    # Grouped query
    S = SnapshotOfMailman
    q = Session.query()\
            .add_column( func.sum(S.posts_today) )\
            .add_column( func.max(S.subscribers) )\
            .add_column( date_group )\
            .add_column( S.list_name )\
            .group_by(date_group)\
            .group_by(S.list_name)\
            .order_by(date_group.desc())\
            .filter( date_group>=min_date )\
            .filter( date_group<=max_date )
    # Only apply the list filter when one was requested;
    # Query.filter(None) raises an error.
    if listFilter is not None:
        q = q.filter( listFilter )

    results = {}
    # Inner function transforms SELECT tuple into recognizable format
    _dictize = lambda x: {
        'posts':x[0],
        'subscribers':x[1],
        'timestamp':x[2].isoformat(),
    }
    # Build output datastructure from rows
    for x in q:
        list_name = x[3]
        results[list_name] = results.get(list_name, { 'list_name':list_name, 'data':[] })
        results[list_name]['data'].append( _dictize(x) )

    # Write response
    response['grain'] = grain
    response['data'] = results
    response['list'] = lists
    response['min_date'] = min_date.isoformat()
    response['max_date'] = max_date.isoformat()
    return response
def get_historical_metrics(): metrics = {} metrics["briefs_total_count"] = [] brief_day = func.date_trunc('day', Brief.published_at) briefs_by_day = select([brief_day, func.count(brief_day)])\ .where(Brief.withdrawn_at.is_(None))\ .where(Brief.published_at.isnot(None))\ .order_by(brief_day)\ .group_by(brief_day) for (day, count) in db.session.execute(briefs_by_day): metrics["briefs_total_count"].append({"value": count, "ts": pendulum.instance(day).to_iso8601_string()}) metrics["brief_response_count"] = [] brief_responses_day = func.date_trunc('day', BriefResponse.created_at) brief_responses_by_day = select([brief_responses_day, func.count(brief_responses_day)]) \ .order_by(brief_responses_day) \ .group_by(brief_responses_day) for (day, count) in db.session.execute(brief_responses_by_day): metrics["brief_response_count"].append({"value": count, "ts": pendulum.instance(day).to_iso8601_string()}) metrics["buyer_count"] = [] buyer_day = func.date_trunc('day', User.created_at) buyers_by_day = select([buyer_day, func.count(buyer_day)])\ .where(User.email_address.contains("+").is_(False) | User.email_address.contains("digital.gov.au").is_(False))\ .where(User.active.is_(True)) \ .where(User.role == 'buyer') \ .order_by(buyer_day)\ .group_by(buyer_day) for (day, count) in db.session.execute(buyers_by_day): metrics["buyer_count"].append({"value": count, "ts": pendulum.instance(day).to_iso8601_string()}) metrics["supplier_count"] = [] supplier_day = func.date_trunc('day', Supplier.creation_time) suppliers_by_day = select([supplier_day, func.count(supplier_day)]) \ .where(Supplier.abn != Supplier.DUMMY_ABN) \ .order_by(supplier_day) \ .group_by(supplier_day) for (day, count) in db.session.execute(suppliers_by_day): metrics["supplier_count"].append({"value": count, "ts": pendulum.instance(day).to_iso8601_string()}) return jsonify(metrics)
def authored_month_counts_q(session): s = session # Careful with the datetime-truncation here - ensure we're working in UTC # before we bin by month! month_counts_qry = s.query( func.date_trunc('month', func.timezone('UTC',Voevent.author_datetime) ).distinct().label('month_id'), (func.count(Voevent.ivorn)).label('month_count'), ).select_from(Voevent).group_by('month_id') return month_counts_qry
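# Example of running the month-count query above (sketch):
for month_id, month_count in authored_month_counts_q(session):
    print(month_id.strftime('%Y-%m'), month_count)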
def agg_date(crime, year): data = Crimes.query.with_entities( func.date_trunc('month', Crimes.datetime).label('month'), func.count(Crimes.cat) ).filter(Crimes.cat == crime ).filter(extract('year', Crimes.datetime) == year ).group_by('month' ).order_by('month' ).all() return jsonify({ 'crime': crime, 'aggregates': [ {'date': date, 'occurrences': occurrences} for date, occurrences in data ] })
def dao_fetch_weekly_historical_stats_for_service(service_id): monday_of_notification_week = func.date_trunc('week', NotificationHistory.created_at).label('week_start') return db.session.query( NotificationHistory.notification_type, NotificationHistory.status, monday_of_notification_week, func.count(NotificationHistory.id).label('count') ).filter( NotificationHistory.service_id == service_id ).group_by( NotificationHistory.notification_type, NotificationHistory.status, monday_of_notification_week ).order_by( asc(monday_of_notification_week), NotificationHistory.status ).all()
def history__analytics():
    grain = _get_grain()

    # Filter by account name
    websites = request.args.get('website', None)
    websitefilter = None
    if websites is not None:
        websites = websites.split(',')
        websitefilter = SnapshotOfAnalytics.website.in_(websites)

    # Query: Range of dates
    date_group = func.date_trunc(grain, SnapshotOfAnalytics.timestamp)
    q1 = Session.query()\
            .add_column( func.distinct(date_group).label('d') )\
            .order_by(date_group.desc())
    response = _prepare(q1.count())
    q1 = q1.offset( response['offset'] )\
            .limit( response['per_page'] )
    if q1.count():
        date_column = q1.subquery().columns.d
        (min_date,max_date) = Session.query(func.min(date_column), func.max(date_column)).first()
    else:
        # Impossible date range
        (min_date,max_date) = datetime.now()+timedelta(days=1),datetime.now()

    # Grouped query
    S = SnapshotOfAnalytics
    q = Session.query()\
            .add_column( func.sum(S.hits) )\
            .add_column( date_group )\
            .add_column( S.website )\
            .group_by(date_group)\
            .group_by(S.website)\
            .order_by(date_group.desc())\
            .filter( date_group>=min_date )\
            .filter( date_group<=max_date )
    # Only apply the website filter when one was requested;
    # Query.filter(None) raises an error.
    if websitefilter is not None:
        q = q.filter( websitefilter )

    results = {}
    # Inner function transforms SELECT tuple into recognizable format
    _dictize = lambda x: {
        'hits':x[0],
        'timestamp':x[1].date().isoformat(),
    }
    for x in q:
        website = x[2]
        x = _dictize(x)
        results[website] = results.get(website, { 'website':website,'data':[] })
        results[website]['data'].append(x)

    response['data'] = results
    response['grain'] = grain
    return response
def by_complete_period(self, period, date_from, date_to): """ :param period: period unit (eg StatsQuestioner.PERIOD_MONTH) :param date_from: start date of concerned jobs :param date_to: end date of concerned jobs :return: sqlalchemy Query :rtype: sqlalchemy.orm.Query """ # TODO - B.S. - 20160204: date_trunc only compatible with postgresql date_trunc_func = func.date_trunc(period, Job.publication_datetime) return self._session.query(Job.source, func.count(Job.id), date_trunc_func) \ .filter(Job.publication_datetime >= date_from) \ .filter(Job.publication_datetime <= date_to) \ .group_by(Job.source) \ .group_by(date_trunc_func) \ .order_by(date_trunc_func)
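# Example call (sketch; assumes StatsQuestioner.PERIOD_MONTH is the string
# 'month', which is what func.date_trunc() expects, and `stats` is an
# instance of the class defining by_complete_period()):
from datetime import datetime

rows = stats.by_complete_period(StatsQuestioner.PERIOD_MONTH,
                                datetime(2016, 1, 1),
                                datetime(2016, 12, 31))
for source, job_count, period_start in rows:
    print(source, period_start.date(), job_count)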
def make_date_columns(date_column, start_date, end_date, delta, unit):
    '''
    Produce a list of query columns suitable for a time series query.

    If you want to query by a series of time spans (e.g. how many records for
    each of the last 12 months), that isn't straightforward in SQL. You can
    use COUNT(*) and group by truncated dates, but any month where the count
    is zero will be omitted from the results.

    The solution is to add a column for each timespan of interest; this
    ensures no timespans are omitted, but it also makes the query quite
    complicated. This function [hopefully] simplifies the task of building
    that query.

    ``date_column`` is the SQL Alchemy date column to be counted.

    ``start_date`` is the date at which to start making columns. IMPORTANT:
    this date needs to be truncated to the same precision as ``unit``: e.g.
    if ``unit`` is ``month``, then ``start_date`` should have days set to 1,
    and hours, minutes, and seconds set to 0. If this date is not truncated
    correctly, you'll probably get zeroes in all of your columns!

    ``end_date`` is the date at which to stop: the last column will include
    this date.

    ``delta`` is a ``relativedelta`` object which defines the timespan covered
    by each column.

    ``unit`` is any Postgres date_trunc unit, e.g. ``week``, ``month``,
    ``year``, etc. See:
    http://www.postgresql.org/docs/9.1/static/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC
    '''
    columns = list()
    current_date = start_date

    while current_date <= end_date:
        # Add one column per timespan that we want to sum up. Note the
        # truncation must use ``unit`` (not a hard-coded 'month'), or the
        # truncated dates would only ever match month-aligned whens.
        columns.append(
            func.sum(case(value=func.date_trunc(unit, date_column),
                          whens={current_date.isoformat(): 1},
                          else_=0)),
        )
        current_date += delta

    return columns
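# Example: one summed column per month for a year (sketch; MyModel is an
# assumed mapped class, and the start/end dates are already month-truncated
# as the docstring requires):
from datetime import datetime
from dateutil.relativedelta import relativedelta

columns = make_date_columns(MyModel.created,
                            datetime(2015, 1, 1),
                            datetime(2015, 12, 1),
                            relativedelta(months=1), 'month')
monthly_counts = session.query(*columns).one()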
def get(self, challenge_slug): start = None end = None from dateutil import parser as dateparser from datetime import datetime parser = reqparse.RequestParser() parser.add_argument('start', type=str, help='start datetime yyyymmddhhmm') parser.add_argument('end', type=str, help='end datetime yyyymmddhhmm') args = parser.parse_args() query = db.session.query( func.date_trunc('day', Action.timestamp).label('day'), Action.status, func.count(Action.id)).join(Task).filter_by( challenge_slug=challenge_slug).group_by( 'day', Action.status) # time slicing filters if args['start'] is not None: start = dateparser.parse(args['start']) if args['end'] is None: end = datetime.utcnow() else: end = dateparser.parse(args['end']) query = query.filter( Action.timestamp.between(start, end)) return as_stats_dict( query.all(), order=[1, 0, 2], start=start, end=end)
def detail_aggregate(): raw_query_params = request.args.copy() agg, datatype, queries = parse_join_query(raw_query_params) valid_query, base_clauses, resp, status_code = make_query(MasterTable, queries['base']) if valid_query: resp['meta']['status'] = 'ok' time_agg = func.date_trunc(agg, MasterTable.c['obs_date']) base_query = session.query(time_agg, func.count(MasterTable.c.dataset_row_id)) dname = raw_query_params['dataset_name'] dataset = Table('dat_%s' % dname, Base.metadata, autoload=True, autoload_with=engine, extend_existing=True) valid_query, detail_clauses, resp, status_code = make_query(dataset, queries['detail']) if valid_query: resp['meta']['status'] = 'ok' pk = [p.name for p in dataset.primary_key][0] base_query = base_query.join(dataset, MasterTable.c.dataset_row_id == dataset.c[pk]) for clause in base_clauses: base_query = base_query.filter(clause) for clause in detail_clauses: base_query = base_query.filter(clause) values = [r for r in base_query.group_by(time_agg).order_by(time_agg).all()] items = [] for value in values: d = { 'group': value[0], 'count': value[1] } items.append(d) resp['objects'].append({ 'temporal_aggregate': agg, 'dataset_name': ' '.join(dname.split('_')).title(), 'items': items }) resp = make_response(json.dumps(resp, default=dthandler), status_code) resp.headers['Content-Type'] = 'application/json' return resp
def account_balance_summary(account_type=None, account_id=None):
    """
    get a summary of all account balances
    :param account_type:
    :return:
    """
    sql = SQL()
    account_balances = sql.db_session.query(
        func.date_trunc('month', sql.transaction.c.transaction_date).
            label('date'),
        func.sum(sql.transaction_line.c.amount).
            label('balance')). \
        join(sql.transaction_line,
             sql.transaction_line.c.transaction_id ==
             sql.transaction.c.transaction_id). \
        join(sql.account,
             sql.account.c.account_id == sql.transaction.c.account_id). \
        join(sql.account_type,
             sql.account_type.c.account_type_id ==
             sql.account.c.account_type_id). \
        group_by('date'). \
        order_by(asc('date'))

    if account_id:
        account_balances = account_balances.filter(
            sql.account.c.account_id == account_id)
    elif account_type == 'debt':
        account_balances = account_balances.filter(
            or_(sql.account_type.c.account_type == 'Loan',
                sql.account_type.c.account_type == 'Store & Credit Card'))

    # TODO: loop through all balances and fill in any gaps in the dates
    # with the balance for the previous month
    return account_balances
def dataset_metrics(self, id):
    data_dict = {'id': id}

    # check if package exists
    try:
        c.pkg_dict = get_action('package_show')(self.context, data_dict)
        c.pkg = self.context['package']
    except NotFound:
        abort(404, _('Dataset not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read package %s') % id)

    # If this is a new dataset and we only have recent tracking metrics,
    # we want to show stats per day rather than per month

    # Get the oldest tracking date
    oldest_date = model.Session.query(
        TrackingSummary.tracking_date,
    ).filter(TrackingSummary.package_id == c.pkg_dict['id']).order_by(TrackingSummary.tracking_date).limit(1).scalar()

    # If oldest date is none (no stats yet) we don't want to continue
    if oldest_date:
        # Calc difference between dates
        delta = date.today() - oldest_date

        # If we have data for more than 10 days, we'll show by month;
        # otherwise segment by day
        if delta.days > 10:
            c.date_interval = 'month'
            label_formatter = '%b %Y'
            rrule_interval = rrule.MONTHLY
        else:
            c.date_interval = 'day'
            label_formatter = '%d/%m/%y'
            rrule_interval = rrule.DAILY

        date_func = func.date_trunc(c.date_interval, TrackingSummary.tracking_date)

        q = model.Session.query(
            date_func.label('date'),
            func.sum(TrackingSummary.count).label('sum')
        )
        q = q.filter(and_(TrackingSummary.package_id == c.pkg_dict['id']))
        q = q.order_by(date_func)
        q = q.group_by(date_func)

        tracking_stats = {}

        # Create a dictionary of tracking stat results
        for stat in q.all():
            # Keyed by formatted date
            formatted_date = stat.date.strftime(label_formatter)
            tracking_stats[formatted_date] = int(stat.sum)

        # https://github.com/joetsoi/flot-barnumbers
        c.pageviews = []
        c.pageviews_options = {
            'grid': {
                'borderWidth': {'top': 0, 'right': 0, 'bottom': 1, 'left': 1},
                'borderColor': "#D4D4D4"
            },
            'xaxis': {
                'ticks': [],
                'tickLength': 0
            },
            'yaxis': {
                'tickLength': 0
            },
            'bars': {
                'show': 1,
                'align': "center",
                'zero': 1,
                'lineWidth': 0.7,
                'barWidth': 0.9,
                'showNumbers': 1,
                'numbers': {
                    'xAlign': 1,
                    'yAlign': 1,
                    'top': -15  # BS: Added this. Need to patch flot.barnumbers properly
                }
            }
        }

        for i, dt in enumerate(rrule.rrule(rrule_interval, dtstart=oldest_date, until=date.today())):
            formatted_date = dt.strftime(label_formatter)

            # Do we have a value from the tracking stats?
            try:
                count = tracking_stats[formatted_date]
            except KeyError:
                # No value - count is zero
                count = 0

            # Add data
            c.pageviews.append([i, count])

            # Add date label to ticks
            c.pageviews_options['xaxis']['ticks'].append([i, formatted_date])

    # Try and get resource download metrics - these are per resource,
    # so we need to loop through all resources, looking up download stats.
    # Post to /dataset with secret and resource_id, and receive back:
    # {
    #     "status": "success",
    #     "totals": {
    #         "..the resource id you specified...": {
    #             "emails": 2,
    #             "errors": 0,
    #             "requests": 2
    #         }
    #     }
    # }
    c.resource_downloads = []
    c.total_downloads = 0

    endpoint = os.path.join(config.get("ckanpackager.url"), 'statistics')

    # FIXME: This does not work!!
    for resource in c.pkg_dict['resources']:
        params = {
            'secret': config.get("ckanpackager.secret"),
            'resource_id': resource['id']
        }
        try:
            r = requests.post(endpoint, params)
            result = r.json()
        except ValueError:  # includes simplejson.decoder.JSONDecodeError
            # Unable to retrieve download stats for this resource
            log.critical('ERROR %s: Unable to retrieve download stats for resource %s',
                         r.status_code, resource['id'])
        except ConnectionError as e:
            log.critical(e)
        else:
            try:
                total = int(result['totals'][resource['id']]['emails'])
            except KeyError:
                # We do not have stats for this resource
                pass
            else:
                c.resource_downloads.append({
                    'name': resource['name'],
                    'id': resource['id'],
                    'total': total
                })
                c.total_downloads += total