Example #1
    def test_generate_series_scalar(self, connection):
        x = func.generate_series(1, 2).alias("x")
        y = func.generate_series(1, 2).alias("y")

        stmt = select(x.column, y.column).join_from(x, y, true())

        eq_(connection.execute(stmt).all(), [(1, 1), (1, 2), (2, 1), (2, 2)])
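
A quick way to see what Example #1 builds is to compile the statement against the PostgreSQL dialect; a minimal sketch (parameter markers vary by driver):

from sqlalchemy import func, select, true
from sqlalchemy.dialects import postgresql

x = func.generate_series(1, 2).alias("x")
y = func.generate_series(1, 2).alias("y")
stmt = select(x.column, y.column).join_from(x, y, true())

# Prints roughly:
#   SELECT x, y FROM generate_series(...) AS x
#   JOIN generate_series(...) AS y ON true
print(stmt.compile(dialect=postgresql.dialect()))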
Example #2
    def count_datasets_through_time_query(self, start, end, period, time_field,
                                          expressions):
        raw_expressions = self._alchemify_expressions(expressions)

        start_times = select((
            func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
        )).alias('start_times')

        time_range_select = select((
            func.tstzrange(
                start_times.c.start_time,
                func.lead(start_times.c.start_time).over()
            ).label('time_period'),
        )).alias('all_time_ranges')

        # Exclude the trailing (end time to infinite) row. Is there a simpler way?
        time_ranges = select((
            time_range_select,
        )).where(
            ~func.upper_inf(time_range_select.c.time_period)
        ).alias('time_ranges')

        count_query = select(
            (func.count('*'),)
        ).select_from(
            self._from_expression(DATASET, expressions)
        ).where(
            and_(
                time_field.alchemy_expression.overlaps(time_ranges.c.time_period),
                DATASET.c.archived == None,
                *raw_expressions
            )
        )

        return select((
            time_ranges.c.time_period,
            count_query.label('dataset_count')
        ))
Example #3
    def test_with_ordinality_star(self, connection):

        stmt = select("*").select_from(
            func.generate_series(
                4, 1, -1).table_valued(with_ordinality="ordinality"))

        eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)])
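
For context: table_valued(with_ordinality="ordinality") should add PostgreSQL's WITH ORDINALITY clause, which pairs each value of the descending series with a 1-based position. A hedged sketch of the rendered shape:

from sqlalchemy import func, select
from sqlalchemy.dialects import postgresql

fn = func.generate_series(4, 1, -1).table_valued(with_ordinality="ordinality")
stmt = select("*").select_from(fn)

# Expected shape (the alias name is auto-generated):
#   SELECT * FROM generate_series(...) WITH ORDINALITY AS anon_1
print(stmt.compile(dialect=postgresql.dialect()))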
Example #4
def traffic_history_query():
    events = (select(func.sum(TrafficVolume.amount).label('amount'),
                     literal_column('day'),
                     cast(TrafficVolume.type, TEXT).label('type')
                     )
              .select_from(
                    func.generate_series(
                        func.date_trunc('day', literal_column('arg_start')),
                        func.date_trunc('day', literal_column('arg_end')),
                        '1 day'
                    ).alias('day')
                    .outerjoin(TrafficVolume.__table__, and_(
                        func.date_trunc('day', TrafficVolume.timestamp) == literal_column('day'),
                        TrafficVolume.user_id == literal_column('arg_user_id'))
                    )
              )
              .group_by(literal_column('day'), literal_column('type'))
              ).cte()

    events_ingress = select(events).where(or_(events.c.type == 'Ingress', events.c.type == None)).cte()
    events_egress = select(events).where(or_(events.c.type == 'Egress', events.c.type == None)).cte()

    hist = (select(func.coalesce(events_ingress.c.day, events_egress.c.day).label('timestamp'),
                   events_ingress.c.amount.label('ingress'),
                   events_egress.c.amount.label('egress'))
            .select_from(events_ingress.join(events_egress,
                                             events_ingress.c.day == events_egress.c.day,
                                             full=True))
            .order_by(literal_column('timestamp'))
            )

    return hist
Example #5
    def get(self):
        now = pendulum.now()
        series = self.query(
            cast(
                func.generate_series(
                    cast(now.subtract(weeks=1), type_=DATE),
                    cast(now, type_=DATE),
                    cast("1 day", type_=Interval),
                ),
                type_=DATE,
            ).label("timestep")).subquery()

        stats = (self.query(series.c.timestep,
                            func.count(Scrap.id),
                            func.sum(Scrap.links_count))
                 .outerjoin((
                     Scrap,
                     cast(Scrap.date, type_=DATE) == cast(series.c.timestep, type_=DATE),
                 ))
                 .group_by(series.c.timestep)
                 .order_by(series.c.timestep)
                 .all())

        return {
            str(date): {
                "scraps": int(scraps),
                "items": int(items)
            }
            for (date, scraps, items) in stats
        }
Example #6
    def test_with_ordinality_named(self, connection):

        stmt = select(
            func.generate_series(4, 1, -1).table_valued(
                "gs", with_ordinality="ordinality").render_derived())

        eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)])
Example #7
def check_integrity(max_entry):
    series = select([func.generate_series(1, max_entry).label('seq_no')]) \
        .alias('series')
    res = session.query(series.c.seq_no).join(
        LocalLandChargeHistory,
        and_(series.c.seq_no == LocalLandChargeHistory.entry_number),
        isouter=True).filter(
            LocalLandChargeHistory.entry_number == None).all()
    return [row[0] for row in res]
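
The pattern in Example #7 recurs in several snippets below: generate the expected sequence, LEFT JOIN the real rows, and keep the misses. A self-contained sketch in 1.4-style Core (table and column names are assumptions):

from sqlalchemy import Column, Integer, MetaData, Table, func, select

metadata = MetaData()
history = Table("history", metadata, Column("entry_number", Integer))

series = select(func.generate_series(1, 100).label("seq_no")).alias("series")

# Rows where the LEFT JOIN found no match are the gaps in the sequence.
missing = (
    select(series.c.seq_no)
    .outerjoin(history, series.c.seq_no == history.c.entry_number)
    .where(history.c.entry_number.is_(None))
)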
Example #8
def compute_totals(summary_form):
    component_ids = component_names_to_ids(summary_form.component_names.data)
    from_date, to_date = summary_form.daterange.data
    resolution = summary_form.resolution.data
    table, date_column = get_history_target(summary_form.resolution.data)

    # Generate sequence of days/weeks/months in the specified range.
    from_date, to_date, delta = interval_delta(from_date, to_date, resolution)
    dates = (db.session.query(
        func.generate_series(from_date, to_date,
                             delta).label('date')).subquery())

    if summary_form.opsysreleases.data:
        # Query only requested opsys releases.
        releases = (db.session.query(OpSysRelease).filter(
            OpSysRelease.id.in_([
                osr.id for osr in summary_form.opsysreleases.data
            ])).subquery())
    else:
        # Query all active opsys releases.
        releases = (db.session.query(OpSysRelease).filter(
            OpSysRelease.status != 'EOL').subquery())

    # Sum daily counts for each date in the range and each opsys release.
    history = (db.session.query(
        date_column.label('date'),
        func.sum(table.count).label('count'),
        table.opsysrelease_id).filter(from_date <= date_column).filter(
            date_column <= to_date).group_by(table.opsysrelease_id,
                                             date_column))

    if component_ids:
        history = history.join(Report).filter(
            Report.component_id.in_(component_ids))

    history = history.subquery()

    q = (db.session.query(
        dates.c.date,
        func.coalesce(history.c.count, 0).label('count'), OpSys.name,
        releases.c.version).outerjoin(
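            # The condition below is a tautology, so every date row is
            # paired with every release row (an intentional cross join).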
            releases, dates.c.date == dates.c.date).outerjoin(
                history, (history.c.date == dates.c.date) &
                (history.c.opsysrelease_id == releases.c.id)).join(
                    OpSys, OpSys.id == releases.c.opsys_id).order_by(
                        OpSys.id, releases.c.version, dates.c.date))

    by_opsys = dict()
    groups = groupby(q.all(), lambda r: f'{r.name} {r.version}')
    for osr, rows in groups:
        counts = [(r.date, r.count) for r in rows]
        by_opsys[osr] = counts

    result = {'by_opsys': by_opsys, 'from_date': from_date, 'to_date': to_date}
    return result
Example #9
    def count_datasets_through_time(self, start, end, period, time_field, expressions):
        """
        :type period: str
        :type start: datetime.datetime
        :type end: datetime.datetime
        :type expressions: tuple[datacube.index.postgres._fields.PgExpression]
        :rtype: list[((datetime.datetime, datetime.datetime), int)]
        """

        raw_expressions = self._alchemify_expressions(expressions)

        start_times = select((
            func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
        )).alias('start_times')

        time_range_select = (
            select((
                func.tstzrange(
                    start_times.c.start_time,
                    func.lead(start_times.c.start_time).over()
                ).label('time_period'),
            ))
        ).alias('all_time_ranges')

        # Exclude the trailing (end time to infinite) row. Is there a simpler way?
        time_ranges = (
            select((
                time_range_select,
            )).where(
                ~func.upper_inf(time_range_select.c.time_period)
            )
        ).alias('time_ranges')

        count_query = (
            select(
                (func.count('*'),)
            ).select_from(
                self._from_expression(DATASET, expressions)
            ).where(
                and_(
                    time_field.alchemy_expression.overlaps(time_ranges.c.time_period),
                    DATASET.c.archived == None,
                    *raw_expressions
                )
            )
        )

        results = self._connection.execute(select((
            time_ranges.c.time_period,
            count_query.label('dataset_count')
        )))

        for time_period, dataset_count in results:
            # if not time_period.upper_inf:
            yield Range(time_period.lower, time_period.upper), dataset_count
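
A distilled, self-contained sketch of the tstzrange()/lead() trick above, in 1.4-style Core (the dates and the explicit ORDER BY are assumptions; the original relies on the series' natural order):

from datetime import datetime, timezone
from sqlalchemy import cast, func, select
from sqlalchemy.dialects.postgresql import INTERVAL

start = datetime(2020, 1, 1, tzinfo=timezone.utc)
end = datetime(2020, 4, 1, tzinfo=timezone.utc)

start_times = select(
    func.generate_series(start, end, cast('1 month', INTERVAL)).label('start_time')
).alias('start_times')

# Pair each start with the next one: half-open [start, next_start) periods.
ranges = select(
    func.tstzrange(
        start_times.c.start_time,
        func.lead(start_times.c.start_time).over(order_by=start_times.c.start_time),
    ).label('time_period')
).alias('ranges')

# lead() is NULL on the last row, so that range is unbounded above; drop it.
stmt = select(ranges).where(~func.upper_inf(ranges.c.time_period))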
Example #10
    def timeseries(self, agg_unit, start, end, geom=None, column_filters=None):
        # Reading this blog post
        # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html
        # inspired this implementation.
        t = self.point_table

        # Special case for the 'quarter' unit of aggregation.
        step = '3 months' if agg_unit == 'quarter' else '1 ' + agg_unit

        # Create a CTE to represent every time bucket in the timeseries
        # with a default count of 0
        day_generator = func.generate_series(func.date_trunc(agg_unit, start),
                                             func.date_trunc(agg_unit, end),
                                             step)
        defaults = select([sa.literal_column("0").label('count'),
                           day_generator.label('time_bucket')])\
            .alias('defaults')

        where_filters = [t.c.point_date >= start, t.c.point_date <= end]
        if column_filters is not None:
            # Column filters has to be iterable here, because the '+' operator
            # behaves differently for SQLAlchemy conditions. Instead of
            # combining the conditions together, it would try to build
            # something like :param1 + <column_filters> as a new condition.
            where_filters += [column_filters]

        # Create a CTE that grabs the number of records contained in each time
        # bucket. Will only have rows for buckets with records.
        actuals = select([func.count(t.c.hash).label('count'),
                          func.date_trunc(agg_unit, t.c.point_date).
                         label('time_bucket')])\
            .where(sa.and_(*where_filters))\
            .group_by('time_bucket')

        # Also filter by geometry if requested
        if geom:
            contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom))
            actuals = actuals.where(contains)

        # Need to alias to make it usable in a subexpression
        actuals = actuals.alias('actuals')

        # Outer join the default and observed values
        # to create the timeseries select statement.
        # If no observed value in a bucket, use the default.
        name = sa.literal_column("'{}'".format(self.dataset_name))\
            .label('dataset_name')
        bucket = defaults.c.time_bucket.label('time_bucket')
        count = func.coalesce(actuals.c.count, defaults.c.count).label('count')
        ts = select([name, bucket, count]).\
            select_from(defaults.outerjoin(actuals, actuals.c.time_bucket == defaults.c.time_bucket))

        return ts
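
The defaults/actuals/coalesce construction above is the core of every zero-filled time series in this collection. A minimal self-contained sketch in 1.4-style Core (table and column names are assumptions):

from sqlalchemy import (Column, DateTime, Integer, MetaData, Table, func,
                        literal_column, select)

metadata = MetaData()
events = Table('events', metadata,
               Column('id', Integer),
               Column('created_at', DateTime))

# One row per day for the last week, regardless of data.
buckets = select(
    func.generate_series(
        func.date_trunc('day', func.now() - literal_column("interval '7 days'")),
        func.date_trunc('day', func.now()),
        '1 day',
    ).label('bucket')
).alias('buckets')

# Observed counts; only days that actually have rows appear here.
actuals = (
    select(func.date_trunc('day', events.c.created_at).label('bucket'),
           func.count().label('n'))
    .group_by('bucket')
    .alias('actuals')
)

# The LEFT JOIN keeps empty days; COALESCE maps their missing count to 0.
ts = select(
    buckets.c.bucket,
    func.coalesce(actuals.c.n, 0).label('n'),
).select_from(buckets.outerjoin(actuals, actuals.c.bucket == buckets.c.bucket))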
Example #11
def get_usage(db: Session,
              kwh: bool = True,
              count: bool = True,
              bucket: BucketType = BucketType.Days,
              chargepoint: Optional[str] = None,
              from_timestamp: Optional[datetime] = None,
              to_timestamp: Optional[datetime] = None) -> List[KeyedTuple]:
    """
    Get the usage of the charging stations
    """

    args = [func.date(models.Transaction.start_timestamp).label('date')]
    if count:
        args.append(func.count(models.Transaction.id).label('count'))
    if kwh:
        args.append(func.sum(models.Transaction.meter_used).label('kwh'))

    query = db.query(*args)

    if chargepoint:
        # Transaction -> Connector -> ChargePoint
        query = query.filter(
            models.Transaction.connector.has(
                models.Connector.chargepoint.has(
                    models.ChargePoint.identity == chargepoint)))

    # Apply time filters
    if from_timestamp:
        query = query.filter(
            models.Transaction.start_timestamp > from_timestamp)
    if to_timestamp:
        query = query.filter(models.Transaction.end_timestamp < to_timestamp)

    # Generate date series
    timeseries = db.query(
        func.date(
            func.generate_series(
                func.min(from_timestamp or models.Transaction.start_timestamp),
                func.max(to_timestamp or utc_datetime()),
                timedelta(days=1))).label('date')).subquery()

    # Group values
    data = query.group_by('date').subquery()
    values = []
    if count:
        values.append(func.coalesce(data.c.count, 0).label('count'))
    if kwh:
        values.append(func.coalesce(data.c.kwh / 1000.0, 0).label('kWh'))

    return db.query(timeseries.c.date, *values).\
        outerjoin(data, data.c.date == timeseries.c.date).\
        order_by(timeseries.c.date).all()
Example #12
    def timeseries(self, agg_unit, start, end, geom=None, column_filters=None):
        # Reading this blog post
        # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html
        # inspired this implementation.
        t = self.point_table

        if agg_unit == 'quarter':
            step = '3 months'
        else:
            step = '1 ' + agg_unit
        # Create a CTE to represent every time bucket in the timeseries
        # with a default count of 0
        day_generator = func.generate_series(func.date_trunc(agg_unit, start),
                                             func.date_trunc(agg_unit, end),
                                             step)
        defaults = select([sa.literal_column("0").label('count'),
                           day_generator.label('time_bucket')])\
            .alias('defaults')

        # Create a CTE that grabs the number of records
        # contained in each time bucket.
        # Will only have rows for buckets with records.
        where_filters = [t.c.point_date >= start,
                         t.c.point_date <= end]
        if column_filters:
            where_filters += column_filters

        actuals = select([func.count(t.c.hash).label('count'),
                          func.date_trunc(agg_unit, t.c.point_date).
                         label('time_bucket')])\
            .where(sa.and_(*where_filters))\
            .group_by('time_bucket')

        # Also filter by geometry if requested
        if geom:
            contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom))
            actuals = actuals.where(contains)

        # Need to alias to make it usable in a subexpression
        actuals = actuals.alias('actuals')

        # Outer join the default and observed values
        # to create the timeseries select statement.
        # If no observed value in a bucket, use the default.
        name = sa.literal_column("'{}'".format(self.dataset_name))\
            .label('dataset_name')
        bucket = defaults.c.time_bucket.label('time_bucket')
        count = func.coalesce(actuals.c.count, defaults.c.count).label('count')
        ts = select([name, bucket, count]).\
            select_from(defaults.outerjoin(actuals, actuals.c.time_bucket == defaults.c.time_bucket))

        return ts
Example #13
def find_missing_row_for_job(job_id, job_size):
    expected_row_numbers = db.session.query(
        func.generate_series(0, job_size - 1).label('row')).subquery()

    query = db.session.query(
        Notification.job_row_number,
        expected_row_numbers.c.row.label('missing_row')).outerjoin(
            Notification,
            and_(expected_row_numbers.c.row == Notification.job_row_number,
                 Notification.job_id == job_id)).filter(
                     Notification.job_row_number == None  # noqa
                 )
    return query.all()
Example #14
    def get_report(self):
        """Данный очёт показывает количество тех задач которые не могут быть отрезолвены"""
        with self.c.session_context() as session:
            # TODO: add a query to guarantee that every month appears in the timeline
            # date_list = func.generate_series(first_date, datetime.now(timezone.utc), '1 day').alias('end_month')

            User = self.User
            Issue = self.Issue

            # sq_created_count = self.count_for_month(session, Issue.created)
            # sq_resolved_count = self.count_for_month(session, Issue.resolutiondate)
            # created_year_month_column = sq_created_count.c[self.FIELD_YEAR_MONTH]
            # resolved_count_column = sq_resolved_count.c[self.FIELD_COUNT]
            # created_count_column = sq_created_count.c[self.FIELD_COUNT]
            # growth_column = created_count_column - resolved_count_column

            sq = session.query(
                User.jira_name,
                # begin_column,
                func.generate_series(
                    # to get only full periods before current date
                    User.working_start_date + self.SHIFT_INTERVAL,
                    datetime.now(timezone.utc),
                    '30 days',
                ).label(self.PERIOD_END_DATE),
                # (
                #         column(self.PERIOD_END_DATE) - column(self.SHIFT_INTERVAL)
                # ).label(self.PERIOD_START_DATE),
            ).filter(User.type.in_((UserType.backend, ))).subquery()

            column_period_end_date = sq.c[self.PERIOD_END_DATE]
            column_period_start_date = column_period_end_date - self.SHIFT_INTERVAL

            q = session.query(
                sq.c[UserAttrs.jira_name],
                # Number all of the user's working periods in the company from the beginning
                func.row_number().over(
                    order_by=column_period_end_date,
                    partition_by=sq.c[UserAttrs.jira_name],
                ).label(self.PERIOD_NUMBER),
                column_period_end_date,
                column_period_start_date.label(self.PERIOD_START_DATE),
                Issue.resolutiondate,
            ).outerjoin(
                Issue,
                and_(
                    Issue.resolutiondate >= column_period_start_date,
                    Issue.resolutiondate < column_period_end_date,
                ))

            self._save_report(q)
Example #15
    def missing_blocks(cls, max_block_number=None):
        """
        Return missing blocks in the blocks table between 0 and max_block_number

        :param int max_block_number: Maximum block number that we want to find missing blocks from
        """
        current_session = get_current_session()
        if max_block_number is None:
            max_block_number = cls.get_max_block_number()
        logger.debug(max_block_number)
        with current_session.db_session_scope():
            stmt = current_session.db_session.query(
                func.generate_series(0, max_block_number).label('i')).subquery()
            joined = stmt.outerjoin(cls, cls.block_number == stmt.c.i)
            missing_blocks = current_session.db_session.query(stmt.c.i.label('block_number')).\
                select_from(joined).filter(cls.block_number == None).all()
        return missing_blocks
Example #16
def get_services_group_data_chart(services_group_id, time_from, time_until,
                                  metric_type):
    """
    Return data for billing cpu/vsize charts.
    If the time_from-time_until period contains more than `MAX_POINTS`
    data points, the data is aggregated to reduce the number of points.
    """
    delta = time_until - time_from
    # Compute the interval (in hours) for grouping data; it can't be 0
    interval = delta.total_seconds() // (3600 * MAX_POINTS) or 1

    serie = db.session.query(
        func.generate_series(
            time_from, time_until,
            timedelta(hours=interval)).label("date")).subquery()

    metrics = (db.session.query(
        serie.c.date.label("time"),
        serie.c.date + timedelta(hours=interval),
        func.coalesce(func.avg(column("value")), 0).label("value"),
    ).outerjoin(
        BatchStoryBilling,
        and_(
            BatchStoryBilling.time >= serie.c.date,
            BatchStoryBilling.time <
            (serie.c.date + timedelta(hours=interval)),
        ),
    ).outerjoin(
        MetricBilling,
        and_(
            MetricBilling.batch_id == BatchStoryBilling.id,
            MetricBilling.type == metric_type,
            MetricBilling.services_group_id == services_group_id,
        ),
    ).group_by(serie.c.date).order_by(serie.c.date))

    return metrics.all()
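
A worked example of the interval formula above (the MAX_POINTS value is an assumption for illustration):

from datetime import timedelta

MAX_POINTS = 1000
delta = timedelta(days=90)  # 7,776,000 seconds
interval = delta.total_seconds() // (3600 * MAX_POINTS) or 1
assert interval == 2  # points are averaged into 2-hour buckets
# A 10-day window gives 864000 // 3600000 == 0, so `or 1` floors the
# grouping interval at one hour.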
Example #17
    def get_tweet_counts_by_division(self, div_id=None):
        series = self.session.query(
            func.generate_series(
                func.date_trunc('hour', datetime.utcnow() - timedelta(hours=24)),
                datetime.utcnow(),
                timedelta(hours=1)).label('time')).subquery()
        tw_count = self.session.query(
            func.date_trunc('hour', Tweet.created_at).label('time'),
            func.count('*').over(partition_by=func.date_trunc('hour', Tweet.created_at)).label('count')).\
            filter(Tweet.created_at > datetime.utcnow() - timedelta(hours=24)).\
            distinct()

        if div_id is not None:
            tw_count = tw_count.filter(Tweet.division_id == div_id)

        tw_count = tw_count.subquery()

        res = self.session.query(
            series.c.time,
            func.coalesce(tw_count.c.count, 0).label('count')).\
            select_from(series).\
            outerjoin(tw_count, tw_count.c.time == series.c.time).all()

        return [{"time": t.isoformat(), "count": c} for t, c in res]
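
A hedged aside on the method above: the count().over(partition_by=...) plus distinct() construction yields one row per hour, which an ordinary GROUP BY (sketched here with the same session and model, before the div_id filter is applied) produces more directly:

        tw_count = self.session.query(
            func.date_trunc('hour', Tweet.created_at).label('time'),
            func.count().label('count')).\
            filter(Tweet.created_at > datetime.utcnow() - timedelta(hours=24)).\
            group_by(func.date_trunc('hour', Tweet.created_at))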
Example #18
    def get_task_stats(
        start_date, end_date, org_id, org_name, campaign, project_id, country
    ):
        """ Creates tasks stats for a period using the TaskStatsDTO """

        query = (
            db.session.query(
                TaskHistory.task_id,
                TaskHistory.project_id,
                TaskHistory.action_text,
                func.DATE(TaskHistory.action_date).label("day"),
            )
            .distinct(
                tuple_(
                    TaskHistory.project_id, TaskHistory.task_id, TaskHistory.action_text
                )
            )
            .filter(
                TaskHistory.action == "STATE_CHANGE",
                or_(
                    TaskHistory.action_text == "MAPPED",
                    TaskHistory.action_text == "VALIDATED",
                    TaskHistory.action_text == "BADIMAGERY",
                ),
            )
            .order_by(
                TaskHistory.project_id,
                TaskHistory.task_id,
                TaskHistory.action_text,
                TaskHistory.action_date,
            )
        )

        if org_id:
            query = query.join(Project, Project.id == TaskHistory.project_id).filter(
                Project.organisation_id == org_id
            )
        if org_name:
            try:
                organisation_id = OrganisationService.get_organisation_by_name(
                    org_name
                ).id
            except NotFound:
                organisation_id = None
            query = query.join(Project, Project.id == TaskHistory.project_id).filter(
                Project.organisation_id == organisation_id
            )
        if campaign:
            try:
                campaign_id = CampaignService.get_campaign_by_name(campaign).id
            except NotFound:
                campaign_id = None
            query = query.join(
                campaign_projects,
                campaign_projects.c.project_id == TaskHistory.project_id,
            ).filter(campaign_projects.c.campaign_id == campaign_id)
        if project_id:
            query = query.filter(TaskHistory.project_id.in_(project_id))
        if country:
            # Unnest country column array.
            sq = Project.query.with_entities(
                Project.id, func.unnest(Project.country).label("country")
            ).subquery()

            query = query.filter(sq.c.country.ilike("%{}%".format(country))).filter(
                TaskHistory.project_id == sq.c.id
            )

        query = query.subquery()

        date_query = db.session.query(
            func.DATE(
                func.generate_series(start_date, end_date, timedelta(days=1))
            ).label("d_day")
        ).subquery()

        grouped_dates = (
            db.session.query(
                date_query.c.d_day,
                query.c.action_text,
                func.count(query.c.action_text).label("cnt"),
            )
            .join(date_query, date_query.c.d_day == query.c.day)
            .group_by(date_query.c.d_day, query.c.action_text)
            .order_by(date_query.c.d_day)
        ).subquery()

        mapped = (
            db.session.query(
                grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
            )
            .select_from(grouped_dates)
            .filter(grouped_dates.c.action_text == "MAPPED")
            .subquery()
        )
        validated = (
            db.session.query(
                grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
            )
            .select_from(grouped_dates)
            .filter(grouped_dates.c.action_text == "VALIDATED")
            .subquery()
        )
        badimagery = (
            db.session.query(
                grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
            )
            .select_from(grouped_dates)
            .filter(grouped_dates.c.action_text == "BADIMAGERY")
            .subquery()
        )

        result = (
            db.session.query(
                func.to_char(grouped_dates.c.d_day, "YYYY-MM-DD"),
                func.coalesce(mapped.c.cnt, 0).label("mapped"),
                func.coalesce(validated.c.cnt, 0).label("validated"),
                func.coalesce(badimagery.c.cnt, 0).label("badimagery"),
            )
            .select_from(grouped_dates)
            .distinct(grouped_dates.c.d_day)
            # Python's "is not None" evaluates to True immediately; isnot()
            # emits the intended SQL NULL check instead.
            .filter(grouped_dates.c.d_day.isnot(None))
            .outerjoin(mapped, mapped.c.d_day == grouped_dates.c.d_day)
            .outerjoin(validated, validated.c.d_day == grouped_dates.c.d_day)
            .outerjoin(badimagery, badimagery.c.d_day == grouped_dates.c.d_day)
        )

        day_stats_dto = list(map(StatsService.set_task_stats, result))

        results_dto = TaskStatsDTO()
        results_dto.stats = day_stats_dto

        return results_dto
Example #19
def fetch_annual_billing(service_id, year):
    start_date, end_date = get_financial_year_range(year)
    fragments_free_limit = literal(
        fetch_sms_free_allowance_for_financial_year(service_id, year))
    query = dao_get_priced_billing_data().filter(
        FactBilling.service_id == service_id,
        FactBilling.aet_date >= start_date,
        FactBilling.aet_date <= end_date,
    )

    # break down usage by month with empty gaps by generating a series
    months_series = func.generate_series(start_date, end_date,
                                         '1 months').alias('month')
    months_series_c = column('month').cast(Date()).label('month')
    query_cte = query.cte()

    # calculate in credits so we can work backwards to the available units
    # after subtracting cost usage
    credits_free = fragments_free_limit * FRAGMENT_UNIT_RATE

    # coalesce and sum these totals by month
    cost = total(query_cte.c.cost)
    notifications = total(query_cte.c.notifications_sent)
    notifications_email_ = func.sum(query_cte.c.notifications_sent).filter(
        query_cte.c.notification_type == EMAIL_TYPE)
    notifications_sms_ = func.sum(query_cte.c.notifications_sent).filter(
        query_cte.c.notification_type == SMS_TYPE)
    notifications_email = func.coalesce(notifications_email_, 0)
    notifications_sms = func.coalesce(notifications_sms_, 0)

    fragments_domestic = total(query_cte.c.domestic_units)
    fragments_international = total(query_cte.c.international_units)

    # cumulative figures for entire year
    cost_cumulative = func.sum(cost).over(order_by=months_series_c)
    cost_starting = cost_cumulative - cost
    credits_remaining = func.greatest(credits_free - cost_cumulative, 0)
    credits_available = func.greatest(credits_free - cost_starting, 0)
    cost_chargeable = func.greatest(cost - credits_available, 0)
    cost_chargeable_cumulative = func.greatest(cost_cumulative - credits_free,
                                               0)

    credits_used = func.least(cost, credits_available)
    units = cost / FRAGMENT_UNIT_RATE
    units_cumulative = cost_cumulative / FRAGMENT_UNIT_RATE
    units_chargeable = cost_chargeable / FRAGMENT_UNIT_RATE
    units_free_used = credits_used / FRAGMENT_UNIT_RATE
    units_free_available = credits_available / FRAGMENT_UNIT_RATE
    units_free_remaining = credits_remaining / FRAGMENT_UNIT_RATE
    units_chargeable_cumulative = cost_chargeable_cumulative / FRAGMENT_UNIT_RATE

    gapfilled_query = db.session.query(
        months_series_c,
        fragments_free_limit.label('fragments_free_limit'),
        fragments_domestic.label('fragments_domestic'),
        fragments_international.label('fragments_international'),
        notifications.label('notifications'),
        notifications_sms.label('notifications_sms'),
        notifications_email.label('notifications_email'),
        cost.label('cost'),
        cost_chargeable.label('cost_chargeable'),
        cost_cumulative.label('cost_cumulative'),
        cost_chargeable_cumulative.label('cost_chargeable_cumulative'),
        units.label('units'),
        units_cumulative.label('units_cumulative'),
        units_chargeable.label('units_chargeable'),
        units_chargeable_cumulative.label('units_chargeable_cumulative'),
        units_free_available.label('units_free_available'),
        units_free_remaining.label('units_free_remaining'),
        units_free_used.label('units_free_used'),
        literal(DOMESTIC_UNIT_RATE * 100).label('unit_rate_domestic'),
        literal(INTERNATIONAL_UNIT_RATE *
                100).label('unit_rate_international'),
    ).select_from(months_series).outerjoin(
        query_cte,
        query_cte.c.aet_month == months_series_c,
    ).group_by(months_series_c).order_by(months_series_c)
    return gapfilled_query
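
A minimal sketch of the cumulative window trick fetch_annual_billing leans on (table and column names are assumptions): summing over ORDER BY turns per-month figures into running totals, so the free-allowance math needs no self-join.

from sqlalchemy import column, func, select, table

billing = table('monthly_billing', column('month'), column('cost'))

stmt = select(
    billing.c.month,
    billing.c.cost,
    # Running total of cost up to and including each month.
    func.sum(billing.c.cost).over(order_by=billing.c.month).label('cost_cumulative'),
)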
Example #20
def traffic_history_query():
    timestamptz = TIMESTAMP(timezone=True)

    events = union_all(
        select([TrafficCredit.amount,
                TrafficCredit.timestamp,
                literal("Credit").label('type')]
               ).where(TrafficCredit.user_id == literal_column('arg_user_id')),

        select([(-TrafficVolume.amount).label('amount'),
                TrafficVolume.timestamp,
                cast(TrafficVolume.type, TEXT).label('type')]
               ).where(TrafficVolume.user_id == literal_column('arg_user_id'))
    ).cte('traffic_events')

    def round_time(time_expr, ceil=False):
        round_func = func.ceil if ceil else func.trunc
        step_epoch = func.extract('epoch', literal_column('arg_step'))
        return cast(func.to_timestamp(round_func(func.extract('epoch', time_expr) / step_epoch) * step_epoch), timestamptz)

    balance = select([TrafficBalance.amount, TrafficBalance.timestamp])\
        .select_from(User.__table__.outerjoin(TrafficBalance))\
        .where(User.id == literal_column('arg_user_id'))\
        .cte('balance')

    balance_amount = select([balance.c.amount]).as_scalar()
    balance_timestamp = select([balance.c.timestamp]).as_scalar()

    # Bucket layout
    # n = interval / step
    # 0: Aggregates all prior traffic_events so that the balance value can be calculated
    # 1 - n: Traffic history entry
    # n+1: Aggregates all data after the last point in time, will be discarded
    buckets = select([
        literal_column('bucket'),
        (func.row_number().over(order_by=literal_column('bucket')) - 1).label('index')
    ]).select_from(
        func.generate_series(
            round_time(cast(literal_column('arg_start'), timestamptz)) - literal_column('arg_step'),
            round_time(cast(literal_column('arg_start'), timestamptz) + literal_column('arg_interval')),
            literal_column('arg_step')
        ).alias('bucket')
    ).order_by(
        literal_column('bucket')
    ).cte('buckets')

    def cond_sum(condition, label, invert=False):
        return func.sum(case(
            [(condition, events.c.amount if not invert else -events.c.amount)],
            else_=None)).label(label)


    hist = select([buckets.c.bucket,
                   cond_sum(events.c.type == 'Credit', 'credit'),
                   cond_sum(events.c.type == 'Ingress', 'ingress', invert=True),
                   cond_sum(events.c.type == 'Egress', 'egress', invert=True),
                   func.sum(events.c.amount).label('amount'),
                   cond_sum(and_(balance_timestamp != None, events.c.timestamp < balance_timestamp), 'before_balance'),
                   cond_sum(or_(balance_timestamp == None, events.c.timestamp >= balance_timestamp), 'after_balance')]
    ).select_from(buckets.outerjoin(
        events, func.width_bucket(
            events.c.timestamp, select([func.array(select([buckets.c.bucket]).select_from(buckets).where(buckets.c.index != 0).label('dummy'))])
        ) == buckets.c.index
    )).where(
        # Discard bucket n+1
        buckets.c.index < select([func.max(buckets.c.index)])
    ).group_by(
        buckets.c.bucket
    ).order_by(
        buckets.c.bucket
    ).cte('traffic_hist')


    # Bucket is located before the balance and no traffic_events exist before it
    first_event_timestamp = select([func.min(events.c.timestamp)]).as_scalar()
    case_before_balance_no_data = (
        and_(balance_timestamp != None, hist.c.bucket < balance_timestamp,
        or_(first_event_timestamp == None,
            hist.c.bucket < first_event_timestamp
            )),
        None
    )

    # Bucket is located after the balance
    case_after_balance = (
        or_(balance_timestamp == None, hist.c.bucket >= balance_timestamp),
        func.coalesce(balance_amount, 0) + func.coalesce(
            func.sum(hist.c.after_balance).over(
                order_by=hist.c.bucket.asc(), rows=(None, 0)),
            0)
    )

    # Bucket is located before the balance, but there still exist traffic_events before it
    else_before_balance = (
            func.coalesce(balance_amount, 0) +
            func.coalesce(hist.c.after_balance, 0) -
            func.coalesce(
                func.sum(hist.c.before_balance).over(
                    order_by=hist.c.bucket.desc(), rows=(None, -1)
                ), 0)
    )

    agg_hist = select([
        hist.c.bucket, hist.c.credit, hist.c.ingress, hist.c.egress,
        case(
            [case_before_balance_no_data, case_after_balance],
            else_=else_before_balance
        ).label('balance')
    ]).alias('agg_hist')

    # Remove bucket 0
    result = select([agg_hist]).order_by(agg_hist.c.bucket).offset(1)

    return result
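
Finally, a note on the width_bucket() call that drives the bucket join above: given an ascending array of thresholds, PostgreSQL returns 0 for operands below the first threshold and the matching bucket index otherwise, which is how events land in the "prior history" bucket 0 or the discarded bucket n+1. A tiny illustration (values assumed):

from sqlalchemy import func, literal, literal_column

# width_bucket(25, ARRAY[10, 20, 30]) -> 2, because 20 <= 25 < 30;
# below 10 -> 0, and 30 or above -> 3.
expr = func.width_bucket(literal(25), literal_column('ARRAY[10, 20, 30]'))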