Example #1
def category_index(datasets):
    """
    Get a list of categories by count of datasets
    """
    # Get the dataset ids in the current list of datasets
    ds_ids = [d.id for d in datasets]
    if len(ds_ids):
        # If we have dataset ids we count the dataset by category
        q = db.select(
            [Dataset.category, db.func.count(Dataset.id)],
            Dataset.id.in_(ds_ids),
            group_by=Dataset.category,
            order_by=db.func.count(Dataset.id).desc())

        # Execute the query to get the list of categories
        categories = db.session.bind.execute(q).fetchall()
        # Return a list of categories as dicts with category, count, url
        # and label
        return [{'category': category,
                 'count': count,
                 'url': h.url_for(controller='dataset', action='index',
                                  category=category),
                 'label': CATEGORIES.get(category, category)}
                for (category, count) in categories if category is not None]

    # Return an empty list if no datasets were found
    return []
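For reference, a minimal sketch of the shape category_index returns; the category names, counts and URLs below are hypothetical, with 'url' standing in for the h.url_for output and 'label' for the CATEGORIES lookup.

# Hypothetical return value of category_index: ordered by count
# descending, with None categories dropped.
example_result = [
    {'category': 'budget', 'count': 12,
     'url': '/dataset?category=budget', 'label': 'Budget'},
    {'category': 'spending', 'count': 7,
     'url': '/dataset?category=spending', 'label': 'Spending'},
]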
Example #2
 def dataset_counts(cls, datasets):
     ds_ids = [d.id for d in datasets]
     q = db.select([cls.code, db.func.count(cls.dataset_id)],
                   cls.dataset_id.in_(ds_ids),
                   group_by=cls.code,
                   order_by=db.func.count(cls.dataset_id).desc())
     return db.session.bind.execute(q).fetchall()
Example #3
 def times(self, attribute='year'):
     """ Get all distinct times mentioned in the dataset. """
     # TODO: make this a more generic distinct_attribute function
     field = self['time'][attribute].column_alias
     query = db.select([field.label(attribute)], self['time'].alias, distinct=True)
     rp = self.bind.execute(query)
     return sorted([r[attribute] for r in rp.fetchall()])
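A brief usage sketch, assuming a loaded dataset model; the 'yearmonth' attribute name mirrors the one used for the month label in the aggregate examples further down.

def available_periods(dataset):
    # Distinct values come back sorted; the attribute defaults to 'year'.
    years = dataset.times()
    months = dataset.times(attribute='yearmonth')
    return years, months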
Example #4
 def dataset_counts(cls, datasets):
     ds_ids = [d.id for d in datasets]
     if not len(ds_ids):
         return []
     q = db.select([cls.code, db.func.count(cls.dataset_id)],
                   cls.dataset_id.in_(ds_ids), group_by=cls.code,
                   order_by=db.func.count(cls.dataset_id).desc())
     return db.session.bind.execute(q).fetchall()
Example #5
 def num_entries(self, conditions="1=1"):
     """ Return the count of entries on the dataset fact table having the
     dimension set to a value matching the filter given by ``conditions``.
     """
     query = db.select([db.func.count(db.func.distinct(self.column_alias))],
                       conditions)
     rp = self.dataset.bind.execute(query)
     return rp.fetchone()[0]
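A hedged sketch of calling the method above with and without a filter; the `dimension` object and the SQL condition string are assumptions for illustration.

def dimension_cardinality(dimension):
    # The default "1=1" condition counts every distinct value of the
    # dimension's column; any SQL boolean expression string narrows it.
    everything = dimension.num_entries()
    filtered = dimension.num_entries(conditions="amount > 0")  # hypothetical column
    return everything, filtered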
Example #7
    def entries(self, conditions="1=1", order_by=None, limit=None,
            offset=0, step=10000):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes.

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        joins = self.alias
        for d in self.dimensions:
            joins = d.join(joins)
        selects = [f.selectable for f in self.fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = db.select(selects, conditions, joins, order_by=order_by,
                              use_labels=True, limit=qlimit, offset=qoffset)
            rp = self.bind.execute(query)

            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                result = {}
                for k, v in row.items():
                    field, attr = k.split('_', 1)
                    field = field.replace(ALIAS_PLACEHOLDER, '_')
                    if field == 'entry':
                        result[attr] = v
                    else:
                        if field not in result:
                            result[field] = dict()

                            # TODO: backwards-compat?
                            if isinstance(self[field], CompoundDimension):
                                result[field]['taxonomy'] = \
                                    self[field].taxonomy
                        result[field][attr] = v
                yield result
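A small usage sketch for the generator above, assuming a loaded dataset object. Because entries() pages through the fact table in blocks of step rows, callers can consume it lazily without loading the whole table.

from itertools import islice

def preview_entries(dataset, n=5):
    # Take the first n nested entry dicts; each dimension shows up as a
    # sub-dict of its attributes. With limit=n (n < step) this issues a
    # single paged query.
    return list(islice(dataset.entries(limit=n), n))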
Example #8
 def times(self, attribute='year'):
     """ Get all distinct times mentioned in the dataset. """
     # TODO: make this a more generic distinct_attribute function
     field = self['time'][attribute].column_alias
     query = db.select([field.label(attribute)],
                       self['time'].alias,
                       distinct=True)
     rp = self.bind.execute(query)
     return sorted([r[attribute] for r in rp.fetchall()])
Example #9
    def entries(self,
                conditions="1=1",
                order_by=None,
                limit=None,
                offset=0,
                step=10000,
                fields=None):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes.

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        if fields is None:
            fields = self.fields

        joins = self.alias
        for d in self.dimensions:
            if d in fields:
                joins = d.join(joins)
        selects = [f.selectable for f in fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = db.select(selects,
                              conditions,
                              joins,
                              order_by=order_by,
                              use_labels=True,
                              limit=qlimit,
                              offset=qoffset)
            rp = self.bind.execute(query)

            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                yield decode_row(row, self)
Example #10
 def members(self, conditions="1=1", limit=None, offset=0):
     """ Get a listing of all the members of the dimension (i.e. all the
     distinct values) matching the filter in ``conditions``. """
     query = db.select([self.column_alias], conditions,
                       limit=limit, offset=offset, distinct=True)
     rp = self.dataset.bind.execute(query)
     while True:
         row = rp.fetchone()
         if row is None:
             break
         yield row[0]
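A hedged usage sketch; `dimension` stands in for an attribute dimension object like the one this method is defined on.

def distinct_members(dimension, max_rows=100):
    # members() is a generator over the distinct values, so results can be
    # consumed incrementally; limit and offset page at the SQL level.
    return list(dimension.members(limit=max_rows, offset=0))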
Example #11
 def territories(self):
     q = db.select([DatasetTerritory.code,
                    db.func.count(DatasetTerritory.dataset_id)],
                   group_by=DatasetTerritory.code,
                   order_by=db.func.count(DatasetTerritory.dataset_id).desc())
     result = {}
     for territory, count in db.session.bind.execute(q).fetchall():
         result[territory] = {'count': count,
                              'label': h.COUNTRIES[territory],
                              'url': h.url_for(controller='dataset',
                                  action='index', territories=territory)}
     return to_jsonp(result)
Example #12
 def members(self, conditions="1=1", limit=0, offset=0):
     """ Get a listing of all the members of the dimension (i.e. all the
     distinct values) matching the filter in ``conditions``. This can also be
     used to find a single individual member, e.g. a dimension value
     identified by its name. """
     query = db.select([self.alias], conditions, limit=limit, offset=offset)
     rp = self.dataset.bind.execute(query)
     while True:
         row = rp.fetchone()
         if row is None:
             break
         member = dict(row.items())
         member['taxonomy'] = self.taxonomy
         yield member
Example #13
 def members(self, conditions="1=1", limit=None, offset=0):
     """ Get a listing of all the members of the dimension (i.e. all the
     distinct values) matching the filter in ``conditions``. """
     query = db.select([self.column_alias],
                       conditions,
                       limit=limit,
                       offset=offset,
                       distinct=True)
     rp = self.dataset.bind.execute(query)
     while True:
         row = rp.fetchone()
         if row is None:
             break
         yield row[0]
Example #14
 def members(self, conditions="1=1", limit=None, offset=0):
     """ Get a listing of all the members of the dimension (i.e. all the
     distinct values) matching the filter in ``conditions``. This can also be
     used to find a single individual member, e.g. a dimension value
     identified by its name. """
     query = db.select([self.alias], conditions,
                       limit=limit, offset=offset)
     rp = self.dataset.bind.execute(query)
     while True:
         row = rp.fetchone()
         if row is None:
             break
         member = dict(row.items())
         member['taxonomy'] = self.taxonomy
         yield member
Example #15
    def entries(self, conditions="1=1", order_by=None, limit=None,
            offset=0, step=10000, fields=None):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes.

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        if fields is None:
            fields = self.fields

        joins = self.alias
        for d in self.dimensions:
            if d in fields:
                joins = d.join(joins)
        selects = [f.selectable for f in fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = db.select(selects, conditions, joins, order_by=order_by,
                              use_labels=True, limit=qlimit, offset=qoffset)
            rp = self.bind.execute(query)

            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                yield decode_row(row, self)
Example #16
def category_index(datasets):
    """
    Get a list of categories by count of datasets
    """
    # Get the dataset ids in the current list of datasets
    ds_ids = [d.id for d in datasets]
    if len(ds_ids):
        # If we have dataset ids we count the dataset by category
        q = db.select([Dataset.category, db.func.count(Dataset.id)],
                      Dataset.id.in_(ds_ids), group_by=Dataset.category,
                      order_by=db.func.count(Dataset.id).desc())

        # Execute the query to get the list of categories
        categories = db.session.bind.execute(q).fetchall()
        # Return a list of categories as dicts with category, count, url
        # and label 
        return [{'category': category, 'count': count,
                 'url': h.url_for(controller='dataset',
                                  action='index', category=category),
                 'label': CATEGORIES.get(category, category)}
                for (category, count) in categories if category is not None]
    
    # Return an empty list if no datasets were found
    return []
Example #17
    def aggregate(self,
                  measures=['amount'],
                  drilldowns=[],
                  cuts=[],
                  page=1,
                  pagesize=10000,
                  order=[]):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut by the query.

        ``measures``
            The numeric units to be aggregated over, defaults to
            [``amount``]. (type: `list`)
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            A specification of what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and the queries for the different
            dimensions are then combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the given dimensions.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """

        # Get the joins (aka alias) and the dataset
        joins = alias = self.alias
        dataset = self

        # Aggregation fields are all of the measures, so we create individual
        # summary fields with the sum function of SQLAlchemy
        fields = [db.func.sum(alias.c[m]).label(m) for m in measures]
        # We append an aggregation field that counts the number of entries
        fields.append(db.func.count(alias.c.id).label("entries"))
        # Create a copy of the statistics fields (for later)
        stats_fields = list(fields)

        # Create label map for time columns (year and month) for lookup
        # since they are found under the time attribute
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
        }

        # Get the dimensions we're interested in. These would be the drilldowns
        # and the cuts. For compound dimensions we are only interested in the
        # most significant one (e.g. for from.name we're interested in from)
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]

        # Loop over the dimensions as a set (to avoid multiple occurrences)
        for dimension in set(dimensions):
            # If the dimension is year or month we're interested in 'time'
            if dimension in labels:
                dimension = 'time'
            # If the dimension table isn't in the automatic joins we add it
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        # Drilldowns are performed using group_by SQL functions
        group_by = []
        for key in drilldowns:
            # If drilldown is in labels we append its mapped column to fields
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                # Get the column from the dataset
                column = dataset.key(key)
                # If the drilldown is a compound dimension or the columns table
                # is in the joins we're already fetching the column so we just
                # append it to fields and the group_by
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    # If not we add the column table to the fields and add all
                    # of that tables columns to the group_by
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        # Cuts are managed using AND statements and we use a dict with set as
        # the default value to create the filters (cut on various values)
        conditions = db.and_()
        filters = defaultdict(set)

        for key, value in cuts:
            # If the key is in labels (year or month) we get the mapped column
            # else we get the column from the dataset
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            # We add the value to the set for that particular column
            filters[column].add(value)

        # Loop over the columns in the filter and add that to the conditions
        # For every value in the set we create an OR statement so we get e.g.
        # year=2007 AND (from.who == 'me' OR from.who == 'you')
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        # Ordering can be set by a parameter or ordered by measures by default
        order_by = []
        # If no order is defined we default to order of the measures in the
        # order they occur (furthest to the left is most significant)
        if order is None or not len(order):
            order = [(m, True) for m in measures]

        # We loop through the order list to add the columns themselves
        for key, direction in order:
            # If it's a part of the measures we have to order by the
            # aggregated values (the sum of the measure)
            if key in measures:
                column = db.func.sum(alias.c[key]).label(key)
            # If it's in the labels we have to get the mapped column
            elif key in labels:
                column = labels[key]
            # ...if not we just get the column from the dataset
            else:
                column = dataset.key(key)
            # We append the column and set the direction (True == descending)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        # Here we use the stats_field we saved earlier
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        # Fetch the result row and turn it into a list so we can pop the
        # entry count and then zip the measures and the totals together
        stats = list(rp.fetchone())
        num_entries = stats.pop()
        total = zip(measures, stats)

        # query 2: get total count of drilldowns
        if len(group_by):
            # Select 1 for each group in the group_by and count them
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            # With no drilldowns there is only the single overall row
            num_drilldowns = 1

        # The drilldown result list
        drilldown = []
        # The offset in the db, based on the page and pagesize (we have to
        # adjust it since page counts start from 1 but offsets count from 0)
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields,
                          conditions,
                          joins,
                          order_by=order_by,
                          group_by=group_by,
                          use_labels=True,
                          limit=pagesize,
                          offset=offset)
        rp = dataset.bind.execute(query)

        while True:
            # Get each row in the db result and append it, decoded, to the
            # drilldown result. The decoded version is a json representation
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        # Create the summary based on the stats_fields and other things
        # First we add the total for each measurement in the root of the
        # summary (watch out!) and then we add various other, self-explanatory
        # statistics such as page, number of entries. The currency value is
        # strange since it's redundant for multiple measures but is left as is
        # for backwards compatibility
        summary = {key: value for (key, value) in total}
        summary.update({
            'num_entries': num_entries,
            'currency': {m: dataset.currency for m in measures},
            'num_drilldowns': num_drilldowns,
            'page': page,
            'pages': int(math.ceil(num_drilldowns / float(pagesize))),
            'pagesize': pagesize
        })

        return {'drilldown': drilldown, 'summary': summary}
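A hedged sketch of a typical call against the list-of-measures signature above; the dimension and cut names (cofog1, year) and the loaded dataset object are assumptions for illustration.

def spending_by_function(dataset):
    # Hypothetical drilldown: sum 'amount' per cofog1, cut to year 2009,
    # largest totals first. Cuts are (dimension, value) two-tuples; in
    # order pairs, True means descending.
    result = dataset.aggregate(measures=['amount'],
                               drilldowns=['cofog1'],
                               cuts=[('year', '2009')],
                               order=[('amount', True)])
    return result['summary'], result['drilldown']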
Example #18
    def aggregate(self,
                  measure='amount',
                  drilldowns=None,
                  cuts=None,
                  page=1,
                  pagesize=10000,
                  order=None):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut by the query.

        ``measure``
            The numeric unit to be aggregated over, defaults to ``amount``.
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            A specification of what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and the queries for the different
            dimensions are then combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the given dimensions.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """
        cuts = cuts or []
        drilldowns = drilldowns or []
        order = order or []
        joins = alias = self.alias
        dataset = self
        fields = [
            db.func.sum(alias.c[measure]).label(measure),
            db.func.count(alias.c.id).label("entries")
        ]
        stats_fields = list(fields)
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
        }
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]
        for dimension in set(dimensions):
            if dimension in labels:
                dimension = 'time'
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        group_by = []
        for key in drilldowns:
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                column = dataset.key(key)
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        conditions = db.and_()
        filters = defaultdict(set)
        for key, value in cuts:
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            filters[column].add(value)
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        order_by = []
        if order is None or not len(order):
            order = [(measure, True)]
        for key, direction in order:
            if key == measure:
                column = db.func.sum(alias.c[measure]).label(measure)
            elif key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        total, num_entries = rp.fetchone()

        # query 2: get total count of drilldowns
        if len(group_by):
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            num_drilldowns = 1

        drilldown = []
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields,
                          conditions,
                          joins,
                          order_by=order_by,
                          group_by=group_by,
                          use_labels=True,
                          limit=pagesize,
                          offset=offset)
        rp = dataset.bind.execute(query)
        while True:
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        return {
            'drilldown': drilldown,
            'summary': {
                measure: total,
                'num_entries': num_entries,
                'currency': {
                    measure: dataset.currency
                },
                'num_drilldowns': num_drilldowns,
                'page': page,
                'pages': int(math.ceil(num_drilldowns / float(pagesize))),
                'pagesize': pagesize
            }
        }
Example #19
    def aggregate(self, measure='amount', drilldowns=None, cuts=None, 
            page=1, pagesize=10000, order=None):
        """ Query the dataset for a subset of cells based on cuts and 
        drilldowns. It returns a structure with a list of drilldown items 
        and a summary about the slice cut by the query.

        ``measure``
            The numeric unit to be aggregated over, defaults to ``amount``.
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            A specification of what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and the queries for the different
            dimensions are then combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the given dimensions.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute 
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """
        cuts = cuts or []
        drilldowns = drilldowns or []
        order = order or []
        joins = self.alias
        fields = [db.func.sum(self.alias.c[measure]).label(measure), 
                  db.func.count(self.alias.c.id).label("entries")]
        labels = {
            'year': self['time']['year'].column_alias.label('year'),
            'month': self['time']['yearmonth'].column_alias.label('month'),
            }
        dimensions = set(drilldowns + [k for k, v in cuts] +
                         [o[0] for o in order])
        for dimension in dimensions:
            if dimension in labels:
                _name = 'time'
            else:
                _name = dimension.split('.')[0]
            if _name not in [c.table.name for c in joins.columns]:
                joins = self[_name].join(joins)

        group_by = []
        for key in dimensions:
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                column = self.key(key)
                if '.' in key or column.table == self.alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        conditions = db.and_()
        filters = defaultdict(set)
        for key, value in cuts:
            if key in labels:
                column = labels[key]
            else:
                column = self.key(key)
            filters[column].add(value)
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        order_by = []
        for key, direction in order:
            if key in labels:
                column = labels[key]
            else:
                column = self.key(key)
            order_by.append(column.desc() if direction else column.asc())

        query = db.select(fields, conditions, joins,
                          order_by=order_by or [measure + ' desc'],
                          group_by=group_by, use_labels=True)
        summary = {measure: 0.0, 'num_entries': 0}
        drilldown = []
        rp = self.bind.execute(query)
        while True:
            row = rp.fetchone()
            if row is None:
                break
            result = {}
            for key, value in row.items():
                if key == measure:
                    summary[measure] += value or 0
                if key == 'entries':
                    summary['num_entries'] += value or 0
                if '_' in key:
                    dimension, attribute = key.split('_', 1)
                    dimension = dimension.replace(ALIAS_PLACEHOLDER, '_')
                    if dimension == 'entry':
                        result[attribute] = value
                    else:
                        if dimension not in result:
                            result[dimension] = {}

                            # TODO: backwards-compat?
                            if isinstance(self[dimension], CompoundDimension):
                                result[dimension]['taxonomy'] = \
                                        self[dimension].taxonomy
                        result[dimension][attribute] = value
                else:
                    if key == 'entries':
                        key = 'num_entries'
                    result[key] = value
            drilldown.append(result)
        offset = (page - 1) * pagesize

        # do we really need all this:
        summary['num_drilldowns'] = len(drilldown)
        summary['page'] = page
        summary['pages'] = int(math.ceil(len(drilldown) / float(pagesize)))
        summary['pagesize'] = pagesize

        return {'drilldown': drilldown[offset:offset+pagesize],
                'summary': summary}
Example #20
    def entries(self,
                conditions="1=1",
                order_by=None,
                limit=None,
                offset=0,
                step=10000):
        """ Generate a fully denormalized view of the entries on this 
        table. This view is nested so that each dimension will be a hash
        of its attributes. 

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        joins = self.alias
        for d in self.dimensions:
            joins = d.join(joins)
        selects = [f.selectable for f in self.fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = db.select(selects,
                              conditions,
                              joins,
                              order_by=order_by,
                              use_labels=True,
                              limit=qlimit,
                              offset=qoffset)
            rp = self.bind.execute(query)

            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                result = {}
                for k, v in row.items():
                    field, attr = k.split('_', 1)
                    field = field.replace(ALIAS_PLACEHOLDER, '_')
                    if field == 'entry':
                        result[attr] = v
                    else:
                        if field not in result:
                            result[field] = dict()

                            # TODO: backwards-compat?
                            if isinstance(self[field], CompoundDimension):
                                result[field]['taxonomy'] = \
                                    self[field].taxonomy
                        result[field][attr] = v
                yield result
Example #21
    def index(self, format="html"):
        c.query = request.params.items()
        c.add_filter = lambda f, v: "?" + urlencode(c.query + [(f, v)] if (f, v) not in c.query else c.query)
        c.del_filter = lambda f, v: "?" + urlencode([(k, x) for k, x in c.query if (k, x) != (f, v)])
        c.results = c.datasets
        for language in request.params.getall("languages"):
            l = db.aliased(DatasetLanguage)
            c.results = c.results.join(l, Dataset._languages)
            c.results = c.results.filter(l.code == language)
        for territory in request.params.getall("territories"):
            t = db.aliased(DatasetTerritory)
            c.results = c.results.join(t, Dataset._territories)
            c.results = c.results.filter(t.code == territory)
        category = request.params.get("category")
        if category:
            c.results = c.results.filter(Dataset.category == category)

        c.results = list(c.results)
        c.territory_options = [
            {
                "code": code,
                "count": count,
                "url": h.url_for(controller="dataset", action="index", territories=code),
                "label": COUNTRIES.get(code, code),
            }
            for (code, count) in DatasetTerritory.dataset_counts(c.results)
        ]
        c.language_options = [
            {
                "code": code,
                "count": count,
                "url": h.url_for(controller="dataset", action="index", languages=code),
                "label": LANGUAGES.get(code, code),
            }
            for (code, count) in DatasetLanguage.dataset_counts(c.results)
        ]

        # TODO: figure out where to put this:
        ds_ids = [d.id for d in c.results]
        if len(ds_ids):
            q = db.select(
                [Dataset.category, db.func.count(Dataset.id)],
                Dataset.id.in_(ds_ids),
                group_by=Dataset.category,
                order_by=db.func.count(Dataset.id).desc(),
            )
            c.category_options = [
                {
                    "category": category,
                    "count": count,
                    "url": h.url_for(controller="dataset", action="index", category=category),
                    "label": CATEGORIES.get(category, category),
                }
                for (category, count) in db.session.bind.execute(q).fetchall()
                if category is not None
            ]
        else:
            c.category_options = []

        c._must_revalidate = True
        if len(c.results):
            dt = max([r.updated_at for r in c.results])
            etag_cache_keygen(dt)

        if format == "json":
            results = map(lambda d: d.as_dict(), c.results)
            results = [dataset_apply_links(r) for r in results]
            return to_jsonp(
                {
                    "datasets": results,
                    "categories": c.category_options,
                    "territories": c.territory_options,
                    "languages": c.language_options,
                }
            )
        elif format == "csv":
            results = map(lambda d: d.as_dict(), c.results)
            return write_csv(results, response)
        return render("dataset/index.html")
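For clarity, a minimal self-contained sketch of what the add_filter/del_filter helpers in this controller compute, with a plain list of pairs standing in for request.params; the query values are made up.

from urllib.parse import urlencode  # plain urllib.urlencode on the Python 2 stack used here

query = [('category', 'budget'), ('languages', 'en')]
add_filter = lambda f, v: '?' + urlencode(query + [(f, v)] if (f, v) not in query else query)
del_filter = lambda f, v: '?' + urlencode([(k, x) for k, x in query if (k, x) != (f, v)])

print(add_filter('territories', 'gb'))  # ?category=budget&languages=en&territories=gb
print(del_filter('languages', 'en'))    # ?category=budget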
Example #22
    def aggregate(self,
                  measure='amount',
                  drilldowns=None,
                  cuts=None,
                  page=1,
                  pagesize=10000,
                  order=None):
        """ Query the dataset for a subset of cells based on cuts and 
        drilldowns. It returns a structure with a list of drilldown items 
        and a summary about the slice cut by the query.

        ``measure``
            The numeric unit to be aggregated over, defaults to ``amount``.
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            A specification of what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and the queries for the different
            dimensions are then combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the given dimensions.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute 
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """
        cuts = cuts or []
        drilldowns = drilldowns or []
        order = order or []
        joins = self.alias
        fields = [
            db.func.sum(self.alias.c[measure]).label(measure),
            db.func.count(self.alias.c.id).label("entries")
        ]
        labels = {
            'year': self['time']['year'].column_alias.label('year'),
            'month': self['time']['yearmonth'].column_alias.label('month'),
        }
        dimensions = set(drilldowns + [k for k, v in cuts] +
                         [o[0] for o in order])
        for dimension in dimensions:
            if dimension in labels:
                _name = 'time'
            else:
                _name = dimension.split('.')[0]
            if _name not in [c.table.name for c in joins.columns]:
                joins = self[_name].join(joins)

        group_by = []
        for key in dimensions:
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                column = self.key(key)
                if '.' in key or column.table == self.alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        conditions = db.and_()
        filters = defaultdict(set)
        for key, value in cuts:
            if key in labels:
                column = labels[key]
            else:
                column = self.key(key)
            filters[column].add(value)
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        order_by = []
        for key, direction in order:
            if key in labels:
                column = labels[key]
            else:
                column = self.key(key)
            order_by.append(column.desc() if direction else column.asc())

        query = db.select(fields,
                          conditions,
                          joins,
                          order_by=order_by or [measure + ' desc'],
                          group_by=group_by,
                          use_labels=True)
        summary = {measure: 0.0, 'num_entries': 0}
        drilldown = []
        rp = self.bind.execute(query)
        while True:
            row = rp.fetchone()
            if row is None:
                break
            result = {}
            for key, value in row.items():
                if key == measure:
                    summary[measure] += value or 0
                if key == 'entries':
                    summary['num_entries'] += value or 0
                if '_' in key:
                    dimension, attribute = key.split('_', 1)
                    dimension = dimension.replace(ALIAS_PLACEHOLDER, '_')
                    if dimension == 'entry':
                        result[attribute] = value
                    else:
                        if dimension not in result:
                            result[dimension] = {}

                            # TODO: backwards-compat?
                            if isinstance(self[dimension], CompoundDimension):
                                result[dimension]['taxonomy'] = \
                                        self[dimension].taxonomy
                        result[dimension][attribute] = value
                else:
                    if key == 'entries':
                        key = 'num_entries'
                    result[key] = value
            drilldown.append(result)
        offset = ((page - 1) * pagesize)

        # do we really need all this:
        summary['num_drilldowns'] = len(drilldown)
        summary['page'] = page
        summary['pages'] = int(math.ceil(len(drilldown) / float(pagesize)))
        summary['pagesize'] = pagesize

        return {
            'drilldown': drilldown[offset:offset + pagesize],
            'summary': summary
        }
Example #23
    def aggregate(self, measures=['amount'], drilldowns=[], cuts=[],
            page=1, pagesize=10000, order=[]):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut by the query.

        ``measures``
            The numeric units to be aggregated over, defaults to 
            [``amount``]. (type: `list`)
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            A specification of what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and the queries for the different
            dimensions are then combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the given dimensions.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """

        # Get the joins (aka alias) and the dataset
        joins = alias = self.alias
        dataset = self

        # Aggregation fields are all of the measures, so we create individual
        # summary fields with the sum function of SQLAlchemy
        fields = [db.func.sum(alias.c[m]).label(m) for m in measures]
        # We append an aggregation field that counts the number of entries
        fields.append(db.func.count(alias.c.id).label("entries"))
        # Create a copy of the statistics fields (for later)
        stats_fields = list(fields)

        # Create label map for time columns (year and month) for lookup
        # since they are found under the time attribute
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
            }

        # Get the dimensions we're interested in. These would be the drilldowns
        # and the cuts. For compound dimensions we are only interested in the
        # most significant one (e.g. for from.name we're interested in from)
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]

        # Loop over the dimensions as a set (to avoid multiple occurrences)
        for dimension in set(dimensions):
            # If the dimension is year or month we're interested in 'time'
            if dimension in labels:
                dimension = 'time'
            # If the dimension table isn't in the automatic joins we add it
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        # Drilldowns are performed using group_by SQL functions
        group_by = []
        for key in drilldowns:
            # If drilldown is in labels we append its mapped column to fields
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                # Get the column from the dataset
                column = dataset.key(key)
                # If the drilldown is a compound dimension or the columns table
                # is in the joins we're already fetching the column so we just
                # append it to fields and the group_by
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    # If not we add the column table to the fields and add all
                    # of that tables columns to the group_by
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        # Cuts are managed using AND statements and we use a dict with set as
        # the default value to create the filters (cut on various values)
        conditions = db.and_()
        filters = defaultdict(set)

        for key, value in cuts:
            # If the key is in labels (year or month) we get the mapped column
            # else we get the column from the dataset
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            # We add the value to the set for that particular column
            filters[column].add(value)

        # Loop over the columns in the filter and add that to the conditions
        # For every value in the set we create an OR statement so we get e.g.
        # year=2007 AND (from.who == 'me' OR from.who == 'you')
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        # Ordering can be set by a parameter or ordered by measures by default
        order_by = []
        # If no order is defined we default to order of the measures in the
        # order they occur (furthest to the left is most significant)
        if order is None or not len(order):
            order = [(m, True) for m in measures]

        # We loop through the order list to add the columns themselves
        for key, direction in order:
            # If it's a part of the measures we have to order by the
            # aggregated values (the sum of the measure)
            if key in measures:
                column = db.func.sum(alias.c[key]).label(key)
            # If it's in the labels we have to get the mapped column
            elif key in labels:
                column = labels[key]
            # ...if not we just get the column from the dataset
            else:
                column = dataset.key(key)
            # We append the column and set the direction (True == descending)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        # Here we use the stats_field we saved earlier
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        # Fetch the result row and turn it into a list so we can pop the
        # entry count and then zip the measures and the totals together
        stats = list(rp.fetchone())
        num_entries = stats.pop()
        total = zip(measures, stats)

        # query 2: get total count of drilldowns
        if len(group_by):
            # Select 1 for each group in the group_by and count them
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            # With no drilldowns there is only the single overall row
            num_drilldowns = 1

        # The drilldown result list
        drilldown = []
        # The offset in the db, based on the page and pagesize (we have to
        # adjust it since page counts start from 1 but offsets count from 0)
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields, conditions, joins, order_by=order_by,
                          group_by=group_by, use_labels=True,
                          limit=pagesize, offset=offset)
        rp = dataset.bind.execute(query)

        while True:
            # Get each row in the db result and append it, decoded, to the
            # drilldown result. The decoded version is a json representation
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        # Create the summary based on the stats_fields and other things
        # First we add the total for each measurement in the root of the
        # summary (watch out!) and then we add various other, self-explanatory
        # statistics such as page, number of entries. The currency value is
        # strange since it's redundant for multiple measures but is left as is
        # for backwards compatibility
        summary = {key: value for (key,value) in total}
        summary.update({
                'num_entries': num_entries,
                'currency': {m: dataset.currency for m in measures},
                'num_drilldowns': num_drilldowns,
                'page': page,
                'pages': int(math.ceil(num_drilldowns / float(pagesize))),
                'pagesize': pagesize
                })

        return { 'drilldown': drilldown, 'summary': summary }
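What the multi-measure variant above returns is easiest to see from a call. A minimal usage sketch, assuming the function is exposed as something like aggregate_multi(dataset, measures=..., ...); its actual signature falls outside this excerpt, so every name below is illustrative:

# Hypothetical call; aggregate_multi, dataset, the measure list and the
# dimension names are assumptions, not taken from the source.
result = aggregate_multi(dataset,
                         measures=['amount'],
                         drilldowns=['year'],
                         cuts=[('year', '2009'), ('year', '2010')],  # ORed
                         order=[('amount', True)],  # True == descending
                         page=1, pagesize=100)
print(result['summary']['num_entries'])
for cell in result['drilldown']:
    print(cell['year'], cell['amount'])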
Example #24
0
    def index(self, format='html'):
        c.query = request.params.items()
        c.add_filter = lambda f, v: '?' + urlencode(c.query + [(f, v)] if (
            f, v) not in c.query else c.query)
        c.del_filter = lambda f, v: '?' + urlencode([(k, x) for k, x in c.query
                                                     if (k, x) != (f, v)])
        c.results = c.datasets
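        # Narrow the results by each repeated 'languages' and 'territories'
        # request parameter, joining the aliased association model once per value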
        for language in request.params.getall('languages'):
            l = db.aliased(DatasetLanguage)
            c.results = c.results.join(l, Dataset._languages)
            c.results = c.results.filter(l.code == language)
        for territory in request.params.getall('territories'):
            t = db.aliased(DatasetTerritory)
            c.results = c.results.join(t, Dataset._territories)
            c.results = c.results.filter(t.code == territory)
        category = request.params.get('category')
        if category:
            c.results = c.results.filter(Dataset.category == category)

        c.results = list(c.results)
        c.territory_options = [{'code': code,
                                'count': count,
                                'url': h.url_for(controller='dataset',
                                    action='index', territories=code),
                                'label': COUNTRIES.get(code, code)} \
            for (code, count) in DatasetTerritory.dataset_counts(c.results)]
        c.language_options = [{'code': code,
                               'count': count,
                               'url': h.url_for(controller='dataset',
                                    action='index', languages=code),
                               'label': LANGUAGES.get(code, code)} \
            for (code, count) in DatasetLanguage.dataset_counts(c.results)]

        # TODO: figure out where to put this:
        ds_ids = [d.id for d in c.results]
        if len(ds_ids):
            q = db.select(
                [Dataset.category, db.func.count(Dataset.id)],
                Dataset.id.in_(ds_ids),
                group_by=Dataset.category,
                order_by=db.func.count(Dataset.id).desc())
            c.category_options = [{'category': category,
                                   'count': count,
                                   'url': h.url_for(controller='dataset',
                                        action='index', category=category),
                                   'label': CATEGORIES.get(category, category)} \
                for (category, count) in db.session.bind.execute(q).fetchall() \
                if category is not None]
        else:
            c.category_options = []

        c._must_revalidate = True
        if len(c.results):
            dt = max([r.updated_at for r in c.results])
            etag_cache_keygen(dt)

        if format == 'json':
            results = map(lambda d: d.as_dict(), c.results)
            results = [dataset_apply_links(r) for r in results]
            return to_jsonp({
                'datasets': results,
                'categories': c.category_options,
                'territories': c.territory_options,
                'languages': c.language_options
            })
        elif format == 'csv':
            results = map(lambda d: d.as_dict(), c.results)
            return write_csv(results, response)
        c.show_rss = True
        return templating.render('dataset/index.html')
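The add_filter and del_filter helpers above simply rebuild the current query string with one (field, value) pair added or removed. A standalone sketch of that behaviour, using the Python 3 location of urlencode (the example itself is Python 2):

from urllib.parse import urlencode

query = [('category', 'budget'), ('languages', 'en')]
add_filter = lambda f, v: '?' + urlencode(
    query + [(f, v)] if (f, v) not in query else query)
del_filter = lambda f, v: '?' + urlencode(
    [(k, x) for k, x in query if (k, x) != (f, v)])

print(add_filter('territories', 'gb'))
# -> ?category=budget&languages=en&territories=gb
print(del_filter('languages', 'en'))
# -> ?category=budget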
Example #25
0
    def aggregate(self, measure='amount', drilldowns=None, cuts=None,
            page=1, pagesize=10000, order=None):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut by the query.

        ``measure``
            The numeric unit to be aggregated over, defaults to ``amount``.
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            Specification of what to cut from the cube. This is a
            `list` of two-`tuples` where the first item is the dimension
            and the second item is the value to cut on. It is turned into
            a query where multiple cuts for the same dimension are combined
            into an *OR* query and then the queries for the different
            dimensions are combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into page of size *pagesize*.
            type: `int`
        ``order``
            Sort the result by the given dimensions or the measure.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute
            the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ],
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        """
        cuts = cuts or []
        drilldowns = drilldowns or []
        order = order or []
        joins = alias = self.alias
        dataset = self
        fields = [db.func.sum(alias.c[measure]).label(measure),
                  db.func.count(alias.c.id).label("entries")]
        stats_fields = list(fields)
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
            }
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]
        for dimension in set(dimensions):
            if dimension in labels:
                dimension = 'time'
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        group_by = []
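        # Collect the SELECT and GROUP BY columns for each drilldown; a plain
        # dimension contributes every column of its dimension table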
        for key in drilldowns:
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                column = dataset.key(key)
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

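        # Build the WHERE clause: multiple cuts on the same column are
        # combined with OR, and the different columns are ANDed together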
        conditions = db.and_()
        filters = defaultdict(set)
        for key, value in cuts:
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            filters[column].add(value)
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

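        # Default to ordering by the aggregated measure, descending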
        order_by = []
        if order is None or not len(order):
            order = [(measure, True)]
        for key, direction in order:
            if key == measure:
                column = db.func.sum(alias.c[measure]).label(measure)
            elif key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        total, num_entries = rp.fetchone()

        # query 2: get total count of drilldowns
        if len(group_by):
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            num_drilldowns = 1

        drilldown = []
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields, conditions, joins, order_by=order_by,
                          group_by=group_by, use_labels=True,
                          limit=pagesize, offset=offset)
        rp = dataset.bind.execute(query)
        while True:
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        return {
                'drilldown': drilldown,
                'summary': {
                    measure: total,
                    'num_entries': num_entries,
                    'currency': {measure: dataset.currency},
                    'num_drilldowns': num_drilldowns,
                    'page': page,
                    'pages': int(math.ceil(num_drilldowns / float(pagesize))),
                    'pagesize': pagesize
                    }
                }
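A minimal usage sketch matching the docstring: the 'cofog1' drilldown and the shape of each cell come from the docstring's own return example, while the cut values and page size are illustrative:

# Two cuts on the same key are ORed, so this selects 2009 or 2010.
result = dataset.aggregate(
    measure='amount',
    drilldowns=['cofog1'],
    cuts=[('year', '2009'), ('year', '2010')],
    order=[('amount', True)],  # True == descending
    page=1, pagesize=100)

print(result['summary']['num_entries'])
for cell in result['drilldown']:
    print(cell['cofog1']['label'], cell['amount'])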
Example #26
0
    def index(self, format='html'):
        c.query = request.params.items()
        c.add_filter = lambda f, v: '?' + urlencode(c.query +
                [(f, v)] if (f, v) not in c.query else c.query)
        c.del_filter = lambda f, v: '?' + urlencode([(k, x) for k, x in
            c.query if (k, x) != (f, v)])
        c.results = c.datasets
        for language in request.params.getall('languages'):
            l = db.aliased(DatasetLanguage)
            c.results = c.results.join(l, Dataset._languages)
            c.results = c.results.filter(l.code == language)
        for territory in request.params.getall('territories'):
            t = db.aliased(DatasetTerritory)
            c.results = c.results.join(t, Dataset._territories)
            c.results = c.results.filter(t.code == territory)
        category = request.params.get('category')
        if category:
            c.results = c.results.filter(Dataset.category == category)

        c.results = list(c.results)
        c.territory_options = [{'code': code,
                                'count': count,
                                'url': h.url_for(controller='dataset',
                                    action='index', territories=code),
                                'label': COUNTRIES.get(code, code)} \
            for (code, count) in DatasetTerritory.dataset_counts(c.results)]
        c.language_options = [{'code': code,
                               'count': count,
                               'url': h.url_for(controller='dataset',
                                    action='index', languages=code),
                               'label': LANGUAGES.get(code, code)} \
            for (code, count) in DatasetLanguage.dataset_counts(c.results)]

        # TODO: figure out where to put this:
        ds_ids = [d.id for d in c.results]
        if len(ds_ids):
            q = db.select([Dataset.category, db.func.count(Dataset.id)],
                Dataset.id.in_(ds_ids), group_by=Dataset.category,
                order_by=db.func.count(Dataset.id).desc())
            c.category_options = [{'category': category,
                                   'count': count,
                                   'url': h.url_for(controller='dataset',
                                        action='index', category=category),
                                   'label': CATEGORIES.get(category, category)} \
                for (category, count) in db.session.bind.execute(q).fetchall() \
                if category is not None]
        else:
            c.category_options = []

        c._must_revalidate = True
        if len(c.results):
            dt = max([r.updated_at for r in c.results])
            etag_cache_keygen(dt)

        if format == 'json':
            results = map(lambda d: d.as_dict(), c.results)
            results = [dataset_apply_links(r) for r in results]
            return to_jsonp({
                'datasets': results,
                'categories': c.category_options,
                'territories': c.territory_options,
                'languages': c.language_options
                })
        elif format == 'csv':
            results = map(lambda d: d.as_dict(), c.results)
            return write_csv(results, response)
        c.show_rss = True
        return templating.render('dataset/index.html')
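The category-count query in both index() actions is a plain grouped count. A self-contained sketch of the same pattern in modern (SQLAlchemy 1.4+) syntax, with an illustrative in-memory table standing in for the real Dataset model:

# Standalone sketch; the table and column names are illustrative,
# not the actual OpenSpending schema.
from sqlalchemy import (Column, Integer, MetaData, String, Table,
                        create_engine, func, select)

engine = create_engine('sqlite://')
metadata = MetaData()
dataset = Table('dataset', metadata,
                Column('id', Integer, primary_key=True),
                Column('category', String))
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(dataset.insert(), [
        {'category': 'budget'}, {'category': 'budget'},
        {'category': 'spending'}])
    # Count datasets per category, most common category first
    q = (select(dataset.c.category, func.count(dataset.c.id))
         .group_by(dataset.c.category)
         .order_by(func.count(dataset.c.id).desc()))
    print(conn.execute(q).fetchall())  # [('budget', 2), ('spending', 1)]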