Example #1
    def aggregate(self):
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return {'errors': errors}

        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        dataset = params.pop('dataset')
        format = params.pop('format')
        require.dataset.read(dataset)

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(dataset.name,
                    result['drilldown'])

            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            log.exception(ve)
            response.status = 400
            return {'errors': ['Invalid aggregation query: %r' % ve]}

        if format == 'csv':
            return write_csv(result['drilldown'], response,
                filename=dataset.name + '.csv')
        return to_jsonp(result)
Example #2
    def aggregate(self):
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return {'errors': errors}

        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        dataset = params.pop('dataset')
        require.dataset.read(dataset)

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(dataset.name,
                    result['drilldown'])

            if cache.cache_enabled and 'cache_key' in result['summary']:
                if 'Pragma' in response.headers:
                    del response.headers['Pragma']
                response.cache_control = 'public, max-age=84600'
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            log.exception(ve)
            response.status = 400
            return {'errors': ['Invalid aggregation query: %r' % ve]}

        return result
Example #3
    def view(self, dataset, dimension, format='html'):
        self._get_dataset(dataset)
        try:
            c.dimension = c.dataset[dimension]
        except KeyError:
            abort(404, _('This is not a dimension'))
        if not isinstance(c.dimension, model.Dimension):
            abort(404, _('This is not a dimension'))

        page = self._get_page('page')
        cache = AggregationCache(c.dataset)
        result = cache.aggregate(drilldowns=[dimension], page=page, 
                                 pagesize=PAGE_SIZE)
        items = result.get('drilldown', [])
        c.values = [(d.get(dimension), d.get('amount')) for d in items]

        if format == 'json':
            return to_jsonp({
                "values": c.values,
                "meta": c.dimension.as_dict()})

        c.page = Page(c.values, page=page,
                      item_count=result['summary']['num_drilldowns'],
                      items_per_page=PAGE_SIZE,
                      presliced_list=True)
        return render('dimension/view.html')
Example #4
    def aggregate(self):
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return {'errors': errors}

        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        dataset = params.pop('dataset')
        format = params.pop('format')
        require.dataset.read(dataset)

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(
                    dataset.name, result['drilldown'])

            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            log.exception(ve)
            response.status = 400
            return {'errors': ['Invalid aggregation query: %r' % ve]}

        if format == 'csv':
            return write_csv(result['drilldown'],
                             response,
                             filename=dataset.name + '.csv')
        return to_jsonp(result)
Example #5
 def aggregates(self):
     if self._aggregates is None:
         if self.view.drilldown is None:
             return []
         res = defaultdict(dict)
         drilldowns = {}
         query = ['year', self.view.drilldown]
         cache = AggregationCache(self.dataset)
         results = cache.aggregate(drilldowns=query,
                                   cuts=self.cuts)
         for entry in results.get('drilldown'):
             d = entry.get(self.view.drilldown)
             # Get a hashable key for the drilldown
             key = d['id'] if isinstance(d, dict) else d
             # Store a reference to this drilldown
             drilldowns[key] = d
             # Store drilldown value for this year
             res[key][str(entry.get('year'))] = entry.get('amount')
         self._aggregates = [(drilldowns[k], v) for k, v in res.items()]
         # sort aggregations by time
         if self.time is not None:
             self._aggregates = sorted(self._aggregates,
                                       reverse=True,
                                       key=lambda (k, v): v.get(self.time, 0))
     return self._aggregates
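
Examples #5 and #8 sort with `key=lambda (k, v): ...`, which relies on tuple-parameter unpacking in lambdas, a Python 2 feature (the snippets are Python 2 throughout; note `unicode` in Examples #11, #15 and #16). A minimal sketch of an equivalent sort key that also runs on Python 3, using hypothetical sample data in place of `self._aggregates` and `self.time`:

    # Sketch only: `aggregates` and `time_key` stand in for self._aggregates
    # and self.time from the example above.
    aggregates = [({'id': 'a'}, {'2011': 5.0, '2012': 7.0}),
                  ({'id': 'b'}, {'2011': 9.0, '2012': 1.0})]
    time_key = '2012'
    # Index the (drilldown, values) tuple instead of unpacking it in the lambda.
    aggregates = sorted(aggregates, reverse=True,
                        key=lambda pair: pair[1].get(time_key, 0))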
Example #6
    def view(self, dataset, dimension, format='html'):
        self._get_dataset(dataset)
        try:
            c.dimension = c.dataset[dimension]
        except KeyError:
            abort(404, _('This is not a dimension'))
        if not isinstance(c.dimension, model.Dimension):
            abort(404, _('This is not a dimension'))

        page = self._get_page('page')
        cache = AggregationCache(c.dataset)
        result = cache.aggregate(drilldowns=[dimension], page=page, 
                                 pagesize=PAGE_SIZE)
        items = result.get('drilldown', [])
        c.values = [(d.get(dimension), d.get('amount')) for d in items]

        if format == 'json':
            return to_jsonp({
                "values": c.values,
                "meta": c.dimension.as_dict()})

        c.page = Page(c.values, page=page,
                      item_count=result['summary']['num_drilldowns'],
                      items_per_page=PAGE_SIZE,
                      presliced_list=True)
        return render('dimension/view.html')
Example #7
    def aggregate(self):
        errors = []
        params = request.params

        # get and check parameters
        dataset = self._dataset(params, errors)
        drilldowns = self._drilldowns(params, errors)
        cuts = self._cuts(params, errors)
        order = self._order(params, errors)
        measure = self._measure(params, dataset, errors)
        page = self._to_int('page', params.get('page', 1), errors)
        pagesize = self._to_int('pagesize', params.get('pagesize', 10000),
                                errors)
        if errors:
            return {'errors': errors}

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(measure=measure, 
                                     drilldowns=drilldowns, 
                                     cuts=cuts, page=page, 
                                     pagesize=pagesize, order=order)

            if cache.cache_enabled and 'cache_key' in result['summary']:
                if 'Pragma' in response.headers:
                    del response.headers['Pragma']
                response.cache_control = 'public, max-age=84600'
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            log.exception(ve)
            return {'errors': ['Invalid aggregation query: %r' % ve]}

        return result
Example #8
 def aggregates(self):
     if self._aggregates is None:
         if self.view.drilldown is None:
             return []
         res = defaultdict(dict)
         drilldowns = {}
         query = ['year', self.view.drilldown]
         cache = AggregationCache(self.dataset)
         results = cache.aggregate(drilldowns=query, cuts=self.cuts)
         for entry in results.get('drilldown'):
             d = entry.get(self.view.drilldown)
             # Get a hashable key for the drilldown
             key = d['id'] if isinstance(d, dict) else d
             # Store a reference to this drilldown
             drilldowns[key] = d
             # Store drilldown value for this year
             res[key][str(entry.get('year'))] = entry.get('amount')
         self._aggregates = [(drilldowns[k], v) for k, v in res.items()]
         # sort aggregations by time
         if self.time is not None:
             self._aggregates = sorted(self._aggregates,
                                       reverse=True,
                                       key=lambda (k, v): v.get(self.time, 0))
     return self._aggregates
Example #9
 def totals(self):
     if self._totals is None:
         self._totals = {}
         cache = AggregationCache(self.dataset)
         results = cache.aggregate(drilldowns=['year'], cuts=self.cuts)
         for entry in results.get('drilldown'):
             self._totals[str(entry.get('year'))] = entry.get('amount')
     return self._totals
Example #10
 def totals(self):
     if self._totals is None:
         self._totals = {}
         cache = AggregationCache(self.dataset)
         results = cache.aggregate(drilldowns=['year'], 
                                   cuts=self.cuts)
         for entry in results.get('drilldown'):
             self._totals[str(entry.get('year'))] = entry.get('amount')
     return self._totals
Example #11
    def aggregate(self):
        dataset_name = request.params.get(
            'dataset',
            request.params.get('slice'))
        dataset = model.Dataset.by_name(dataset_name)
        if dataset is None:
            abort(400, "Dataset %s not found" % dataset_name)
        require.dataset.read(dataset)

        drilldowns, cuts, statistics = [], [], []
        for key, value in sorted(request.params.items()):
            if '-' not in key:
                continue
            op, key = key.split('-', 1)
            if 'include' == op:
                cuts.append((key, value))
            elif 'per' == op:
                if 'time' == key:
                    abort(400, "Time series are no longer supported")
                statistics.append((key, value))
            elif 'breakdown' == op:
                drilldowns.append(key)
        cache = AggregationCache(dataset)
        result = cache.aggregate(drilldowns=drilldowns + ['time'],
                                 cuts=cuts)
        # TODO: handle statistics as key-values ??? what's the point?
        for k, v in statistics:
            result = statistic_normalize(dataset, result, v, k)

        # translate to old format: group by drilldown, then by date.
        translated_result = defaultdict(dict)
        for cell in result['drilldown']:
            key = tuple([cellget(cell, d) for d in drilldowns])
            translated_result[key][cell['time']['name']] = \
                cell['amount']
        dates = sorted(set([d['time']['name'] for d in
                            result['drilldown']]))
        # give a value (or 0) for each present date in sorted order
        translated_result = [(k, [v.get(d, 0.0) for d in dates])
                             for k, v in translated_result.items()]
        return {'results': translated_result,
                'metadata': {
                    'dataset': dataset.name,
                    'include': cuts,
                    'dates': map(unicode, dates),
                    'axes': drilldowns,
                    'per': statistics,
                    'per_time': []
                }
                }
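
The translation step in Example #11 pivots flat drilldown cells into one row per drilldown key, with one value per date. A small self-contained sketch of that pivot, using hypothetical sample cells shaped like `result['drilldown']` (the example itself also goes through `cellget` and `statistic_normalize`, which are omitted here):

    from collections import defaultdict

    # Hypothetical sample cells shaped like result['drilldown'] above.
    cells = [{'region': 'north', 'time': {'name': '2011'}, 'amount': 5.0},
             {'region': 'north', 'time': {'name': '2012'}, 'amount': 7.0},
             {'region': 'south', 'time': {'name': '2011'}, 'amount': 2.0}]
    drilldowns = ['region']

    # Group amounts by drilldown key, then by the name of the time member.
    pivot = defaultdict(dict)
    for cell in cells:
        key = tuple(cell.get(d) for d in drilldowns)
        pivot[key][cell['time']['name']] = cell['amount']

    # Emit a value (or 0.0) for every present date, in sorted order.
    dates = sorted(set(cell['time']['name'] for cell in cells))
    rows = [(key, [series.get(date, 0.0) for date in dates])
            for key, series in pivot.items()]
    # e.g. (('north',), [5.0, 7.0]) and (('south',), [2.0, 0.0]), in some order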
Example #12
 def drop(self, dataset):
     self._get_dataset(dataset)
     require.dataset.update(c.dataset)
     c.dataset.drop()
     c.dataset.init()
     c.dataset.generate()
     AggregationCache(c.dataset).invalidate()
     db.session.commit()
     h.flash_success(_("The dataset has been cleared."))
     redirect(h.url_for(controller='editor', action='index', 
                        dataset=c.dataset.name))
Example #13
 def retract(self, dataset):
     self._get_dataset(dataset)
     require.dataset.update(c.dataset)
     if c.dataset.private:
         abort(400, _("This dataset is already private!"))
     c.dataset.private = True
     AggregationCache(c.dataset).invalidate()
     db.session.commit()
     h.flash_success(_("The dataset has been retracted. " \
             "It is no longer visible to others."))
     redirect(h.url_for(controller='editor', action='index', 
                        dataset=c.dataset.name))
Example #14
    def drop(self, dataset):
        self._get_dataset(dataset)
        require.dataset.update(c.dataset)
        c.dataset.updated_at = datetime.utcnow()
        c.dataset.drop()
        solr.drop_index(c.dataset.name)
        c.dataset.init()
        c.dataset.generate()
        AggregationCache(c.dataset).invalidate()

        # For every source in the dataset we set the status to removed
        for source in c.dataset.sources:
            for run in source.runs:
                run.status = Run.STATUS_REMOVED

        db.session.commit()
        h.flash_success(_("The dataset has been cleared."))
        redirect(h.url_for(controller='editor', action='index',
                           dataset=c.dataset.name))
Example #15
    def aggregate(self):
        """
        Aggregation of a dataset based on URL parameters. It serves the 
        aggregation from a cache if possible, and if not it computes it (it's
        performed in the aggregation cache for some reason).
        """

        # Parse the aggregation parameters to get them into the right format
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        # If there were parsing errors we return them with status code 400
        # as jsonp, irrespective of what format was asked for.
        if errors:
            response.status = 400
            return to_jsonp({'errors': errors})

        # URL parameters are always singular nouns but we work with some
        # as plural nouns so we pop them into the plural version
        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        params['measures'] = params.pop('measure')

        # Get the dataset and the format and remove from the parameters
        dataset = params.pop('dataset')
        format = params.pop('format')

        # User must have the right to read the dataset to perform aggregation
        require.dataset.read(dataset)

        # Create response headers from the parameters
        self._response_params(params)

        try:
            # Create an aggregation cache for the dataset and aggregate its
            # results. The cache will perform the aggregation if it doesn't
            # have a cached result
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)

            # If the result has drilldown we create html_url values for its
            # dimensions (linked data).
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(dataset.name,
                    result['drilldown'])

            # Do the ETag caching based on the cache_key in the summary;
            # this is a weird place to do it since the heavy lifting has
            # already been performed above. TODO: Needs rethinking.
            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            # We log possible errors and return them with status code 400
            log.exception(ve)
            response.status = 400
            return to_jsonp({'errors': [unicode(ve)]})

        # If the requested format is csv we write the drilldown results into
        # a csv file and return it, if not we return a jsonp result (default)
        if format == 'csv':
            return write_csv(result['drilldown'], response,
                filename=dataset.name + '.csv')
        return to_jsonp(result)
Example #16
    def aggregate(self):
        """
        Aggregation of a dataset based on URL parameters. It serves the
        aggregation from a cache if possible, and if not it computes it (it's
        performed in the aggregation cache for some reason).
        """

        # Parse the aggregation parameters to get them into the right format
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        # If there were parsing errors we return them with status code 400
        # as jsonp, irrespective of what format was asked for.
        if errors:
            response.status = 400
            return to_jsonp({'errors': errors})

        # URL parameters are always singular nouns but we work with some
        # as plural nouns so we pop them into the plural version
        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        params['measures'] = params.pop('measure')

        # Get the dataset and the format and remove from the parameters
        dataset = params.pop('dataset')
        format = params.pop('format')

        # User must have the right to read the dataset to perform aggregation
        require.dataset.read(dataset)

        # Create response headers from the parameters
        self._response_params(params)

        try:
            # Create an aggregation cache for the dataset and aggregate its
            # results. The cache will perform the aggregation if it doesn't
            # have a cached result
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)

            # If the result has drilldown we create html_url values for its
            # dimensions (linked data).
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(
                    dataset.name, result['drilldown'])

            # Do the ETag caching based on the cache_key in the summary;
            # this is a weird place to do it since the heavy lifting has
            # already been performed above. TODO: Needs rethinking.
            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])

        except (KeyError, ValueError) as ve:
            # We log possible errors and return them with status code 400
            log.exception(ve)
            response.status = 400
            return to_jsonp({'errors': [unicode(ve)]})

        # If the requested format is csv we write the drilldown results into
        # a csv file and return it, if not we return a jsonp result (default)
        if format == 'csv':
            return write_csv(result['drilldown'],
                             response,
                             filename=dataset.name + '.csv')
        return to_jsonp(result)
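
Taken together, the examples share one call pattern around `AggregationCache`. The sketch below condenses that pattern; it is not a verbatim API reference, `dataset` is assumed to be a dataset domain object obtained elsewhere (e.g. via `model.Dataset.by_name(...)` as in Example #11), and the import path of `AggregationCache` is not shown in the listing above, so it is omitted here:

    def example_usage(dataset):
        # `dataset` stands in for a dataset domain object, e.g. the value of
        # model.Dataset.by_name(name) as in Example #11.
        cache = AggregationCache(dataset)
        result = cache.aggregate(drilldowns=['year'],       # dimensions to group by
                                 cuts=[('year', '2011')],   # (dimension, value) filters
                                 page=1, pagesize=100)

        # The examples read two keys from the result:
        #   result['summary']   - totals plus 'num_drilldowns' and, when
        #                         cache.cache_enabled, a 'cache_key' for ETags
        #   result['drilldown'] - one dict per group, holding the grouped
        #                         dimension values and an 'amount'
        values = [(cell.get('year'), cell.get('amount'))
                  for cell in result.get('drilldown', [])]

        # After the dataset changes (drop, retract, re-load), the examples
        # discard the cached aggregates:
        AggregationCache(dataset).invalidate()
        return values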