def aggregate(self):
    parser = AggregateParamParser(request.params)
    params, errors = parser.parse()

    if errors:
        response.status = 400
        return {'errors': errors}

    params['cuts'] = params.pop('cut')
    params['drilldowns'] = params.pop('drilldown')
    dataset = params.pop('dataset')
    format = params.pop('format')
    require.dataset.read(dataset)

    try:
        cache = AggregationCache(dataset)
        result = cache.aggregate(**params)
        if 'drilldown' in result:
            result['drilldown'] = drilldowns_apply_links(dataset.name,
                                                         result['drilldown'])

        response.last_modified = dataset.updated_at
        if cache.cache_enabled and 'cache_key' in result['summary']:
            etag_cache(result['summary']['cache_key'])
    except (KeyError, ValueError) as ve:
        log.exception(ve)
        response.status = 400
        return {'errors': ['Invalid aggregation query: %r' % ve]}

    if format == 'csv':
        return write_csv(result['drilldown'], response,
                         filename=dataset.name + '.csv')
    return to_jsonp(result)
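# The CSV branch above hands the drilldown cells to a write_csv() helper.
# The snippet below is NOT that helper; it is a minimal sketch (Python 2,
# matching the codebase) of how a list of drilldown dicts could be
# serialised to CSV with the standard library, assuming flat values and
# using invented sample cells for illustration only.
import csv
import StringIO


def drilldown_to_csv(cells):
    out = StringIO.StringIO()
    # Use the union of all keys as the header row.
    fieldnames = sorted(set(k for cell in cells for k in cell))
    writer = csv.DictWriter(out, fieldnames=fieldnames)
    writer.writeheader()
    for cell in cells:
        writer.writerow(cell)
    return out.getvalue()


print drilldown_to_csv([{'year': 2011, 'amount': 10.0},
                        {'year': 2012, 'amount': 12.5}])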
def aggregate(self):
    parser = AggregateParamParser(request.params)
    params, errors = parser.parse()

    if errors:
        response.status = 400
        return {'errors': errors}

    params['cuts'] = params.pop('cut')
    params['drilldowns'] = params.pop('drilldown')
    dataset = params.pop('dataset')
    require.dataset.read(dataset)

    try:
        cache = AggregationCache(dataset)
        result = cache.aggregate(**params)
        if 'drilldown' in result:
            result['drilldown'] = drilldowns_apply_links(dataset.name,
                                                         result['drilldown'])

        if cache.cache_enabled and 'cache_key' in result['summary']:
            if 'Pragma' in response.headers:
                del response.headers['Pragma']
            # Cache-Control directives are comma-separated; max-age is in
            # seconds.
            response.cache_control = 'public, max-age=84600'
            etag_cache(result['summary']['cache_key'])
    except (KeyError, ValueError) as ve:
        log.exception(ve)
        response.status = 400
        return {'errors': ['Invalid aggregation query: %r' % ve]}

    return result
def view(self, dataset, dimension, format='html'):
    self._get_dataset(dataset)
    try:
        c.dimension = c.dataset[dimension]
    except KeyError:
        abort(404, _('This is not a dimension'))
    if not isinstance(c.dimension, model.Dimension):
        abort(404, _('This is not a dimension'))

    page = self._get_page('page')
    cache = AggregationCache(c.dataset)
    result = cache.aggregate(drilldowns=[dimension], page=page,
                             pagesize=PAGE_SIZE)
    items = result.get('drilldown', [])
    c.values = [(d.get(dimension), d.get('amount')) for d in items]

    if format == 'json':
        return to_jsonp({"values": c.values,
                         "meta": c.dimension.as_dict()})

    c.page = Page(c.values, page=page,
                  item_count=result['summary']['num_drilldowns'],
                  items_per_page=PAGE_SIZE,
                  presliced_list=True)
    return render('dimension/view.html')
def aggregates(self):
    if self._aggregates is None:
        if self.view.drilldown is None:
            return []
        res = defaultdict(dict)
        drilldowns = {}
        query = ['year', self.view.drilldown]
        cache = AggregationCache(self.dataset)
        results = cache.aggregate(drilldowns=query, cuts=self.cuts)
        for entry in results.get('drilldown'):
            d = entry.get(self.view.drilldown)
            # Get a hashable key for the drilldown
            key = d['id'] if isinstance(d, dict) else d
            # Store a reference to this drilldown
            drilldowns[key] = d
            # Store drilldown value for this year
            res[key][str(entry.get('year'))] = entry.get('amount')
        self._aggregates = [(drilldowns[k], v) for k, v in res.items()]

        # sort aggregations by time
        if self.time is not None:
            self._aggregates = sorted(self._aggregates, reverse=True,
                                      key=lambda (k, v): v.get(self.time, 0))
    return self._aggregates
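# A standalone sketch of the grouping performed in aggregates() above, run
# against a hand-made aggregation result. The shape of the result dict
# ({'drilldown': [...], 'summary': {...}}) mirrors what the code above
# reads from AggregationCache.aggregate(); the dimension name and sample
# values are invented for illustration only.
from collections import defaultdict

sample_result = {
    'summary': {'num_drilldowns': 3},
    'drilldown': [
        {'year': 2011, 'amount': 10.0,
         'region': {'id': 'north', 'label': 'North'}},
        {'year': 2012, 'amount': 12.5,
         'region': {'id': 'north', 'label': 'North'}},
        {'year': 2011, 'amount': 7.0,
         'region': {'id': 'south', 'label': 'South'}},
    ]
}


def group_by_drilldown(result, dimension):
    # Build {drilldown key: {year (as str): amount}}, as aggregates() does.
    res = defaultdict(dict)
    drilldowns = {}
    for entry in result.get('drilldown', []):
        d = entry.get(dimension)
        key = d['id'] if isinstance(d, dict) else d
        drilldowns[key] = d
        res[key][str(entry.get('year'))] = entry.get('amount')
    return [(drilldowns[k], v) for k, v in res.items()]


# e.g. [({'id': 'north', ...}, {'2011': 10.0, '2012': 12.5}),
#       ({'id': 'south', ...}, {'2011': 7.0})]  (dict order may vary)
print group_by_drilldown(sample_result, 'region')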
def aggregate(self):
    errors = []
    params = request.params

    # get and check parameters
    dataset = self._dataset(params, errors)
    drilldowns = self._drilldowns(params, errors)
    cuts = self._cuts(params, errors)
    order = self._order(params, errors)
    measure = self._measure(params, dataset, errors)
    page = self._to_int('page', params.get('page', 1), errors)
    pagesize = self._to_int('pagesize', params.get('pagesize', 10000),
                            errors)

    if errors:
        return {'errors': errors}

    try:
        cache = AggregationCache(dataset)
        result = cache.aggregate(measure=measure, drilldowns=drilldowns,
                                 cuts=cuts, page=page, pagesize=pagesize,
                                 order=order)
        if cache.cache_enabled and 'cache_key' in result['summary']:
            if 'Pragma' in response.headers:
                del response.headers['Pragma']
            # Cache-Control directives are comma-separated; max-age is in
            # seconds.
            response.cache_control = 'public, max-age=84600'
            etag_cache(result['summary']['cache_key'])
    except (KeyError, ValueError) as ve:
        log.exception(ve)
        return {'errors': ['Invalid aggregation query: %r' % ve]}

    return result
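# A small standalone illustration of the cache-header handling inside the
# try-block above, written against a plain dict of response headers so it
# runs without Pylons. The header names and values mirror the code above;
# the function itself is a sketch, not part of the real controller, and
# setting 'ETag' directly stands in for the etag_cache() call.
def apply_cache_headers(headers, cache_key, max_age=84600):
    # Drop any 'Pragma' header, as the controller does before allowing
    # shared caches to store the response.
    headers.pop('Pragma', None)
    # max_age defaults to the 84600 seconds used above (just under a day).
    headers['Cache-Control'] = 'public, max-age=%d' % max_age
    # The aggregation's cache_key doubles as the ETag validator.
    headers['ETag'] = '"%s"' % cache_key
    return headers


headers = {'Pragma': 'no-cache'}
# -> {'Cache-Control': 'public, max-age=84600', 'ETag': '"abc123"'}
print apply_cache_headers(headers, 'abc123')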
def totals(self):
    if self._totals is None:
        self._totals = {}
        cache = AggregationCache(self.dataset)
        results = cache.aggregate(drilldowns=['year'], cuts=self.cuts)
        for entry in results.get('drilldown'):
            self._totals[str(entry.get('year'))] = entry.get('amount')
    return self._totals
def aggregate(self):
    dataset_name = request.params.get('dataset',
                                      request.params.get('slice'))
    dataset = model.Dataset.by_name(dataset_name)
    if dataset is None:
        abort(400, "Dataset %s not found" % dataset_name)
    require.dataset.read(dataset)

    drilldowns, cuts, statistics = [], [], []
    for key, value in sorted(request.params.items()):
        if '-' not in key:
            continue
        op, key = key.split('-', 1)
        if 'include' == op:
            cuts.append((key, value))
        elif 'per' == op:
            if 'time' == key:
                abort(400, "Time series are no longer supported")
            statistics.append((key, value))
        elif 'breakdown' == op:
            drilldowns.append(key)

    cache = AggregationCache(dataset)
    result = cache.aggregate(drilldowns=drilldowns + ['time'], cuts=cuts)

    # TODO: handle statistics as key-values ??? what's the point?
    for k, v in statistics:
        result = statistic_normalize(dataset, result, v, k)

    # translate to old format: group by drilldown, then by date.
    translated_result = defaultdict(dict)
    for cell in result['drilldown']:
        key = tuple([cellget(cell, d) for d in drilldowns])
        translated_result[key][cell['time']['name']] = cell['amount']
    dates = sorted(set([d['time']['name'] for d in result['drilldown']]))

    # give a value (or 0) for each present date in sorted order
    translated_result = [(k, [v.get(d, 0.0) for d in dates])
                         for k, v in translated_result.items()]
    return {'results': translated_result,
            'metadata': {
                'dataset': dataset.name,
                'include': cuts,
                'dates': map(unicode, dates),
                'axes': drilldowns,
                'per': statistics,
                'per_time': []
            }}
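# A standalone walk-through of the "translate to old format" step above,
# using plain dict access where the controller uses the cellget() helper,
# and hand-made cells instead of a real aggregation result. The field
# names follow the code above; the sample data is invented.
from collections import defaultdict

cells = [
    {'time': {'name': '2011'}, 'amount': 5.0, 'region': 'north'},
    {'time': {'name': '2012'}, 'amount': 6.0, 'region': 'north'},
    {'time': {'name': '2012'}, 'amount': 2.0, 'region': 'south'},
]
drilldowns = ['region']

translated = defaultdict(dict)
for cell in cells:
    key = tuple([cell[d] for d in drilldowns])
    translated[key][cell['time']['name']] = cell['amount']

dates = sorted(set([c['time']['name'] for c in cells]))
# one value per date, 0.0 when a drilldown has no cell for that date
translated = [(k, [v.get(d, 0.0) for d in dates])
              for k, v in translated.items()]

print dates       # ['2011', '2012']
# e.g. [(('north',), [5.0, 6.0]), (('south',), [2.0, 0.0])]
# (dict order may vary)
print translated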
def drop(self, dataset):
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)

    c.dataset.drop()
    c.dataset.init()
    c.dataset.generate()
    AggregationCache(c.dataset).invalidate()
    db.session.commit()

    h.flash_success(_("The dataset has been cleared."))
    redirect(h.url_for(controller='editor', action='index',
                       dataset=c.dataset.name))
def retract(self, dataset):
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)

    if c.dataset.private:
        abort(400, _("This dataset is already private!"))
    c.dataset.private = True
    AggregationCache(c.dataset).invalidate()
    db.session.commit()

    h.flash_success(_("The dataset has been retracted. "
                      "It is no longer visible to others."))
    redirect(h.url_for(controller='editor', action='index',
                       dataset=c.dataset.name))
def drop(self, dataset):
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)

    c.dataset.updated_at = datetime.utcnow()
    c.dataset.drop()
    solr.drop_index(c.dataset.name)
    c.dataset.init()
    c.dataset.generate()
    AggregationCache(c.dataset).invalidate()

    # For every source in the dataset we set the status to removed
    for source in c.dataset.sources:
        for run in source.runs:
            run.status = Run.STATUS_REMOVED

    db.session.commit()
    h.flash_success(_("The dataset has been cleared."))
    redirect(h.url_for(controller='editor', action='index',
                       dataset=c.dataset.name))
def aggregate(self): """ Aggregation of a dataset based on URL parameters. It serves the aggregation from a cache if possible, and if not it computes it (it's performed in the aggregation cache for some reason). """ # Parse the aggregation parameters to get them into the right format parser = AggregateParamParser(request.params) params, errors = parser.parse() # If there were parsing errors we return them with status code 400 # as jsonp, irrespective of what format was asked for. if errors: response.status = 400 return to_jsonp({'errors': errors}) # URL parameters are always singular nouns but we work with some # as plural nouns so we pop them into the plural version params['cuts'] = params.pop('cut') params['drilldowns'] = params.pop('drilldown') params['measures'] = params.pop('measure') # Get the dataset and the format and remove from the parameters dataset = params.pop('dataset') format = params.pop('format') # User must have the right to read the dataset to perform aggregation require.dataset.read(dataset) # Create response headers from the parameters self._response_params(params) try: # Create an aggregation cache for the dataset and aggregate its # results. The cache will perform the aggreagation if it doesn't # have a cached result cache = AggregationCache(dataset) result = cache.aggregate(**params) # If the result has drilldown we create html_url values for its # dimensions (linked data). if 'drilldown' in result: result['drilldown'] = drilldowns_apply_links(dataset.name, result['drilldown']) # Do the ETag caching based on the cache_key in the summary # this is a weird place to do it since the heavy lifting has # already been performed above. TODO: Needs rethinking. response.last_modified = dataset.updated_at if cache.cache_enabled and 'cache_key' in result['summary']: etag_cache(result['summary']['cache_key']) except (KeyError, ValueError) as ve: # We log possible errors and return them with status code 400 log.exception(ve) response.status = 400 return to_jsonp({'errors': [unicode(ve)]}) # If the requested format is csv we write the drilldown results into # a csv file and return it, if not we return a jsonp result (default) if format == 'csv': return write_csv(result['drilldown'], response, filename=dataset.name + '.csv') return to_jsonp(result)
def aggregate(self): """ Aggregation of a dataset based on URL parameters. It serves the aggregation from a cache if possible, and if not it computes it (it's performed in the aggregation cache for some reason). """ # Parse the aggregation parameters to get them into the right format parser = AggregateParamParser(request.params) params, errors = parser.parse() # If there were parsing errors we return them with status code 400 # as jsonp, irrespective of what format was asked for. if errors: response.status = 400 return to_jsonp({'errors': errors}) # URL parameters are always singular nouns but we work with some # as plural nouns so we pop them into the plural version params['cuts'] = params.pop('cut') params['drilldowns'] = params.pop('drilldown') params['measures'] = params.pop('measure') # Get the dataset and the format and remove from the parameters dataset = params.pop('dataset') format = params.pop('format') # User must have the right to read the dataset to perform aggregation require.dataset.read(dataset) # Create response headers from the parameters self._response_params(params) try: # Create an aggregation cache for the dataset and aggregate its # results. The cache will perform the aggreagation if it doesn't # have a cached result cache = AggregationCache(dataset) result = cache.aggregate(**params) # If the result has drilldown we create html_url values for its # dimensions (linked data). if 'drilldown' in result: result['drilldown'] = drilldowns_apply_links( dataset.name, result['drilldown']) # Do the ETag caching based on the cache_key in the summary # this is a weird place to do it since the heavy lifting has # already been performed above. TODO: Needs rethinking. response.last_modified = dataset.updated_at if cache.cache_enabled and 'cache_key' in result['summary']: etag_cache(result['summary']['cache_key']) except (KeyError, ValueError) as ve: # We log possible errors and return them with status code 400 log.exception(ve) response.status = 400 return to_jsonp({'errors': [unicode(ve)]}) # If the requested format is csv we write the drilldown results into # a csv file and return it, if not we return a jsonp result (default) if format == 'csv': return write_csv(result['drilldown'], response, filename=dataset.name + '.csv') return to_jsonp(result)