def search(self):
    parser = SearchParamParser(request.params)
    params, errors = parser.parse()

    if errors:
        response.status = 400
        return {'errors': errors}

    expand_facets = params.pop('expand_facet_dimensions')

    datasets = params.pop('dataset', None)
    if datasets is None or not len(datasets):
        q = model.Dataset.all_by_account(c.account)
        if params.get('category'):
            q = q.filter_by(category=params.pop('category'))
        datasets = q.all()
        expand_facets = False

    if not len(datasets):
        return {'errors': [_("No dataset available.")]}

    params['filter']['dataset'] = []
    for dataset in datasets:
        require.dataset.read(dataset)
        params['filter']['dataset'].append(dataset.name)

    response.last_modified = max([d.updated_at for d in datasets])
    etag_cache_keygen(parser.key(), response.last_modified)

    b = Browser(**params)
    try:
        stats, facets, entries = b.execute()
    except SolrException as e:
        return {'errors': [unicode(e)]}
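# The actions in this module derive their HTTP cache validators from
# etag_cache_keygen. A minimal sketch of what such a helper could look like,
# assuming it simply hashes its arguments and delegates to Pylons'
# etag_cache (which sets the ETag header and aborts with "304 Not Modified"
# when the client sent a matching If-None-Match). This is an illustration,
# not the project's actual implementation.
from hashlib import sha1

from pylons.controllers.util import etag_cache


def etag_cache_keygen(*keys):
    # Derive a stable ETag from the given cache keys, e.g. a dataset's
    # updated_at timestamp plus the requested format.
    etag = sha1(repr(keys)).hexdigest()
    etag_cache(etag)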
def about(self, dataset, format="html"):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    handle_request(request, c, c.dataset)
    c.sources = list(c.dataset.sources)
    c.managers = list(c.dataset.managers)
    return render("dataset/about.html")
def index(self, dataset, format="html"):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at, format)
    if format == "json":
        dimensions = [dimension_apply_links(dataset, d.as_dict())
                      for d in c.dataset.dimensions]
        return to_jsonp(dimensions)
    else:
        return templating.render("dimension/index.html")
def view(self, dataset, dimension, format="html"):
    self._get_dimension(dataset, dimension)
    etag_cache_keygen(c.dataset.updated_at, format)
    if format == "json":
        dimension = dimension_apply_links(dataset, c.dimension.as_dict())
        return to_jsonp(dimension)
    c.widget = get_widget("aggregate_table")
    c.widget_state = {"drilldowns": [c.dimension.name]}
    return templating.render("dimension/view.html")
def index(self, dataset, format='html'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at, format)
    if format == 'json':
        dimensions = [dimension_apply_links(dataset, d.as_dict())
                      for d in c.dataset.dimensions]
        return to_jsonp(dimensions)
    else:
        return render('dimension/index.html')
def about(self, dataset, format='html'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    handle_request(request, c, c.dataset)
    c.sources = list(c.dataset.sources)
    c.managers = list(c.dataset.managers)

    # Get all badges if the user is an admin, because they can then
    # give badges to the dataset on its about page.
    if c.account and c.account.admin:
        c.badges = list(Badge.all())

    return templating.render('dataset/about.html')
def sitemap(self, dataset, dimension):
    self._get_dimension(dataset, dimension)
    etag_cache_keygen(c.dataset.updated_at, 'xml')
    pages = []
    # TODO: Make this work for dimensions with more than 30,000 members.
    for member in c.dimension.members(limit=30000):
        pages.append({
            'loc': url_for(controller='dimension', action='member',
                           dataset=dataset, dimension=dimension,
                           name=member.get('name'), qualified=True),
            'lastmod': c.dataset.updated_at
        })
    return sitemap(pages)
def distinct(self, dataset, dimension, format="json"):
    self._get_dimension(dataset, dimension)
    parser = DistinctFieldParamParser(c.dimension, request.params)
    params, errors = parser.parse()
    etag_cache_keygen(c.dataset.updated_at, format, parser.key())

    if errors:
        response.status = 400
        return {"errors": errors}

    q = params.get("attribute").column_alias.ilike(params.get("q") + "%")
    offset = int((params.get("page") - 1) * params.get("pagesize"))
    members = c.dimension.members(q, offset=offset,
                                  limit=params.get("pagesize"))
    return to_jsonp({"results": list(members),
                     "count": c.dimension.num_entries(q)})
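# The distinct action above combines a case-insensitive prefix match with
# offset/limit paging. A self-contained sketch of the same pattern in plain
# SQLAlchemy; the table and column names are illustrative, not taken from
# the codebase.
from sqlalchemy import (Column, Integer, MetaData, Table, Unicode,
                        create_engine, select)

engine = create_engine('sqlite://')
metadata = MetaData()
members = Table('members', metadata,
                Column('id', Integer, primary_key=True),
                Column('label', Unicode))
metadata.create_all(engine)


def member_page(prefix, page=1, pagesize=30):
    # ilike(prefix + '%') mirrors the column_alias.ilike(...) filter above.
    cond = members.c.label.ilike(prefix + '%')
    offset = (page - 1) * pagesize
    q = select([members.c.label]).where(cond).offset(offset).limit(pagesize)
    return [row[0] for row in engine.execute(q)]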
def view(self, dataset, format='html'):
    """
    Dataset viewer. Default format is html. This will return either the
    entry index (if there is no default view) or the default view.
    If a request parameter `embed` is given, the default view is returned
    as an embeddable page. If json is provided as the format, the json
    representation of the dataset is returned.
    """
    # Get the dataset (will be placed in c.dataset)
    self._get_dataset(dataset)

    # Generate the etag for the cache based on the updated_at value
    etag_cache_keygen(c.dataset.updated_at)

    # Compute the number of entries in the dataset
    c.num_entries = len(c.dataset)

    # Handle the request for the dataset; this will set
    # a default view in c.view if there is any
    handle_request(request, c, c.dataset)

    if format == 'json':
        # If the requested format is json we return the json representation
        return to_jsonp(dataset_apply_links(c.dataset.as_dict()))
    else:
        (earliest_timestamp, latest_timestamp) = c.dataset.timerange()
        if earliest_timestamp is not None:
            c.timerange = {'from': earliest_timestamp,
                           'to': latest_timestamp}

        if c.view is None:
            # If handle_request didn't set a view we return the
            # entry index
            return EntryController().index(dataset, format)
        if 'embed' in request.params:
            # If embed is requested via the url parameters we return
            # a redirect to an embed page for the default view
            return redirect(
                h.url_for(controller='view', action='embed',
                          dataset=c.dataset.name,
                          widget=c.view.vis_widget.get('name'),
                          state=json.dumps(c.view.vis_state)))

        # Return the dataset view (for the default view)
        return templating.render('dataset/view.html')
def sitemap(self, dataset, page):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at, 'xml')
    limit = 30000
    pages = []
    for entry in c.dataset.entries(limit=limit,
                                   offset=(int(page) - 1) * limit,
                                   step=limit, fields=[]):
        pages.append({
            'loc': h.url_for(controller='entry', action='view',
                             dataset=dataset, id=entry.get('id'),
                             qualified=True),
            'lastmod': c.dataset.updated_at
        })
    return sitemap(pages)
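# Both sitemap actions build a list of {'loc': ..., 'lastmod': ...} dicts and
# hand it to a sitemap() helper. A minimal sketch of what such a helper could
# do, assuming it only serializes the pages into the standard <urlset>
# document; the real helper may use a template and set response headers.
from xml.sax.saxutils import escape


def sitemap(pages):
    urls = []
    for page in pages:
        urls.append('<url><loc>%s</loc><lastmod>%s</lastmod></url>' % (
            escape(page['loc']), page['lastmod'].strftime('%Y-%m-%d')))
    return ('<?xml version="1.0" encoding="UTF-8"?>'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
            '%s</urlset>' % ''.join(urls))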
def view(self, dataset, format='html'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    c.num_entries = len(c.dataset)
    handle_request(request, c, c.dataset)
    if format == 'json':
        return to_jsonp(dataset_apply_links(c.dataset.as_dict()))
    else:
        if c.view is None:
            return EntryController().index(dataset, format)
        if 'embed' in request.params:
            return redirect(
                h.url_for(controller='view', action='embed',
                          dataset=c.dataset.name,
                          widget=c.view.vis_widget.get('name'),
                          state=json.dumps(c.view.vis_state)))
        return templating.render('dataset/view.html')
def view(self, dataset, format="html"):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    c.num_entries = len(c.dataset)
    handle_request(request, c, c.dataset)
    if format == "json":
        return to_jsonp(dataset_apply_links(c.dataset.as_dict()))
    else:
        if c.view is None:
            return EntryController().index(dataset, format)
        if "embed" in request.params:
            return redirect(
                h.url_for(controller="view", action="embed",
                          dataset=c.dataset.name,
                          widget=c.view.vis_widget.get("name"),
                          state=json.dumps(c.view.vis_state)))
        return render("dataset/view.html")
def index(self, format='html'):
    c.query = request.params.items()
    c.add_filter = lambda f, v: \
        '?' + urlencode(c.query + [(f, v)]
                        if (f, v) not in c.query else c.query)
    c.del_filter = lambda f, v: \
        '?' + urlencode([(k, x) for k, x in c.query
                         if (k, x) != (f, v)])

    c.results = c.datasets
    for language in request.params.getall('languages'):
        l = db.aliased(DatasetLanguage)
        c.results = c.results.join(l, Dataset._languages)
        c.results = c.results.filter(l.code == language)

    for territory in request.params.getall('territories'):
        t = db.aliased(DatasetTerritory)
        c.results = c.results.join(t, Dataset._territories)
        c.results = c.results.filter(t.code == territory)

    category = request.params.get('category')
    if category:
        c.results = c.results.filter(Dataset.category == category)

    c.results = list(c.results)
    c.territory_options = [{'code': code,
                            'count': count,
                            'url': h.url_for(controller='dataset',
                                             action='index',
                                             territories=code),
                            'label': COUNTRIES.get(code, code)}
                           for (code, count) in
                           DatasetTerritory.dataset_counts(c.results)]
    c.language_options = [{'code': code,
                           'count': count,
                           'url': h.url_for(controller='dataset',
                                            action='index',
                                            languages=code),
                           'label': LANGUAGES.get(code, code)}
                          for (code, count) in
                          DatasetLanguage.dataset_counts(c.results)]

    # TODO: figure out where to put this:
    ds_ids = [d.id for d in c.results]
    if len(ds_ids):
        q = db.select([Dataset.category, db.func.count(Dataset.id)],
                      Dataset.id.in_(ds_ids), group_by=Dataset.category,
                      order_by=db.func.count(Dataset.id).desc())
        c.category_options = [{'category': category,
                               'count': count,
                               'url': h.url_for(controller='dataset',
                                                action='index',
                                                category=category),
                               'label': CATEGORIES.get(category, category)}
                              for (category, count) in
                              db.session.bind.execute(q).fetchall()
                              if category is not None]
    else:
        c.category_options = []

    c._must_revalidate = True
    if len(c.results):
        dt = max([r.updated_at for r in c.results])
        etag_cache_keygen(dt)

    if format == 'json':
        results = map(lambda d: d.as_dict(), c.results)
        results = [dataset_apply_links(r) for r in results]
        return to_jsonp({
            'datasets': results,
            'categories': c.category_options,
            'territories': c.territory_options,
            'languages': c.language_options
        })
    elif format == 'csv':
        results = map(lambda d: d.as_dict(), c.results)
        return write_csv(results, response)

    c.show_rss = True
    return templating.render('dataset/index.html')
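# The add_filter/del_filter lambdas above rebuild the query string with one
# facet filter toggled. The same idea outside the request cycle, with made-up
# values:
from urllib import urlencode

query = [('category', 'budget'), ('languages', 'en')]
add_filter = lambda f, v: '?' + urlencode(
    query + [(f, v)] if (f, v) not in query else query)
del_filter = lambda f, v: '?' + urlencode(
    [(k, x) for k, x in query if (k, x) != (f, v)])

print add_filter('territories', 'DE')  # ?category=budget&languages=en&territories=DE
print del_filter('languages', 'en')    # ?category=budget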
def search(self):
    parser = SearchParamParser(request.params)
    params, errors = parser.parse()

    if errors:
        response.status = 400
        return to_jsonp({'errors': errors})

    expand_facets = params.pop('expand_facet_dimensions')

    format = params.pop('format')
    if format == 'csv':
        params['stats'] = False
        params['facet_field'] = None

    datasets = params.pop('dataset', None)
    if datasets is None or not datasets:
        q = model.Dataset.all_by_account(c.account)
        if params.get('category'):
            q = q.filter_by(category=params.pop('category'))
        datasets = q.all()
        expand_facets = False

    if not datasets:
        return {'errors': ["No dataset available."]}

    params['filter']['dataset'] = []
    for dataset in datasets:
        require.dataset.read(dataset)
        params['filter']['dataset'].append(dataset.name)

    response.last_modified = max([d.updated_at for d in datasets])
    etag_cache_keygen(parser.key(), response.last_modified)

    self._response_params(params)

    if params['pagesize'] > parser.defaults['pagesize']:
        # http://wiki.nginx.org/X-accel#X-Accel-Buffering
        response.headers['X-Accel-Buffering'] = 'no'
        if format == 'csv':
            csv_headers(response, 'entries.csv')
            streamer = CSVStreamingResponse(
                datasets,
                params,
                pagesize=parser.defaults['pagesize'])
            return streamer.response()
        else:
            json_headers(filename='entries.json')
            streamer = JSONStreamingResponse(
                datasets,
                params,
                pagesize=parser.defaults['pagesize'],
                expand_facets=_expand_facets if expand_facets else None,
                callback=request.params.get('callback'))
            return streamer.response()

    b = Browser(**params)
    try:
        b.execute()
    except SolrException as e:
        return {'errors': [unicode(e)]}
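# When the requested pagesize exceeds the parser default, the search action
# above streams its response and disables nginx buffering. A rough sketch of
# the streaming idea behind CSVStreamingResponse, assuming it emits CSV rows
# one page at a time; fetch_page is a hypothetical callable, not part of the
# codebase.
import csv
from StringIO import StringIO


def stream_csv(fetch_page, pagesize=10000):
    # fetch_page(page, pagesize) is assumed to return a list of dicts; rows
    # are yielded page by page so the client can start reading before the
    # full result set has been computed.
    page, fieldnames = 1, None
    while True:
        rows = fetch_page(page, pagesize)
        if not rows:
            break
        buf = StringIO()
        if fieldnames is None:
            fieldnames = sorted(rows[0].keys())
        writer = csv.DictWriter(buf, fieldnames=fieldnames)
        if page == 1:
            writer.writeheader()
        writer.writerows(rows)
        yield buf.getvalue()
        page += 1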
def index(self, format="html"):
    c.query = request.params.items()
    c.add_filter = lambda f, v: "?" + urlencode(
        c.query + [(f, v)] if (f, v) not in c.query else c.query)
    c.del_filter = lambda f, v: "?" + urlencode(
        [(k, x) for k, x in c.query if (k, x) != (f, v)])

    c.results = c.datasets
    for language in request.params.getall("languages"):
        l = db.aliased(DatasetLanguage)
        c.results = c.results.join(l, Dataset._languages)
        c.results = c.results.filter(l.code == language)

    for territory in request.params.getall("territories"):
        t = db.aliased(DatasetTerritory)
        c.results = c.results.join(t, Dataset._territories)
        c.results = c.results.filter(t.code == territory)

    category = request.params.get("category")
    if category:
        c.results = c.results.filter(Dataset.category == category)

    c.results = list(c.results)
    c.territory_options = [
        {
            "code": code,
            "count": count,
            "url": h.url_for(controller="dataset", action="index",
                             territories=code),
            "label": COUNTRIES.get(code, code),
        }
        for (code, count) in DatasetTerritory.dataset_counts(c.results)
    ]
    c.language_options = [
        {
            "code": code,
            "count": count,
            "url": h.url_for(controller="dataset", action="index",
                             languages=code),
            "label": LANGUAGES.get(code, code),
        }
        for (code, count) in DatasetLanguage.dataset_counts(c.results)
    ]

    # TODO: figure out where to put this:
    ds_ids = [d.id for d in c.results]
    if len(ds_ids):
        q = db.select(
            [Dataset.category, db.func.count(Dataset.id)],
            Dataset.id.in_(ds_ids),
            group_by=Dataset.category,
            order_by=db.func.count(Dataset.id).desc(),
        )
        c.category_options = [
            {
                "category": category,
                "count": count,
                "url": h.url_for(controller="dataset", action="index",
                                 category=category),
                "label": CATEGORIES.get(category, category),
            }
            for (category, count) in db.session.bind.execute(q).fetchall()
            if category is not None
        ]
    else:
        c.category_options = []

    c._must_revalidate = True
    if len(c.results):
        dt = max([r.updated_at for r in c.results])
        etag_cache_keygen(dt)

    if format == "json":
        results = map(lambda d: d.as_dict(), c.results)
        results = [dataset_apply_links(r) for r in results]
        return to_jsonp(
            {
                "datasets": results,
                "categories": c.category_options,
                "territories": c.territory_options,
                "languages": c.language_options,
            }
        )
    elif format == "csv":
        results = map(lambda d: d.as_dict(), c.results)
        return write_csv(results, response)

    return render("dataset/index.html")
def index(self, format='html'):
    """
    Get a list of all datasets along with territory, language, and
    category counts (the number of datasets for each).
    """
    # Create facet filters (so we can look at a single country,
    # language etc.)
    c.query = request.params.items()
    c.add_filter = lambda f, v: \
        '?' + urlencode(c.query + [(f, v)]
                        if (f, v) not in c.query else c.query)
    c.del_filter = lambda f, v: \
        '?' + urlencode([(k, x) for k, x in c.query
                         if (k, x) != (f, v)])

    # Parse the request parameters to get them into the right format
    parser = DatasetIndexParamParser(request.params)
    params, errors = parser.parse()
    if errors:
        concatenated_errors = ', '.join(errors)
        abort(400, _('Parameter values not supported: %s') %
              concatenated_errors)

    # Pop the page and pagesize parameters since they're not used for the
    # cache: we have to fetch all of the datasets to do the language,
    # territory, and category counts, and page/pagesize are only used for
    # the html response.
    params.pop('page')
    pagesize = params.pop('pagesize')

    # Get the cached index (this will also generate it if there is no
    # cached result; the cache is invalidated when a dataset is published
    # or retracted).
    cache = DatasetIndexCache()
    results = cache.index(**params)

    # Generate the ETag from the last modified timestamp of the first
    # dataset (they are ordered in descending order by last modified).
    # It doesn't matter if the call above just (re)generated the index,
    # since an uncached index means the ETag has changed anyway. We wrap
    # this in a try clause because an empty list of public datasets would
    # raise an IndexError. We also don't set c._must_revalidate to True
    # since we don't care if the index needs a hard refresh.
    try:
        etag_cache_keygen(
            results['datasets'][0]['timestamps']['last_modified'])
    except IndexError:
        etag_cache_keygen(None)

    # Assign the results to template context variables
    c.language_options = results['languages']
    c.territory_options = results['territories']
    c.category_options = results['categories']

    if format == 'json':
        # Apply links to the dataset list before returning the json
        results['datasets'] = [dataset_apply_links(r)
                               for r in results['datasets']]
        return to_jsonp(results)
    elif format == 'csv':
        # The CSV response only shows datasets, not languages,
        # territories, etc.
        return write_csv(results['datasets'], response)

    # If we get here the format is html, so we show rss, paginate the
    # results and render the template.
    c.show_rss = True
    # The page parameter we popped earlier is part of request.params but
    # we now know it was parsed. We pass request.params along to retain
    # any parameters already supplied (filters).
    c.page = templating.Page(results['datasets'], items_per_page=pagesize,
                             item_count=len(results['datasets']),
                             **request.params)
    return templating.render('dataset/index.html')
def model(self, dataset, format='json'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    model = c.dataset.model
    model['dataset'] = dataset_apply_links(model['dataset'])
    return to_jsonp(model)