def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Check if the params are there ('metadata', 'csv_file') if len(request.params) != 2: abort(status_code=400, detail='incorrect number of params') metadata = request.params['metadata'] \ if 'metadata' in request.params \ else abort(status_code=400, detail='metadata is missing') csv_file = request.params['csv_file'] \ if 'csv_file' in request.params \ else abort(status_code=400, detail='csv_file is missing') # We proceed with the dataset try: model = json.load(urllib2.urlopen(metadata)) except: abort(status_code=400, detail='JSON model could not be parsed') try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) abort(status_code=400, detail='Model is not well formed') dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = True # Default value db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=csv_file) log.info(source) for source_ in dataset.sources: if source_.url == csv_file: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Check if the params are there ('metadata', 'csv_file') if len(request.params) != 2: abort(status_code=400, detail='incorrect number of params') metadata = request.params['metadata'] \ if 'metadata' in request.params \ else abort(status_code=400, detail='metadata is missing') csv_file = request.params['csv_file'] \ if 'csv_file' in request.params \ else abort(status_code=400, detail='csv_file is missing') # We proceed with the dataset try: model = json.load(urllib2.urlopen(metadata)) except: abort(status_code=400, detail='JSON model could not be parsed') try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) abort(status_code=400, detail='Model is not well formed') dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = True # Default value db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=csv_file) log.info(source) for source_ in dataset.sources: if source_.url == csv_file: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def load_with_model_and_csv(self, metadata, csv_file, private):
    """ Load a dataset using a metadata model file and a csv file """
    if metadata is None:
        response.status = 400
        return to_jsonp({'errors': 'metadata is missing'})

    if csv_file is None:
        response.status = 400
        return to_jsonp({'errors': 'csv_file is missing'})

    # We proceed with the dataset
    try:
        model = json.load(urllib2.urlopen(metadata))
    except Exception:
        response.status = 400
        return to_jsonp({'errors': 'JSON model could not be parsed'})

    try:
        log.info("Validating model")
        model = validate_model(model)
    except Invalid as i:
        log.error("Errors occurred during model validation:")
        for field, error in i.asdict().items():
            log.error("%s: %s", field, error)
        response.status = 400
        return to_jsonp({'errors': 'Model is not well formed'})

    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        require.dataset.create()
        dataset.managers.append(c.account)
        dataset.private = private
        db.session.add(dataset)
    else:
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Reuse an existing source for the same URL if there is one
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)

    return to_jsonp(dataset_apply_links(dataset.as_dict()))
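# A minimal sketch of the metadata JSON that load_with_model_and_csv()
# fetches and passes through validate_model(). Only the
# model['dataset']['name'] access is taken from the code above; the
# remaining keys follow the usual OpenSpending model layout and should
# be treated as assumptions — validate_model() decides what is really
# required.
example_model = {
    "dataset": {
        "name": "example-budget",   # used for the Dataset.by_name() lookup
        "label": "Example Budget",
        "currency": "EUR"
    },
    "mapping": {
        # dimension and measure definitions would go here
    }
}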
def view(self, dataset, format='html'):
    self._get_dataset(dataset)
    c.num_entries = len(c.dataset)
    handle_request(request, c, c.dataset)
    if c.view is None and format == 'html':
        return EntryController().index(dataset, format)
    if format == 'json':
        return to_jsonp(dataset_apply_links(c.dataset.as_dict()))
    else:
        return render('dataset/view.html')
def view(self, dataset, format='html'): """ Dataset viewer. Default format is html. This will return either an entry index if there is no default view or the defaul view. If a request parameter embed is given the default view is returned as an embeddable page. If json is provided as a format the json representation of the dataset is returned. """ # Get the dataset (will be placed in c.dataset) self._get_dataset(dataset) # Generate the etag for the cache based on updated_at value etag_cache_keygen(c.dataset.updated_at) # Compute the number of entries in the dataset c.num_entries = len(c.dataset) # Handle the request for the dataset, this will return # a default view in c.view if there is any handle_request(request, c, c.dataset) if format == 'json': # If requested format is json we return the json representation return to_jsonp(dataset_apply_links(c.dataset.as_dict())) else: (earliest_timestamp, latest_timestamp) = c.dataset.timerange() if earliest_timestamp is not None: c.timerange = { 'from': earliest_timestamp, 'to': latest_timestamp } if c.view is None: # If handle request didn't return a view we return the # entry index return EntryController().index(dataset, format) if 'embed' in request.params: # If embed is requested using the url parameters we return # a redirect to an embed page for the default view return redirect( h.url_for(controller='view', action='embed', dataset=c.dataset.name, widget=c.view.vis_widget.get('name'), state=json.dumps(c.view.vis_state))) # Return the dataset view (for the default view) return templating.render('dataset/view.html')
def view(self, dataset, format='html'): """ Dataset viewer. Default format is html. This will return either an entry index if there is no default view or the defaul view. If a request parameter embed is given the default view is returned as an embeddable page. If json is provided as a format the json representation of the dataset is returned. """ # Get the dataset (will be placed in c.dataset) self._get_dataset(dataset) # Generate the etag for the cache based on updated_at value etag_cache_keygen(c.dataset.updated_at) # Compute the number of entries in the dataset c.num_entries = len(c.dataset) # Handle the request for the dataset, this will return # a default view in c.view if there is any handle_request(request, c, c.dataset) if format == 'json': # If requested format is json we return the json representation return to_jsonp(dataset_apply_links(c.dataset.as_dict())) else: (earliest_timestamp, latest_timestamp) = c.dataset.timerange() if earliest_timestamp is not None: c.timerange = {'from': earliest_timestamp, 'to': latest_timestamp} if c.view is None: # If handle request didn't return a view we return the # entry index return EntryController().index(dataset, format) if 'embed' in request.params: # If embed is requested using the url parameters we return # a redirect to an embed page for the default view return redirect( h.url_for(controller='view', action='embed', dataset=c.dataset.name, widget=c.view.vis_widget.get('name'), state=json.dumps(c.view.vis_state))) # Return the dataset view (for the default view) return templating.render('dataset/view.html')
def view(self, dataset, format='html'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    c.num_entries = len(c.dataset)
    handle_request(request, c, c.dataset)
    if format == 'json':
        return to_jsonp(dataset_apply_links(c.dataset.as_dict()))
    else:
        if c.view is None:
            return EntryController().index(dataset, format)
        if 'embed' in request.params:
            return redirect(
                h.url_for(controller='view', action='embed',
                          dataset=c.dataset.name,
                          widget=c.view.vis_widget.get('name'),
                          state=json.dumps(c.view.vis_state)))
        return templating.render('dataset/view.html')
def index(self, format='html'):
    c.query = request.params.items()
    c.add_filter = lambda f, v: \
        '?' + urlencode(c.query + [(f, v)]
                        if (f, v) not in c.query else c.query)
    c.del_filter = lambda f, v: \
        '?' + urlencode([(k, x) for k, x in c.query
                         if (k, x) != (f, v)])
    c.results = c.datasets
    for language in request.params.getall('languages'):
        l = db.aliased(DatasetLanguage)
        c.results = c.results.join(l, Dataset._languages)
        c.results = c.results.filter(l.code == language)
    for territory in request.params.getall('territories'):
        t = db.aliased(DatasetTerritory)
        c.results = c.results.join(t, Dataset._territories)
        c.results = c.results.filter(t.code == territory)
    c.results = list(c.results)
    c.territory_options = [{'code': code,
                            'count': count,
                            'url': h.url_for(controller='dataset',
                                             action='index',
                                             territories=code),
                            'label': COUNTRIES.get(code, code)}
                           for (code, count) in
                           DatasetTerritory.dataset_counts(c.results)]
    c.language_options = [{'code': code,
                           'count': count,
                           'url': h.url_for(controller='dataset',
                                            action='index',
                                            languages=code),
                           'label': LANGUAGES.get(code, code)}
                          for (code, count) in
                          DatasetLanguage.dataset_counts(c.results)]
    if format == 'json':
        results = map(lambda d: d.as_dict(), c.results)
        results = [dataset_apply_links(r) for r in results]
        return to_jsonp({
            'datasets': results,
            'territories': c.territory_options,
            'languages': c.language_options
        })
    elif format == 'csv':
        results = map(lambda d: d.as_dict(), c.results)
        return write_csv(results, response)
    return render('dataset/index.html')
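# A standalone demo of what the add_filter / del_filter lambdas above
# produce. The query pairs are made up for illustration; the logic
# mirrors the code: add_filter appends a (field, value) pair unless it
# is already present, del_filter drops a matching pair.
from urllib import urlencode

query = [('languages', 'en'), ('territories', 'gb')]
add_filter = lambda f, v: '?' + urlencode(
    query + [(f, v)] if (f, v) not in query else query)
del_filter = lambda f, v: '?' + urlencode(
    [(k, x) for k, x in query if (k, x) != (f, v)])

print add_filter('territories', 'de')  # ?languages=en&territories=gb&territories=de
print del_filter('languages', 'en')    # ?territories=gb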
def view(self, dataset, format="html"): self._get_dataset(dataset) etag_cache_keygen(c.dataset.updated_at) c.num_entries = len(c.dataset) handle_request(request, c, c.dataset) if format == "json": return to_jsonp(dataset_apply_links(c.dataset.as_dict())) else: if c.view is None: return EntryController().index(dataset, format) if "embed" in request.params: return redirect( h.url_for( controller="view", action="embed", dataset=c.dataset.name, widget=c.view.vis_widget.get("name"), state=json.dumps(c.view.vis_state), ) ) return render("dataset/view.html")
def model(self, dataset, format='json'):
    self._get_dataset(dataset)
    etag_cache_keygen(c.dataset.updated_at)
    model = c.dataset.model
    model['dataset'] = dataset_apply_links(model['dataset'])
    return to_jsonp(model)
def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Parse the loading api parameters to get them into the right format parser = LoadingAPIParamParser(request.params) params, errors = parser.parse() if errors: response.status = 400 return to_jsonp({'errors': errors}) if params['metadata'] is None: response.status = 400 return to_jsonp({'errors': 'metadata is missing'}) if params['csv_file'] is None: response.status = 400 return to_jsonp({'errors': 'csv_file is missing'}) # We proceed with the dataset try: model = json.load(urllib2.urlopen(params['metadata'])) except: response.status = 400 return to_jsonp({'errors': 'JSON model could not be parsed'}) try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) response.status = 400 return to_jsonp({'errors': 'Model is not well formed'}) dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = params['private'] db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=params['csv_file']) log.info(source) for source_ in dataset.sources: if source_.url == params['csv_file']: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def search(self):
    parser = SearchParamParser(request.params)
    params, errors = parser.parse()

    if errors:
        response.status = 400
        return to_jsonp({'errors': errors})

    expand_facets = params.pop('expand_facet_dimensions')

    format = params.pop('format')
    if format == 'csv':
        params['stats'] = False
        params['facet_field'] = None

    datasets = params.pop('dataset', None)
    if datasets is None or not datasets:
        q = Dataset.all_by_account(c.account)
        if params.get('category'):
            q = q.filter_by(category=params.pop('category'))
        datasets = q.all()
        expand_facets = False

    if not datasets:
        return {'errors': ["No dataset available."]}

    params['filter']['dataset'] = []
    for dataset in datasets:
        require.dataset.read(dataset)
        params['filter']['dataset'].append(dataset.name)

    response.last_modified = max([d.updated_at for d in datasets])
    etag_cache_keygen(parser.key(), response.last_modified)

    if params['pagesize'] > parser.defaults['pagesize']:
        # http://wiki.nginx.org/X-accel#X-Accel-Buffering
        response.headers['X-Accel-Buffering'] = 'no'
        if format == 'csv':
            csv_headers(response, 'entries.csv')
            streamer = CSVStreamingResponse(
                datasets,
                params,
                pagesize=parser.defaults['pagesize']
            )
            return streamer.response()
        else:
            json_headers(filename='entries.json')
            streamer = JSONStreamingResponse(
                datasets,
                params,
                pagesize=parser.defaults['pagesize'],
                expand_facets=util.expand_facets if expand_facets else None,
                callback=request.params.get('callback')
            )
            return streamer.response()

    solr_browser = Browser(**params)
    try:
        solr_browser.execute()
    except SolrException as e:
        return {'errors': [unicode(e)]}

    entries = []
    for dataset, entry in solr_browser.get_entries():
        entry = entry_apply_links(dataset.name, entry)
        entry['dataset'] = dataset_apply_links(dataset.as_dict())
        entries.append(entry)

    if format == 'csv':
        return write_csv(entries, response, filename='entries.csv')

    if expand_facets and len(datasets) == 1:
        facets = solr_browser.get_expanded_facets(datasets[0])
    else:
        facets = solr_browser.get_facets()

    return to_jsonp({
        'stats': solr_browser.get_stats(),
        'facets': facets,
        'results': entries
    })
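# A hedged sketch of querying the search endpoint above from a client.
# The /api/2/search path, host and parameter values are illustrative
# assumptions; the parameter names (dataset, format, pagesize, callback)
# come from the code itself.
import json
import urllib
import urllib2

query = urllib.urlencode({
    'dataset': 'example-budget',  # hypothetical dataset name
    'format': 'json',
    'pagesize': 100,
})
result = json.load(urllib2.urlopen(
    'http://openspending.example/api/2/search?' + query))
print result['stats'], len(result['results'])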
def index(self, format='html'):
    c.query = request.params.items()
    c.add_filter = lambda f, v: \
        '?' + urlencode(c.query + [(f, v)]
                        if (f, v) not in c.query else c.query)
    c.del_filter = lambda f, v: \
        '?' + urlencode([(k, x) for k, x in c.query
                         if (k, x) != (f, v)])
    c.results = c.datasets
    for language in request.params.getall('languages'):
        l = db.aliased(DatasetLanguage)
        c.results = c.results.join(l, Dataset._languages)
        c.results = c.results.filter(l.code == language)
    for territory in request.params.getall('territories'):
        t = db.aliased(DatasetTerritory)
        c.results = c.results.join(t, Dataset._territories)
        c.results = c.results.filter(t.code == territory)
    category = request.params.get('category')
    if category:
        c.results = c.results.filter(Dataset.category == category)
    c.results = list(c.results)
    c.territory_options = [{'code': code,
                            'count': count,
                            'url': h.url_for(controller='dataset',
                                             action='index',
                                             territories=code),
                            'label': COUNTRIES.get(code, code)}
                           for (code, count) in
                           DatasetTerritory.dataset_counts(c.results)]
    c.language_options = [{'code': code,
                           'count': count,
                           'url': h.url_for(controller='dataset',
                                            action='index',
                                            languages=code),
                           'label': LANGUAGES.get(code, code)}
                          for (code, count) in
                          DatasetLanguage.dataset_counts(c.results)]

    # TODO: figure out where to put this:
    ds_ids = [d.id for d in c.results]
    if len(ds_ids):
        q = db.select([Dataset.category, db.func.count(Dataset.id)],
                      Dataset.id.in_(ds_ids),
                      group_by=Dataset.category,
                      order_by=db.func.count(Dataset.id).desc())
        c.category_options = [{'category': category,
                               'count': count,
                               'url': h.url_for(controller='dataset',
                                                action='index',
                                                category=category),
                               'label': CATEGORIES.get(category, category)}
                              for (category, count) in
                              db.session.bind.execute(q).fetchall()
                              if category is not None]
    else:
        c.category_options = []

    c._must_revalidate = True
    if len(c.results):
        dt = max([r.updated_at for r in c.results])
        etag_cache_keygen(dt)

    if format == 'json':
        results = map(lambda d: d.as_dict(), c.results)
        results = [dataset_apply_links(r) for r in results]
        return to_jsonp({
            'datasets': results,
            'categories': c.category_options,
            'territories': c.territory_options,
            'languages': c.language_options
        })
    elif format == 'csv':
        results = map(lambda d: d.as_dict(), c.results)
        return write_csv(results, response)
    c.show_rss = True
    return templating.render('dataset/index.html')
def make_entries(self, entries):
    for dataset, entry in entries:
        entry = entry_apply_links(dataset.name, entry)
        entry['dataset'] = dataset_apply_links(dataset.as_dict())
        yield entry
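# A small usage sketch for the make_entries() generator above. The
# controller and solr_browser objects, and the newline-delimited JSON
# framing, are assumptions; the point is that entries are consumed
# lazily, so large result sets never have to be materialised in memory.
import json

def stream_entries(controller, solr_browser):
    # each entry is produced one at a time by the generator
    for entry in controller.make_entries(solr_browser.get_entries()):
        yield json.dumps(entry) + '\n'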
def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Parse the loading api parameters to get them into the right format parser = LoadingAPIParamParser(request.params) params, errors = parser.parse() if errors: response.status = 400 return to_jsonp({'errors': errors}) if params['metadata'] is None: response.status = 400 return to_jsonp({'errors': 'metadata is missing'}) if params['csv_file'] is None: response.status = 400 return to_jsonp({'errors': 'csv_file is missing'}) # We proceed with the dataset try: model = json.load(urllib2.urlopen(params['metadata'])) except: response.status = 400 return to_jsonp({'errors': 'JSON model could not be parsed'}) try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) response.status = 400 return to_jsonp({'errors': 'Model is not well formed'}) dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = params['private'] db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=params['csv_file']) log.info(source) for source_ in dataset.sources: if source_.url == params['csv_file']: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def index(self, format="html"): c.query = request.params.items() c.add_filter = lambda f, v: "?" + urlencode(c.query + [(f, v)] if (f, v) not in c.query else c.query) c.del_filter = lambda f, v: "?" + urlencode([(k, x) for k, x in c.query if (k, x) != (f, v)]) c.results = c.datasets for language in request.params.getall("languages"): l = db.aliased(DatasetLanguage) c.results = c.results.join(l, Dataset._languages) c.results = c.results.filter(l.code == language) for territory in request.params.getall("territories"): t = db.aliased(DatasetTerritory) c.results = c.results.join(t, Dataset._territories) c.results = c.results.filter(t.code == territory) category = request.params.get("category") if category: c.results = c.results.filter(Dataset.category == category) c.results = list(c.results) c.territory_options = [ { "code": code, "count": count, "url": h.url_for(controller="dataset", action="index", territories=code), "label": COUNTRIES.get(code, code), } for (code, count) in DatasetTerritory.dataset_counts(c.results) ] c.language_options = [ { "code": code, "count": count, "url": h.url_for(controller="dataset", action="index", languages=code), "label": LANGUAGES.get(code, code), } for (code, count) in DatasetLanguage.dataset_counts(c.results) ] # TODO: figure out where to put this: ds_ids = [d.id for d in c.results] if len(ds_ids): q = db.select( [Dataset.category, db.func.count(Dataset.id)], Dataset.id.in_(ds_ids), group_by=Dataset.category, order_by=db.func.count(Dataset.id).desc(), ) c.category_options = [ { "category": category, "count": count, "url": h.url_for(controller="dataset", action="index", category=category), "label": CATEGORIES.get(category, category), } for (category, count) in db.session.bind.execute(q).fetchall() if category is not None ] else: c.category_options = [] c._must_revalidate = True if len(c.results): dt = max([r.updated_at for r in c.results]) etag_cache_keygen(dt) if format == "json": results = map(lambda d: d.as_dict(), c.results) results = [dataset_apply_links(r) for r in results] return to_jsonp( { "datasets": results, "categories": c.category_options, "territories": c.territory_options, "languages": c.language_options, } ) elif format == "csv": results = map(lambda d: d.as_dict(), c.results) return write_csv(results, response) return render("dataset/index.html")
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Reuse an existing source for the same URL if there is one
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)

    return to_jsonp(dataset_apply_links(dataset.as_dict()))

def permissions(self):
    """ Check a user's permissions for a given dataset. This could
    also be done via a request to the user, but since we're not really
    doing a RESTful service we do this via the api instead. """
    # Check the parameters. Since we only use one parameter we check it
    # here instead of creating a specific parameter parser
    if len(request.params) != 1 or 'dataset' not in request.params:
        return to_jsonp({'error': 'Parameter dataset missing'})

    # Get the dataset we want to check permissions for
    dataset = Dataset.by_name(request.params['dataset'])
def index(self, format='html'): """ Get a list of all datasets along with territory, language, and category counts (amount of datasets for each). """ # Create facet filters (so we can look at a single country, # language etc.) c.query = request.params.items() c.add_filter = lambda f, v: \ '?' + urlencode(c.query + [(f, v)] if (f, v) not in c.query else c.query) c.del_filter = lambda f, v: \ '?' + urlencode([(k, x) for k, x in c.query if (k, x) != (f, v)]) # Parse the request parameters to get them into the right format parser = DatasetIndexParamParser(request.params) params, errors = parser.parse() if errors: concatenated_errors = ', '.join(errors) abort(400, _('Parameter values not supported: %s') % concatenated_errors) # We need to pop the page and pagesize parameters since they're not # used for the cache (we have to get all of the datasets to do the # language, territory, and category counts (these are then only used # for the html response) params.pop('page') pagesize = params.pop('pagesize') # Get cached indices (this will also generate them if there are no # cached results (the cache is invalidated when a dataset is published # or retracted cache = DatasetIndexCache() results = cache.index(**params) # Generate the ETag from the last modified timestamp of the first # dataset (since they are ordered in descending order by last # modified). It doesn't matter that this happens if it has (possibly) # generated the index (if not cached) since if it isn't cached then # the ETag is definitely modified. We wrap it in a try clause since # if there are no public datasets we'll get an index error. # We also don't set c._must_revalidate to True since we don't care # if the index needs a hard refresh try: etag_cache_keygen( results['datasets'][0]['timestamps']['last_modified']) except IndexError: etag_cache_keygen(None) # Assign the results to template context variables c.language_options = results['languages'] c.territory_options = results['territories'] c.category_options = results['categories'] if format == 'json': # Apply links to the dataset lists before returning the json results['datasets'] = [dataset_apply_links(r) for r in results['datasets']] return to_jsonp(results) elif format == 'csv': # The CSV response only shows datasets, not languages, # territories, etc. return write_csv(results['datasets'], response) # If we're here then it's an html format so we show rss, do the # pagination and render the template c.show_rss = True # The page parameter we popped earlier is part of request.params but # we now know it was parsed. We have to send in request.params to # retain any parameters already supplied (filters) c.page = templating.Page(results['datasets'], items_per_page=pagesize, item_count=len(results['datasets']), **request.params) return templating.render('dataset/index.html')
def model(self, dataset, format='json'):
    self._get_dataset(dataset)
    model = c.dataset.model
    model['dataset'] = dataset_apply_links(model['dataset'])
    return to_jsonp(model)
class Api2Controller(BaseController):

    def aggregate(self):
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return {'errors': errors}

        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        dataset = params.pop('dataset')
        format = params.pop('format')
        require.dataset.read(dataset)

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(
                    dataset.name, result['drilldown'])

            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])
        except (KeyError, ValueError) as ve:
            log.exception(ve)
            response.status = 400
            return {'errors': ['Invalid aggregation query: %r' % ve]}

        if format == 'csv':
            return write_csv(result['drilldown'], response,
                             filename=dataset.name + '.csv')
        return to_jsonp(result)

    @jsonpify
    def search(self):
        parser = SearchParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return {'errors': errors}

        expand_facets = params.pop('expand_facet_dimensions')

        datasets = params.pop('dataset', None)
        if datasets is None or not len(datasets):
            q = model.Dataset.all_by_account(c.account)
            if params.get('category'):
                q = q.filter_by(category=params.pop('category'))
            datasets = q.all()
            expand_facets = False

        if not len(datasets):
            return {'errors': [_("No dataset available.")]}

        params['filter']['dataset'] = []
        for dataset in datasets:
            require.dataset.read(dataset)
            params['filter']['dataset'].append(dataset.name)

        response.last_modified = max([d.updated_at for d in datasets])
        etag_cache_keygen(parser.key(), response.last_modified)

        b = Browser(**params)
        try:
            stats, facets, entries = b.execute()
        except SolrException as e:
            return {'errors': [unicode(e)]}

        _entries = []
        for dataset, entry in entries:
            entry = entry_apply_links(dataset.name, entry)
            entry['dataset'] = dataset_apply_links(dataset.as_dict())
            _entries.append(entry)

        if expand_facets and len(datasets) == 1:
            _expand_facets(facets, datasets[0])

        return {'stats': stats,
                'facets': facets,
                'results': _entries}
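# A hedged example of querying the aggregate endpoint above. The
# /api/2/aggregate path, host and the "field:value" cut syntax are
# assumptions for illustration; the parameter names (dataset, cut,
# drilldown, format) come from AggregateParamParser's usage in the code.
import json
import urllib
import urllib2

query = urllib.urlencode({
    'dataset': 'example-budget',   # hypothetical dataset name
    'cut': 'year:2012',            # restrict to one year (assumed syntax)
    'drilldown': 'category',       # break the figures down by category
    'format': 'json',
})
result = json.load(urllib2.urlopen(
    'http://openspending.example/api/2/aggregate?' + query))
print result['summary'], result.get('drilldown', [])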
class Api2Controller(BaseController):

    def _response_params(self, params):
        """ Mirror the request parameters back as X- response headers. """
        for k, v in params.items():
            k = k.replace('_', ' ').replace('-', ' ').split()
            k = '-'.join(['X'] + [l.capitalize() for l in k])
            response.headers[k] = unicode(v).encode('ascii', 'ignore')

    def aggregate(self):
        parser = AggregateParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return to_jsonp({'errors': errors})

        params['cuts'] = params.pop('cut')
        params['drilldowns'] = params.pop('drilldown')
        dataset = params.pop('dataset')
        format = params.pop('format')
        require.dataset.read(dataset)
        self._response_params(params)

        try:
            cache = AggregationCache(dataset)
            result = cache.aggregate(**params)
            if 'drilldown' in result:
                result['drilldown'] = drilldowns_apply_links(
                    dataset.name, result['drilldown'])

            response.last_modified = dataset.updated_at
            if cache.cache_enabled and 'cache_key' in result['summary']:
                etag_cache(result['summary']['cache_key'])
        except (KeyError, ValueError) as ve:
            log.exception(ve)
            response.status = 400
            return to_jsonp({'errors': [unicode(ve)]})

        if format == 'csv':
            return write_csv(result['drilldown'], response,
                             filename=dataset.name + '.csv')
        return to_jsonp(result)

    def search(self):
        parser = SearchParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return to_jsonp({'errors': errors})

        expand_facets = params.pop('expand_facet_dimensions')

        format = params.pop('format')
        if format == 'csv':
            params['stats'] = False
            params['facet_field'] = None

        datasets = params.pop('dataset', None)
        if datasets is None or not datasets:
            q = model.Dataset.all_by_account(c.account)
            if params.get('category'):
                q = q.filter_by(category=params.pop('category'))
            datasets = q.all()
            expand_facets = False

        if not datasets:
            return {'errors': ["No dataset available."]}

        params['filter']['dataset'] = []
        for dataset in datasets:
            require.dataset.read(dataset)
            params['filter']['dataset'].append(dataset.name)

        response.last_modified = max([d.updated_at for d in datasets])
        etag_cache_keygen(parser.key(), response.last_modified)

        self._response_params(params)

        if params['pagesize'] > parser.defaults['pagesize']:
            # http://wiki.nginx.org/X-accel#X-Accel-Buffering
            response.headers['X-Accel-Buffering'] = 'no'
            if format == 'csv':
                csv_headers(response, 'entries.csv')
                streamer = CSVStreamingResponse(
                    datasets, params,
                    pagesize=parser.defaults['pagesize'])
                return streamer.response()
            else:
                json_headers(filename='entries.json')
                streamer = JSONStreamingResponse(
                    datasets, params,
                    pagesize=parser.defaults['pagesize'],
                    expand_facets=_expand_facets if expand_facets else None,
                    callback=request.params.get('callback'))
                return streamer.response()

        b = Browser(**params)
        try:
            b.execute()
        except SolrException as e:
            return {'errors': [unicode(e)]}

        stats, facets, entries = b.get_stats(), b.get_facets(), \
            b.get_entries()

        _entries = []
        for dataset, entry in entries:
            entry = entry_apply_links(dataset.name, entry)
            entry['dataset'] = dataset_apply_links(dataset.as_dict())
            _entries.append(entry)

        if format == 'csv':
            return write_csv(_entries, response, filename='entries.csv')

        if expand_facets and len(datasets) == 1:
            _expand_facets(facets, datasets[0])

        return to_jsonp({
            'stats': stats,
            'facets': facets,
            'results': _entries
        })
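# A standalone demo of the header-name transformation performed by
# _response_params() above: keys are split on underscores and hyphens,
# each word is capitalised, and the result is joined under an X-
# prefix. The sample keys are made up.
def header_name(key):
    words = key.replace('_', ' ').replace('-', ' ').split()
    return '-'.join(['X'] + [w.capitalize() for w in words])

print header_name('facet_field')              # X-Facet-Field
print header_name('expand-facet-dimensions')  # X-Expand-Facet-Dimensions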