def create(self):
    """
    Create a new private dataset from the submitted form parameters.

    On success the current account is appended to the dataset managers and
    the client is redirected to the editor index of the new dataset. When
    validation fails the ``new`` form is rendered again with the errors.
    """
    require.dataset.create()
    try:
        params = dict(request.params)
        # Read the multi-valued fields explicitly with getall
        for multi_key in ('territories', 'languages'):
            params[multi_key] = request.params.getall(multi_key)

        state = ValidationState({'dataset': params})
        data = dataset_schema(state).deserialize(params)

        # Dataset names have to be unique
        if Dataset.by_name(data['name']) is not None:
            name_node = SchemaNode(String(), name='dataset.name')
            raise Invalid(
                name_node,
                _("A dataset with this identifer already exists!"))

        created = Dataset({'dataset': data})
        created.private = True
        created.managers.append(c.account)
        db.session.add(created)
        db.session.commit()

        editor_url = h.url_for(controller='editor', action='index',
                               dataset=created.name)
        redirect(editor_url)
    except Invalid as invalid:
        return self.new(invalid.asdict())
def create(self):
    """
    Adds a new dataset dynamically through a POST request

    The request must carry exactly two parameters: ``metadata`` (passed to
    ``urllib2.urlopen`` and parsed as the JSON model) and ``csv_file``
    (stored as the url of the dataset source). Every validation problem
    aborts the request with status code 400.

    Returns the JSONP representation of the dataset after queueing the
    source for loading.
    """
    # User must be authenticated so we should have a user object in
    # c.account, if not abort with error message
    if not c.account:
        abort(status_code=400, detail='user not authenticated')

    # Check if the params are there ('metadata', 'csv_file')
    if len(request.params) != 2:
        abort(status_code=400, detail='incorrect number of params')

    if 'metadata' not in request.params:
        abort(status_code=400, detail='metadata is missing')
    metadata = request.params['metadata']

    if 'csv_file' not in request.params:
        abort(status_code=400, detail='csv_file is missing')
    csv_file = request.params['csv_file']

    # We proceed with the dataset
    try:
        handle = urllib2.urlopen(metadata)
        try:
            model = json.load(handle)
        finally:
            # Always release the connection, even if parsing fails
            handle.close()
    except Exception:
        # Exception (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not turned into a 400 response
        abort(status_code=400, detail='JSON model could not be parsed')

    try:
        log.info("Validating model")
        model = validate_model(model)
    except Invalid as i:
        log.error("Errors occured during model validation:")
        for field, error in i.asdict().items():
            log.error("%s: %s", field, error)
        abort(status_code=400, detail='Model is not well formed')

    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        require.dataset.create()
        dataset.managers.append(c.account)
        dataset.private = True  # Default value
        db.session.add(dataset)
    else:
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Prefer a source of the dataset that already points at the same url
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)
    return to_jsonp(dataset_apply_links(dataset.as_dict()))
def load_with_model_and_csv(self, metadata, csv_file, private):
    """
    Load a dataset using a metadata model file and a csv file

    ``metadata`` is passed to ``urllib2.urlopen`` and parsed as the JSON
    model, ``csv_file`` is stored as the url of the dataset source and
    ``private`` is assigned to the ``private`` flag of a newly created
    dataset (existing datasets keep their flag).

    Returns the JSONP representation of the dataset, or a JSONP error
    object together with response status 400 when the input is unusable.
    """
    def fail(message):
        # All input problems are reported the same way
        response.status = 400
        return to_jsonp({'errors': message})

    if metadata is None:
        return fail('metadata is missing')

    if csv_file is None:
        return fail('csv_file is missing')

    # We proceed with the dataset
    try:
        handle = urllib2.urlopen(metadata)
        try:
            model = json.load(handle)
        finally:
            # Always release the connection, even if parsing fails
            handle.close()
    except Exception:
        # Exception (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not turned into a 400 response
        return fail('JSON model could not be parsed')

    try:
        log.info("Validating model")
        model = validate_model(model)
    except Invalid as i:
        log.error("Errors occured during model validation:")
        for field, error in i.asdict().items():
            log.error("%s: %s", field, error)
        return fail('Model is not well formed')

    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        require.dataset.create()
        dataset.managers.append(c.account)
        dataset.private = private
        db.session.add(dataset)
    else:
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Prefer a source of the dataset that already points at the same url
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)
    return to_jsonp(dataset_apply_links(dataset.as_dict()))
def test_views_update(self):
    # Relabel the first view, post the views back and check that the
    # new label was stored on the dataset
    views = Dataset.by_name('cra').data['views']
    views[0]['label'] = 'Banana'

    target = url(controller='editor', action='views_update', dataset='cra')
    response = self.app.post(target,
                             params={'views': json.dumps(views)},
                             extra_environ={'REMOTE_USER': '******'},
                             expect_errors=True)
    assert '200' in response.status, response.status

    reloaded = Dataset.by_name('cra')
    assert 'Banana' in repr(reloaded.data['views'])
def test_retract(self):
    # Retracting a public dataset makes it private, retracting it a
    # second time must be answered with a 400
    def retract(**options):
        return self.app.post(
            url(controller='editor', action='retract', dataset='cra'),
            extra_environ={'REMOTE_USER': '******'},
            **options)

    before = Dataset.by_name('cra')
    assert before.private is False, before.private

    retract()
    after = Dataset.by_name('cra')
    assert after.private is True, after.private

    rejected = retract(expect_errors=True)
    assert '400' in rejected.status, rejected.status
def test_publish(self):
    # Publishing a private dataset makes it public, publishing it a
    # second time must be answered with a 400
    def publish(**options):
        return self.app.post(
            url(controller='editor', action='publish', dataset='cra'),
            extra_environ={'REMOTE_USER': '******'},
            **options)

    hidden = Dataset.by_name('cra')
    hidden.private = True
    db.session.commit()

    publish()
    published = Dataset.by_name('cra')
    assert published.private is False, published.private

    rejected = publish(expect_errors=True)
    assert '400' in rejected.status, rejected.status
def csvimport_fixture(name):
    """
    Create the dataset and source for the csv import fixture ``name``.

    The model is read from the fixture's ``model.json``; when a
    ``mapping.json`` file object is available its content becomes the
    model's ``mapping``. The dataset is generated and stored together with
    a source that points at the fixture's ``data.csv``.

    Returns the committed source.
    """
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    try:
        model = json.load(model_fp)
        if mapping_fp:
            model['mapping'] = json.load(mapping_fp)
    finally:
        # Release the fixture file handles even when parsing fails
        for fixture_fp in (model_fp, mapping_fp):
            if fixture_fp:
                fixture_fp.close()

    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)

    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
def permissions(self):
    """
    Report the current user's permissions for a dataset as JSONP.

    The dataset is named by the single request parameter ``dataset``. Any
    other parameter combination yields a JSONP error object. For a dataset
    that does not exist only ``create`` can be granted.
    """
    params = request.params

    # Exactly one parameter is accepted and it has to be 'dataset', so no
    # dedicated parameter parser is used
    if not (len(params) == 1 and 'dataset' in params):
        return to_jsonp({'error': 'Parameter dataset missing'})

    dataset = Dataset.by_name(params['dataset'])
    exists = dataset is not None

    return to_jsonp({
        "create": can.dataset.create() and dataset is None,
        "read": can.dataset.read(dataset) if exists else False,
        "update": can.dataset.update(dataset) if exists else False,
        "delete": can.dataset.delete(dataset) if exists else False
    })
def archive_one(dataset_name, archive_dir):
    """
    Archive the model, visualisations and sources of a single dataset
    into archive_dir, creating the directory when it is missing.
    """
    dataset = Dataset.by_name(dataset_name)
    if dataset is None:
        exit_with_error("Dataset not found. Unable to archive it.")

    if not os.path.exists(archive_dir):
        # Nothing there yet, so the directory has to be created
        try:
            os.makedirs(archive_dir)
        except OSError:
            exit_with_error("Couldn't create archive directory.")
    else:
        # Something is in the way: only continue with the user's consent
        question = "%s exists. Do you want to overwrite?" % archive_dir
        if not get_confirmation(question):
            sys.exit(0)

        # A plain file cannot serve as the archive directory
        if os.path.isfile(archive_dir):
            exit_with_error("Cannot overwrite a file (need a directory).")

    # Dataset metadata first, then the visualisations
    archive_model(dataset, archive_dir)
    archive_visualisations(dataset, archive_dir)

    # Finally download all sources into their own sub directory
    sources_dir = os.path.join(archive_dir, "sources")
    update(sources_dir, dataset)
def archive_one(dataset_name, archive_dir):
    """
    Look up the dataset, prepare the archive directory and archive the
    dataset's model, visualisations and sources into it.
    """
    dataset = Dataset.by_name(dataset_name)
    if dataset is None:
        exit_with_error("Dataset not found. Unable to archive it.")

    already_there = os.path.exists(archive_dir)
    if already_there:
        # Ask before touching an existing path, stop quietly on a "no"
        confirmed = get_confirmation(
            "%s exists. Do you want to overwrite?" % archive_dir)
        if not confirmed:
            sys.exit(0)

        # The archive has to go into a directory, never over a file
        if os.path.isfile(archive_dir):
            exit_with_error("Cannot overwrite a file (need a directory).")
    else:
        try:
            os.makedirs(archive_dir)
        except OSError:
            # Creating the directory failed, report it and give up
            exit_with_error("Couldn't create archive directory.")

    # Model (dataset metadata), visualisations and then the sources
    archive_model(dataset, archive_dir)
    archive_visualisations(dataset, archive_dir)
    update(os.path.join(archive_dir, 'sources'), dataset)
def setup(self):
    # Bind the metadata to the engine and build the 'simple' fixture
    # dataset whose 'field' entry is under test
    super(TestAttributeDimension, self).setup()

    engine = db.engine
    meta = db.metadata
    meta.bind = engine

    self.engine = engine
    self.meta = meta
    self.ds = Dataset(model_fixture('simple'))
    self.field = self.ds['field']
def test_new_wrong_user(self):
    def submit(api_key, **options):
        # Both accounts send the very same model and data file
        target = url(controller='api/version2', action='create')
        params = {
            'metadata':
            'https://dl.dropbox.com/u/3250791/sample-openspending-model.json',
            'csv_file':
            'http://mk.ucant.org/info/data/sample-openspending-dataset.csv'
        }
        headers = {'Authorization': 'apikey {0}'.format(api_key)}
        return self.app.post(target, params, headers, **options)

    # First we add a Dataset with user 'test_new'
    owner = Account.by_name('test_new')
    assert owner.api_key == 'd0610659-627b-4403-8b7f-6e2820ebc95d'
    created = submit(owner.api_key)
    assert "200" in created.status
    assert Dataset.by_name('openspending-example') is not None

    # After that we try to update the Dataset with user 'test_new2'
    intruder = Account.by_name('test_new2')
    assert intruder.api_key == 'c011c340-8dad-419c-8138-1c6ded86ead5'
    denied = submit(intruder.api_key, expect_errors=True)
    assert '403' in denied.status
def test_delete(self):
    def delete(**options):
        return self.app.post(
            url(controller='editor', action='delete', dataset='cra'),
            **options)

    dataset = Dataset.by_name('cra')
    assert len(dataset) == 36, len(dataset)

    # double-check authz: without a user the request is forbidden and
    # the dataset stays untouched
    forbidden = delete(expect_errors=True)
    assert '403' in forbidden.status
    dataset = Dataset.by_name('cra')
    assert len(dataset) == 36, len(dataset)

    # with a user the dataset is gone afterwards
    delete(extra_environ={'REMOTE_USER': '******'})
    dataset = Dataset.by_name('cra')
    assert dataset is None, dataset
def permissions(self):
    """
    Return, as JSONP, what the current user may do with a dataset.

    The request has to consist of the single parameter ``dataset``,
    otherwise a JSONP error object is returned. ``read``, ``update`` and
    ``delete`` are always False for a dataset that does not exist.
    """
    # One parameter only, so it is validated right here rather than in a
    # separate parameter parser
    single_param = len(request.params) == 1
    if not single_param or 'dataset' not in request.params:
        return to_jsonp({'error': 'Parameter dataset missing'})

    dataset = Dataset.by_name(request.params['dataset'])

    # Creation is only possible while the name is still free
    create = can.dataset.create() and dataset is None
    if dataset is None:
        read = update = delete = False
    else:
        read = can.dataset.read(dataset)
        update = can.dataset.update(dataset)
        delete = can.dataset.delete(dataset)

    return to_jsonp({
        "create": create,
        "read": read,
        "update": update,
        "delete": delete})
def test_index_hide_private(self):
    # A private dataset must not show up in the JSON dataset index
    dataset = Dataset.by_name('cra')
    dataset.private = True
    db.session.commit()

    index_url = url(controller='dataset', action='index', format='json')
    listing = json.loads(self.app.get(index_url).body)
    assert len(listing['datasets']) == 0
def index(self):
    """
    Render the home page with the datasets of the current account, their
    territory counts and the number of entries.
    """
    # c.account is the logged in account, or None for an anonymous user
    datasets = Dataset.all_by_account(c.account)

    c.datasets = datasets
    c.territories = DatasetTerritory.dataset_counts(datasets)
    c.num_entries = dataset_entries(None)
    return templating.render('home/index.html')
def get_or_create_dataset(model):
    """
    Return the dataset named in the provided model. A dataset that does
    not exist yet is created from the model and committed first.
    """
    name = model['dataset']['name']
    existing = Dataset.by_name(name)

    if existing is not None:
        dataset = existing
    else:
        # Unknown name, so a new dataset is built from the model
        dataset = Dataset(model)
        db.session.add(dataset)
        db.session.commit()

    log.info("Dataset: %s", dataset.name)
    return dataset
def test_templates_update(self):
    # Posting a new serp_title must succeed and be stored on the dataset
    target = url(controller='editor', action='templates_update',
                 dataset='cra')
    response = self.app.post(target,
                             params={'serp_title': 'BANANA'},
                             extra_environ={'REMOTE_USER': '******'},
                             expect_errors=True)
    assert '200' in response.status, response.status

    dataset = Dataset.by_name('cra')
    assert dataset.serp_title == 'BANANA', dataset.serp_title
def test_team_update(self):
    # An empty team update must succeed and leave exactly one manager
    target = url(controller='editor', action='team_update', dataset='cra')
    response = self.app.post(target,
                             params={},
                             extra_environ={'REMOTE_USER': '******'},
                             expect_errors=True)
    assert '200' in response.status, response.status

    dataset = Dataset.by_name('cra')
    assert len(dataset.managers.all()) == 1, dataset.managers
def test_feeds(self):
    feed_title = '<title>Recently Created Datasets</title>'
    cra_item = '<item><title>Country Regional Analysis v2009'
    logged_in = {'REMOTE_USER': '******'}

    def fetch_feed(**options):
        return self.app.get(url(controller='dataset', action='feed_rss'),
                            **options)

    def set_private(dataset, flag):
        dataset.private = flag
        db.session.add(dataset)
        db.session.commit()

    # Anonymous user with one public dataset
    response = fetch_feed(expect_errors=True)
    assert 'application/xml' in response.content_type
    assert feed_title in response
    assert cra_item in response

    cra = Dataset.by_name('cra')
    set_private(cra, True)

    # Anonymous user with one private dataset
    response = fetch_feed(expect_errors=True)
    assert 'application/xml' in response.content_type
    assert feed_title in response
    assert cra_item not in response

    # Logged in user with one public dataset
    set_private(cra, False)
    response = fetch_feed(expect_errors=True, extra_environ=logged_in)
    assert 'application/xml' in response.content_type
    assert feed_title in response
    assert cra_item in response

    # Logged in user with one private dataset
    set_private(cra, True)
    response = fetch_feed(expect_errors=True, extra_environ=logged_in)
    assert 'application/xml' in response.content_type
    assert feed_title in response
    assert cra_item not in response

    # Logged in admin user with one private dataset
    admin_user = make_account('admin')
    admin_user.admin = True
    db.session.add(admin_user)
    db.session.commit()
    response = fetch_feed(extra_environ=logged_in)
    assert feed_title in response
    assert cra_item in response
    assert 'application/xml' in response.content_type

    # The dataset index has to advertise the feed
    response = self.app.get(url(controller='dataset', action='index'))
    feed_link = ('<link rel="alternate" type="application/rss+xml" title="'
                 'Latest Datasets on OpenSpending" href="/datasets.rss"')
    assert (feed_link in response)
def test_core_update_invalid_label(self):
    # An empty label is reported as required and must not be stored
    form = {'name': 'cra',
            'label': '',
            'description': 'I\'m a banana',
            'currency': 'GBP'}
    target = url(controller='editor', action='core_update', dataset='cra')
    response = self.app.post(target,
                             params=form,
                             extra_environ={'REMOTE_USER': '******'})
    assert 'Required' in response.body

    dataset = Dataset.by_name('cra')
    assert dataset.label != '', dataset.label
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.

    A dataset is created from the model fixture ``name``, committed and
    generated, then every row of the data fixture is converted with the
    model's mapping and loaded into it.

    ``manager``, when given, is appended to the dataset managers.
    Returns the dataset.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.model.generate()

    data = data_fixture(name)
    try:
        for row in csv.DictReader(data):
            entry = convert_types(model['mapping'], row)
            dataset.model.load(entry)
    finally:
        # Close the fixture data even when a row fails to load
        data.close()
    return dataset
def dataset_index(account, source=None):
    """
    List the datasets returned for the account, most recently updated
    first, optionally restricted to those with the given source.
    """
    # Ordering is switched off in the lookup and applied here instead
    query = Dataset.all_by_account(account, order=False).order_by(
        Dataset.updated_at.desc())

    if not source:
        return list(query)

    # Narrow the listing down to the requested source
    return list(query.filter(Dataset.source == source))
def test_view_private(self):
    # Viewing a private dataset anonymously must give a 403 that leaks
    # neither the dataset label nor the browser
    dataset = Dataset.by_name('cra')
    dataset.private = True
    db.session.commit()

    view_url = url(controller='dataset', action='view', dataset='cra')
    response = self.app.get(view_url, status=403)

    label_hidden = 'Country Regional Analysis v2009' not in response
    assert label_hidden, "'Country Regional Analysis v2009' in response!"
    browser_hidden = 'openspending_browser' not in response
    assert browser_hidden, "'openspending_browser' in response!"
def parse_dataset(self, dataset_name):
    """
    Return the dataset with the given name. A missing name or an unknown
    dataset is reported through self._error and yields None.
    """
    if dataset_name:
        found = Dataset.by_name(dataset_name)
        if found is not None:
            return found
        self._error('no dataset with name "%s"' % dataset_name)
    else:
        self._error('dataset name not provided')
    return None
def parse_dataset(self, dataset):
    """
    Return the datasets named in a '|' separated string. An empty value
    gives an empty list; the first unknown name is reported through
    self._error and yields None.
    """
    if not dataset:
        return []

    found = []
    for name in dataset.split('|'):
        match = Dataset.by_name(name)
        if match is None:
            self._error('no dataset with name "%s"' % name)
            return None
        found.append(match)
    return found
def test_new_no_apikey(self):
    # Without an api key the request is rejected and nothing is created
    target = url(controller='api/version2', action='create')
    params = {
        'metadata':
        'https://dl.dropbox.com/u/3250791/sample-openspending-model.json',
        'csv_file':
        'http://mk.ucant.org/info/data/sample-openspending-dataset.csv'
    }
    rejected = self.app.post(target, params, expect_errors=True)
    assert "400" in rejected.status
    assert Dataset.by_name('openspending-example') is None
def test_dimensions_edit_mask_with_data(self):
    # With an analysed source attached the page must say that dimensions
    # cannot be edited and offer neither the columns nor an update
    dataset = Dataset.by_name('cra')
    source = Source(dataset, self.user, 'file:///dev/null')
    source.analysis = {'columns': ['amount', 'etc']}
    db.session.add(source)
    db.session.commit()

    edit_url = url(controller='editor', action='dimensions_edit',
                   dataset='cra')
    page = self.app.get(edit_url,
                        extra_environ={'REMOTE_USER': '******'}).body
    assert 'cannot edit dimensions' in page
    assert '"amount"' not in page
    assert 'Update' not in page
def test_dimensions_update_invalid_json(self):
    # Rebuild the dataset first, then post a mapping that is not JSON
    dataset = Dataset.by_name('cra')
    dataset.drop()
    dataset.init()
    dataset.generate()

    target = url(controller='editor', action='dimensions_update',
                 dataset='cra')
    response = self.app.post(target,
                             params={'mapping': 'banana'},
                             extra_environ={'REMOTE_USER': '******'},
                             expect_errors=True)
    assert '400' in response.status, response.status
def test_core_update_invalid_currency(self):
    # An unknown currency is rejected and the stored currency is kept
    response = self.app.post(url(controller='editor',
                                 action='core_update', dataset='cra'),
                             params={'name': 'cra',
                                     'label': 'Common Rough Act',
                                     'description': 'I\'m a banana',
                                     'currency': 'glass pearls',
                                     'default_time': 2009},
                             extra_environ={'REMOTE_USER': '******'})
    assert 'not a valid currency' in response.body
    cra = Dataset.by_name('cra')
    # Report the value that is actually compared when the check fails
    assert cra.currency == 'GBP', cra.currency
def test_core_update(self):
    # A complete, valid form must update label and currency
    form = {'name': 'cra',
            'label': 'Common Rough Act',
            'description': 'I\'m a banana',
            'currency': 'EUR',
            'languages': 'en',
            'territories': 'gb',
            'category': 'budget',
            'default_time': 2009}
    target = url(controller='editor', action='core_update', dataset='cra')
    self.app.post(target,
                  params=form,
                  extra_environ={'REMOTE_USER': '******'})

    dataset = Dataset.by_name('cra')
    assert dataset.label == 'Common Rough Act', dataset.label
    assert dataset.currency == 'EUR', dataset.currency
def setup(self):
    # Load and index the 'cra' fixture, then keep one member of the
    # compound dimension 'cofog1' (the one named '3') for the tests
    super(TestDimensionController, self).setup()
    load_fixture('cra')
    clean_and_reindex_solr()
    self.cra = Dataset.by_name('cra')

    for dimension in self.cra.dimensions:
        if not isinstance(dimension, CompoundDimension):
            continue
        if dimension.name != 'cofog1':
            continue
        matches = dimension.members(dimension.alias.c.name == '3',
                                    limit=1)
        self.member = list(matches).pop()
        break
def test_core_update_invalid_territory(self):
    # An unknown territory must neither be accepted nor stored
    form = {'name': 'cra',
            'label': 'CRA',
            'territories': 'su',
            'description': 'I\'m a banana',
            'currency': 'GBP',
            'default_time': 2009}
    target = url(controller='editor', action='core_update', dataset='cra')
    response = self.app.post(target,
                             params=form,
                             extra_environ={'REMOTE_USER': '******'})
    assert 'updated' not in response.body

    dataset = Dataset.by_name('cra')
    assert 'su' not in dataset.territories
def setup(self):
    # Prepare the 'cra' fixture and remember the member named '3' of the
    # compound dimension 'cofog1'
    super(TestDimensionController, self).setup()
    load_fixture('cra')
    clean_and_reindex_solr()
    self.cra = Dataset.by_name('cra')

    for candidate in self.cra.dimensions:
        is_cofog1 = (isinstance(candidate, CompoundDimension) and
                     candidate.name == 'cofog1')
        if is_cofog1:
            name_is_three = candidate.alias.c.name == '3'
            members = list(candidate.members(name_is_three, limit=1))
            self.member = members.pop()
            break
def test_core_update_invalid_category(self):
    # An unknown category is rejected and the label stays unchanged
    form = {'name': 'cra',
            'label': 'Common Rough Act',
            'description': 'I\'m a banana',
            'currency': 'EUR',
            'languages': 'en',
            'territories': 'gb',
            'category': 'foo',
            'default_time': 2009}
    target = url(controller='editor', action='core_update', dataset='cra')
    response = self.app.post(target,
                             params=form,
                             extra_environ={'REMOTE_USER': '******'})
    assert 'valid category' in response.body

    dataset = Dataset.by_name('cra')
    assert dataset.label != 'Common Rough Act', dataset.label
def test_core_update_invalid_label(self):
    # Submitting an empty label must produce a 'Required' error and keep
    # the stored label
    post_kwargs = dict(
        params={
            'name': 'cra',
            'label': '',
            'description': 'I\'m a banana',
            'currency': 'GBP'
        },
        extra_environ={'REMOTE_USER': '******'})
    response = self.app.post(
        url(controller='editor', action='core_update', dataset='cra'),
        **post_kwargs)
    assert 'Required' in response.body

    stored = Dataset.by_name('cra')
    assert stored.label != '', stored.label
def test_new_dataset(self):
    # A request with a valid api key creates the example dataset
    account = Account.by_name('test_new')
    assert account.api_key == 'd0610659-627b-4403-8b7f-6e2820ebc95d'

    target = url(controller='api/version2', action='create')
    params = {
        'metadata':
        'https://dl.dropbox.com/u/3250791/sample-openspending-model.json',
        'csv_file':
        'http://mk.ucant.org/info/data/sample-openspending-dataset.csv'
    }
    headers = {'Authorization': 'apikey {0}'.format(account.api_key)}

    created = self.app.post(target, params, headers)
    assert "200" in created.status
    assert Dataset.by_name('openspending-example') is not None
def test_core_update_invalid_currency(self):
    # An unknown currency is rejected and the stored currency is kept
    response = self.app.post(url(controller='editor',
                                 action='core_update', dataset='cra'),
                             params={
                                 'name': 'cra',
                                 'label': 'Common Rough Act',
                                 'description': 'I\'m a banana',
                                 'currency': 'glass pearls',
                                 'default_time': 2009
                             },
                             extra_environ={'REMOTE_USER': '******'})
    assert 'not a valid currency' in response.body
    cra = Dataset.by_name('cra')
    # Report the value that is actually compared when the check fails
    assert cra.currency == 'GBP', cra.currency