def test_delete_dataset_requires_auth(self):
    """Deleting without an API key must 403 and leave the dataset intact."""
    target = self.cra.name
    # No auth credentials are passed in the query string.
    res = self.client.delete(url_for('datasets_api.delete', name=target),
                             query_string={})
    assert '403' in res.status, res.status
    remaining = Dataset.by_name(target)
    assert remaining is not None, remaining
def test_create_dataset(self):
    """An empty payload fails validation; a complete one creates the dataset."""
    endpoint = url_for('datasets_api.create')
    headers = {'content-type': 'application/json'}
    # Empty document: expect a validation error response.
    res = self.client.post(endpoint, data=json.dumps({}),
                           query_string=self.auth_qs, headers=headers)
    assert '400' in res.status, res.status
    assert 'errors' in res.json, res.json
    # Valid document: expect the dataset to be created and returned.
    payload = {
        'name': 'testds',
        'label': 'Test Dataset',
        'category': 'budget',
        'description': 'I\'m a banana!',
        'currency': 'EUR'
    }
    res = self.client.post(endpoint, data=json.dumps(payload),
                           query_string=self.auth_qs, headers=headers)
    assert '200' in res.status, res.status
    assert res.json['name'] == 'testds', res.json
    created = Dataset.by_name('testds')
    assert created.label == payload['label'], created
def test_delete_dataset(self):
    """An authenticated delete returns 410 and removes the dataset."""
    target = self.cra.name
    res = self.client.delete(url_for('datasets_api.delete', name=target),
                             query_string=self.auth_qs)
    assert '410' in res.status, res.status
    gone = Dataset.by_name(target)
    assert gone is None, gone
def test_delete_dataset_requires_auth(self):
    """An unauthenticated delete is rejected and the dataset survives."""
    name = self.cra.name
    delete_url = url_for('datasets_api.delete', name=name)
    # Deliberately empty query string: no api_key supplied.
    res = self.client.delete(delete_url, query_string={})
    assert '403' in res.status, res.status
    ds = Dataset.by_name(name)
    assert ds is not None, ds
def test_delete_dataset(self):
    """With credentials, deletion succeeds (410) and the record disappears."""
    name = self.cra.name
    delete_url = url_for('datasets_api.delete', name=name)
    res = self.client.delete(delete_url, query_string=self.auth_qs)
    assert '410' in res.status, res.status
    ds = Dataset.by_name(name)
    assert ds is None, ds
def test_view_fields_empty(self):
    """With an emptied fields mapping, the structure view exposes no columns."""
    cra = Dataset.by_name('cra')
    cra.fields = {}
    db.session.commit()
    res = self.client.get(url_for('datasets_api.structure', name='cra'))
    assert 'cap_or_cur' not in res.json.get('fields'), res.json
def test_view_fields_empty(self):
    """Clearing a dataset's fields removes them from the structure endpoint."""
    ds = Dataset.by_name('cra')
    ds.fields = {}
    db.session.commit()
    url = url_for('datasets_api.structure', name='cra')
    res = self.client.get(url)
    reported = res.json.get('fields')
    assert 'cap_or_cur' not in reported, res.json
def load_from_source(dataset_name, source_name):
    """Transform the named source of a dataset and load the result.

    Logs and aborts when the dataset cannot be found or no source name
    is given, mirroring the guards used by the other loader tasks in
    this module.
    """
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            # Bug fix: previously an unknown dataset slipped through and
            # transform_source was called with dataset=None.
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        artifact = tasks.transform_source(dataset, source_name)
        if artifact is None:
            # Transformation failed or produced nothing; skip the load.
            return
        tasks.load(dataset, source_name=source_name)
def load_from_url(dataset_name, url):
    """Extract a remote URL into a source, then queue a load of it."""
    with flask_app.app_context():
        ds = Dataset.by_name(dataset_name)
        if ds is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        src = tasks.extract_url(ds, url)
        if src is None:
            # Extraction failed; nothing to schedule.
            return
        load_from_source.delay(dataset_name, src.name)
def load_from_url(dataset_name, url):
    """Pull data from `url` into a new source and enqueue its loading."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        source = tasks.extract_url(dataset, url)
        if source is not None:
            # Chain the transform/load step as a separate task.
            load_from_source.delay(dataset_name, source.name)
def test_publish(self):
    """A manager can flip a private dataset to public via the update API."""
    cra = Dataset.by_name('cra')
    cra.private = True
    db.session.commit()
    url = url_for('datasets_api.view', name='cra')
    auth = {'api_key': self.user.api_key}
    # Anonymous access to a private dataset is forbidden.
    res = self.client.get(url)
    assert '403' in res.status, res.status
    # The owning user can still read it.
    res = self.client.get(url, query_string=auth)
    assert '200' in res.status, res.status
    payload = res.json.copy()
    payload['category'] = 'budget'
    payload['private'] = False
    response = self.client.post(url, data=json.dumps(payload),
                                headers={'content-type': 'application/json'},
                                query_string=auth)
    assert '200' in response.status, response.json
    cra = Dataset.by_name('cra')
    assert cra.private is False, cra.private
def test_publish(self):
    """Publishing: private dataset 403s anonymously, then is made public."""
    ds = Dataset.by_name('cra')
    ds.private = True
    db.session.commit()
    view_url = url_for('datasets_api.view', name='cra')
    res = self.client.get(view_url)
    assert '403' in res.status, res.status
    res = self.client.get(view_url,
                          query_string={'api_key': self.user.api_key})
    assert '200' in res.status, res.status
    # Re-submit the dataset document with private flipped off.
    doc = res.json.copy()
    doc['category'] = 'budget'
    doc['private'] = False
    response = self.client.post(
        view_url, data=json.dumps(doc),
        headers={'content-type': 'application/json'},
        query_string={'api_key': self.user.api_key})
    assert '200' in response.status, response.json
    ds = Dataset.by_name('cra')
    assert ds.private is False, ds.private
def create():
    """Create a new dataset from the posted JSON description.

    Requires dataset-creation privileges, validates the payload, and
    rejects names that are already taken. Returns the view of the new
    dataset.
    """
    require.dataset.create()
    dataset = request_data()
    data = validate_dataset(dataset)
    if Dataset.by_name(data['name']) is not None:
        # Fixed typo in the user-facing message ("identifer").
        raise Invalid(SchemaNode(String(), name='name'),
                      _("A dataset with this identifier already exists!"))
    dataset = Dataset({'dataset': data, 'model': {}})
    dataset.managers.append(current_user)
    db.session.add(dataset)
    db.session.commit()
    return view(dataset.name)
def authz():
    """Report read/update permissions on the dataset named in the query."""
    obj = Dataset.by_name(request.args.get('dataset'))
    # Keyed privately so per-user permission results are not shared.
    etag_cache_keygen(obj, private=True)
    if obj is None:
        # Unknown dataset: no rights at all.
        return jsonify({'read': False, 'update': False})
    rights = {'read': dataset.read(obj), 'update': dataset.update(obj)}
    return jsonify(rights)
def test_update_invalid_label(self):
    """An empty label is rejected with a length validation error."""
    payload = {'name': 'cra',
               'label': '',
               'description': 'I\'m a banana',
               'currency': 'GBP'}
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(payload),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    assert '400' in res.status, res.status
    assert 'Shorter than' in res.data, res.json
    # The stored label must be untouched.
    cra = Dataset.by_name('cra')
    assert cra.label != '', cra.label
def create():
    """Create a dataset from posted, validated JSON.

    Enforces the create permission and uniqueness of the dataset name
    before persisting; the current user becomes a manager.
    """
    require.dataset.create()
    dataset = request_data()
    data = validate_dataset(dataset)
    if Dataset.by_name(data['name']) is not None:
        # Fixed typo in the user-facing message ("identifer").
        raise Invalid(SchemaNode(String(), name='name'),
                      _("A dataset with this identifier already exists!"))
    dataset = Dataset({'dataset': data, 'model': {}})
    dataset.managers.append(current_user)
    db.session.add(dataset)
    db.session.commit()
    return view(dataset.name)
def test_update_invalid_currency(self):
    """An unknown currency code must be rejected and leave data unchanged."""
    data = {'name': 'cra',
            'label': 'Common Rough Act',
            'description': 'I\'m a banana',
            'category': 'budget',
            'currency': 'glass pearls'}
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(data),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    # Added the status-code assertion for consistency with the other
    # invalid-update tests, which all check for a 400 response.
    assert '400' in res.status, res.status
    assert 'not one of' in res.data, res.json
    cra = Dataset.by_name('cra')
    assert cra.currency == 'GBP', cra.label
def get_or_create_dataset(model):
    """Fetch the dataset named in `model`, creating and persisting it
    when it does not exist yet."""
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        # Not present yet: build a fresh dataset from the model and save it.
        dataset = Dataset(model)
        db.session.add(dataset)
        db.session.commit()
    log.info("Dataset: %s", dataset.name)
    return dataset
def load_from_source(dataset_name, source_name):
    """Transform a named source and load the result into its dataset."""
    with flask_app.app_context():
        ds = Dataset.by_name(dataset_name)
        if ds is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        transformed = tasks.transform_source(ds, source_name)
        if transformed is None:
            # Transformation yielded nothing; do not load.
            return
        tasks.load(ds, source_name=source_name)
def load_from_source(dataset_name, source_name):
    """Run the transform step for a source, then load it if it produced
    an artifact."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        result = tasks.transform_source(dataset, source_name)
        if result is not None:
            tasks.load(dataset, source_name=source_name)
def test_update(self):
    """A valid update changes the dataset's label and currency."""
    payload = {'name': 'cra',
               'label': 'Common Rough Act',
               'description': 'I\'m a banana',
               'currency': 'EUR',
               'languages': ['en'],
               'territories': ['GB'],
               'category': 'budget'}
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(payload),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    cra = Dataset.by_name('cra')
    assert cra.label == 'Common Rough Act', (cra.label, res.json)
    assert cra.currency == 'EUR', (cra.currency, res.json)
def test_update_invalid_territory(self):
    """An invalid territory code ('su') is rejected and not stored."""
    payload = {'name': 'cra',
               'label': 'CRA',
               'territories': ['su'],
               'description': 'I\'m a banana',
               'currency': 'GBP'}
    response = self.client.post(url_for('datasets_api.update', name='cra'),
                                data=json.dumps(payload),
                                headers={'content-type': 'application/json'},
                                query_string={'api_key': self.user.api_key})
    assert '400' in response.status, response.status
    assert 'updated' not in response.data
    cra = Dataset.by_name('cra')
    assert 'su' not in cra.territories
def test_update_invalid_category(self):
    """An unknown category fails validation; no fields are applied."""
    payload = {'name': 'cra',
               'label': 'Common Rough Act',
               'description': 'I\'m a banana',
               'currency': 'EUR',
               'languages': ['en'],
               'territories': ['GB'],
               'category': 'foo'}
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(payload),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    assert '400' in res.status, res.status
    assert 'not one of' in res.data, res.json
    # The label from the rejected payload must not have been applied.
    cra = Dataset.by_name('cra')
    assert cra.label != 'Common Rough Act', cra.label
def test_update_invalid_label(self):
    """Submitting an empty label yields a 400 and keeps the old label."""
    data = {
        'name': 'cra',
        'label': '',
        'description': 'I\'m a banana',
        'currency': 'GBP'
    }
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(data),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    assert '400' in res.status, res.status
    assert 'Shorter than' in res.data, res.json
    ds = Dataset.by_name('cra')
    assert ds.label != '', ds.label
def test_update_invalid_currency(self):
    """A nonsense currency value is rejected; the stored value survives."""
    data = {
        'name': 'cra',
        'label': 'Common Rough Act',
        'description': 'I\'m a banana',
        'category': 'budget',
        'currency': 'glass pearls'
    }
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(data),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    # Added the status-code assertion for consistency with the other
    # invalid-update tests, which all check for a 400 response.
    assert '400' in res.status, res.status
    assert 'not one of' in res.data, res.json
    cra = Dataset.by_name('cra')
    assert cra.currency == 'GBP', cra.label
def cube(self, name, locale=None):
    """Build a cubes Cube model for the named dataset.

    Raises NoSuchCubeError when no dataset of that name exists.
    """
    dataset = Dataset.by_name(name)
    # Bug fix: guard on the lookup result, not on the name argument —
    # Dataset.by_name returns None for an unknown name, and the old
    # `if name is None` check let that None crash below.
    if dataset is None:
        raise NoSuchCubeError("Unknown dataset %s" % name, name)
    measures, dimensions, mappings = [], [], {}
    # Fixed typo in the aggregate label ("Numer").
    aggregates = [MeasureAggregate('num_entries',
                                   label='Number of entries',
                                   function='count')]
    for measure in dataset.model.measures:
        cubes_measure = Measure(measure.name, label=measure.label)
        measures.append(cubes_measure)
        aggregate = MeasureAggregate(measure.name,
                                     label=measure.label,
                                     measure=measure.name,
                                     function='sum')
        aggregates.append(aggregate)
        mappings[measure.name] = measure.column
    for dimension in dataset.model.dimensions:
        attributes = []
        for attr in dimension.attributes:
            attributes.append(attr.name)
            mappings[attr.path] = attr.column
        meta = {
            'label': dimension.label,
            'name': dimension.name,
            'levels': [{
                'name': dimension.name,
                'label': dimension.label,
                # 'key': 'name',
                'attributes': attributes
            }]
        }
        dimensions.append(create_dimension(meta))
    return Cube(name=dataset.name,
                fact=dataset.fact_table.table.name,
                aggregates=aggregates,
                measures=measures,
                label=dataset.label,
                description=dataset.description,
                dimensions=dimensions,
                store=self.store,
                mappings=mappings)
def test_update_invalid_territory(self):
    """A bad territory code produces a 400 and is not persisted."""
    data = {
        'name': 'cra',
        'label': 'CRA',
        'territories': ['su'],
        'description': 'I\'m a banana',
        'currency': 'GBP'
    }
    response = self.client.post(
        url_for('datasets_api.update', name='cra'),
        data=json.dumps(data),
        headers={'content-type': 'application/json'},
        query_string={'api_key': self.user.api_key})
    assert '400' in response.status, response.status
    assert 'updated' not in response.data
    ds = Dataset.by_name('cra')
    assert 'su' not in ds.territories
def test_update(self):
    """A fully valid update payload is applied to the stored dataset."""
    data = {
        'name': 'cra',
        'label': 'Common Rough Act',
        'description': 'I\'m a banana',
        'currency': 'EUR',
        'languages': ['en'],
        'territories': ['GB'],
        'category': 'budget'
    }
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(data),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    ds = Dataset.by_name('cra')
    assert ds.label == 'Common Rough Act', (ds.label, res.json)
    assert ds.currency == 'EUR', (ds.currency, res.json)
def test_update_invalid_category(self):
    """Category outside the allowed set is rejected without side effects."""
    data = {
        'name': 'cra',
        'label': 'Common Rough Act',
        'description': 'I\'m a banana',
        'currency': 'EUR',
        'languages': ['en'],
        'territories': ['GB'],
        'category': 'foo'
    }
    res = self.client.post(url_for('datasets_api.update', name='cra'),
                           data=json.dumps(data),
                           headers={'content-type': 'application/json'},
                           query_string={'api_key': self.user.api_key})
    assert '400' in res.status, res.status
    assert 'not one of' in res.data, res.json
    ds = Dataset.by_name('cra')
    assert ds.label != 'Common Rough Act', ds.label
def test_create_dataset(self):
    """Creation rejects an empty body, then accepts a valid description."""
    create_url = url_for('datasets_api.create')
    json_headers = {'content-type': 'application/json'}
    res = self.client.post(create_url, data=json.dumps({}),
                           query_string=self.auth_qs,
                           headers=json_headers)
    assert '400' in res.status, res.status
    assert 'errors' in res.json, res.json
    params = {'name': 'testds',
              'label': 'Test Dataset',
              'category': 'budget',
              'description': 'I\'m a banana!',
              'currency': 'EUR'}
    res = self.client.post(create_url, data=json.dumps(params),
                           query_string=self.auth_qs,
                           headers=json_headers)
    assert '200' in res.status, res.status
    assert res.json['name'] == 'testds', res.json
    stored = Dataset.by_name('testds')
    assert stored.label == params['label'], stored
def cube(self, name, locale=None, namespace=None):
    """Build and link a cubes Cube for the named dataset.

    Raises NoSuchCubeError when no dataset of that name exists.
    """
    dataset = Dataset.by_name(name)
    # Bug fix: test the lookup result instead of the name argument —
    # Dataset.by_name returns None for an unknown name, and the old
    # `if name is None` check never caught that case.
    if dataset is None:
        raise NoSuchCubeError("Unknown dataset %s" % name, name)
    measures, dimensions, mappings = [], [], {}
    aggregates = [
        MeasureAggregate('fact_count',
                         label='Number of entries',
                         function='count')
    ]
    for measure in dataset.model.measures:
        cubes_measure = Measure(measure.name, label=measure.label)
        measures.append(cubes_measure)
        aggregate = MeasureAggregate(measure.name + '_sum',
                                     label=measure.label,
                                     measure=measure.name,
                                     function='sum')
        aggregates.append(aggregate)
        mappings[measure.name] = measure.column
    for dimension in dataset.model.dimensions:
        attributes, last_col = [], None
        for attr in dimension.attributes:
            attributes.append({'name': attr.name, 'label': attr.label})
            mappings[attr.path] = last_col = attr.column
        # Workaround because the cubes mapper shortens references
        # for single-attribute dimensions to just the dimension name.
        if len(attributes) == 1:
            mappings[dimension.name] = last_col
        # Translate into cubes' categories
        cardinality = 'high'
        if dimension.cardinality:
            if dimension.cardinality < 6:
                cardinality = 'tiny'
            elif dimension.cardinality < 51:
                cardinality = 'low'
            elif dimension.cardinality < 1001:
                cardinality = 'medium'
        meta = {
            'label': dimension.label,
            'name': dimension.name,
            'cardinality': cardinality,
            'levels': [{
                'name': dimension.name,
                'label': dimension.label,
                'cardinality': cardinality,
                'attributes': attributes
            }]
        }
        if dimension.key_attribute:
            meta['levels'][0]['key'] = dimension.key_attribute.name
        if dimension.label_attribute:
            meta['levels'][0]['label_attribute'] = \
                dimension.label_attribute.name
            meta['levels'][0]['order_attribute'] = \
                dimension.label_attribute.name
        dimensions.append(Dimension.from_metadata(meta))
    cube = Cube(name=dataset.name,
                fact=dataset.fact_table.table.name,
                aggregates=aggregates,
                measures=measures,
                label=dataset.label,
                description=dataset.description,
                dimensions=dimensions,
                store=self.store,
                mappings=mappings)
    link_cube(cube, locale, provider=self, namespace=namespace)
    return cube
def load_from_url(dataset_name, url):
    """Extract data from `url` into a new source, then queue a load.

    Logs and aborts when the dataset cannot be found, matching the
    guards used by the other loader tasks in this module.
    """
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            # Bug fix: previously an unknown dataset reached
            # tasks.extract_url with dataset=None.
            log.error("Dataset not found: %s", dataset_name)
            return
        source = tasks.extract_url(dataset, url)
        if source is not None:
            load_from_source.delay(dataset_name, source.name)
def authz():
    """Expose read/update rights for the dataset given in the query string."""
    obj = Dataset.by_name(request.args.get("dataset"))
    if obj is None:
        # Unknown dataset: deny everything.
        return jsonify({"read": False, "update": False})
    rights = {"read": dataset.read(obj), "update": dataset.update(obj)}
    return jsonify(rights)
def cube(self, name, locale=None, namespace=None):
    """Build and link a cubes Cube for the named dataset.

    Raises NoSuchCubeError when no dataset of that name exists.
    """
    dataset = Dataset.by_name(name)
    # Bug fix: check the lookup result rather than the name argument —
    # Dataset.by_name returns None for an unknown name, and the old
    # `if name is None` check never caught that case.
    if dataset is None:
        raise NoSuchCubeError("Unknown dataset %s" % name, name)
    measures, dimensions, mappings = [], [], {}
    aggregates = [MeasureAggregate("fact_count",
                                   label="Number of entries",
                                   function="count")]
    for measure in dataset.model.measures:
        cubes_measure = Measure(measure.name, label=measure.label)
        measures.append(cubes_measure)
        aggregate = MeasureAggregate(
            measure.name + "_sum",
            label=measure.label,
            measure=measure.name,
            function="sum"
        )
        aggregates.append(aggregate)
        mappings[measure.name] = measure.column
    for dimension in dataset.model.dimensions:
        attributes, last_col = [], None
        for attr in dimension.attributes:
            attributes.append({"name": attr.name, "label": attr.label})
            mappings[attr.path] = last_col = attr.column
        # Workaround because the cubes mapper shortens references
        # for single-attribute dimensions to just the dimension name.
        if len(attributes) == 1:
            mappings[dimension.name] = last_col
        # Translate into cubes' categories
        cardinality = "high"
        if dimension.cardinality:
            if dimension.cardinality < 6:
                cardinality = "tiny"
            elif dimension.cardinality < 51:
                cardinality = "low"
            elif dimension.cardinality < 1001:
                cardinality = "medium"
        meta = {
            "label": dimension.label,
            "name": dimension.name,
            "cardinality": cardinality,
            "levels": [
                {
                    "name": dimension.name,
                    "label": dimension.label,
                    "cardinality": cardinality,
                    "attributes": attributes,
                }
            ],
        }
        if dimension.key_attribute:
            meta["levels"][0]["key"] = dimension.key_attribute.name
        if dimension.label_attribute:
            meta["levels"][0]["label_attribute"] = dimension.label_attribute.name
            meta["levels"][0]["order_attribute"] = dimension.label_attribute.name
        dimensions.append(Dimension.from_metadata(meta))
    cube = Cube(
        name=dataset.name,
        fact=dataset.fact_table.table.name,
        aggregates=aggregates,
        measures=measures,
        label=dataset.label,
        description=dataset.description,
        dimensions=dimensions,
        store=self.store,
        mappings=mappings,
    )
    link_cube(cube, locale, provider=self, namespace=namespace)
    return cube
def has_cube(self, name):
    """Return whether a dataset of this name exists and has a model."""
    ds = Dataset.by_name(name)
    return ds is not None and ds.has_model
def authz():
    """Answer read/update permissions for the requested dataset."""
    obj = Dataset.by_name(request.args.get('dataset'))
    if obj is None:
        # No such dataset: deny both rights.
        return jsonify({'read': False, 'update': False})
    perms = {'read': dataset.read(obj), 'update': dataset.update(obj)}
    return jsonify(perms)
def has_cube(self, name):
    """Check that the named dataset exists and carries a model."""
    found = Dataset.by_name(name)
    return False if found is None else found.has_model
def create_budget_data_package(url, user, private):
    """Import a Budget Data Package from `url` and register its resources
    as sources of a (possibly newly created) dataset.

    Returns the list of newly created Source objects; returns an empty
    list when parsing fails, the user lacks update rights, or a resource
    has no usable URL.

    NOTE(review): `problem.message` and `urlparse.urljoin` are Python 2
    idioms; this module appears to target Python 2 — confirm before
    porting.
    """
    try:
        bdpkg = BudgetDataPackage(url)
    except Exception as problem:
        # Lots of different types of problems can arise with a
        # BudgetDataPackage, but their message should be understandable
        # so we catch just any Exception and email its message to the user.
        log.error("Failed to parse budget data package: {0}".format(
            problem.message))
        return []
    sources = []
    for (idx, resource) in enumerate(bdpkg.resources):
        # Looked up per resource; after the first iteration creates the
        # dataset, subsequent iterations find it and take the else branch.
        dataset = Dataset.by_name(bdpkg.name)
        if dataset is None:
            # Get information from the descriptor file for the given
            # resource (at index idx).
            info = get_dataset_info_from_descriptor(bdpkg, idx)
            # Set the dataset name based on the previously computed one.
            info['dataset']['name'] = bdpkg.name
            # Create the model from the resource schema.
            model = create_model_from_schema(resource.schema)
            # Set the default value for the time to the fiscal year of the
            # resource, because it isn't included in the budget CSV so we
            # won't be able to load it along with the data.
            model['time']['default_value'] = resource.fiscalYear
            # Add the model as the mapping.
            info['mapping'] = model
            # Create and persist the dataset, owned by `user`.
            dataset = Dataset(info)
            dataset.managers.append(user)
            dataset.private = private
            db.session.add(dataset)
            db.session.commit()
        else:
            # Existing dataset: abort the whole import if the user may
            # not modify it.
            if not dataset.can_update(user):
                log.error(
                    "User {0} not permitted to update dataset {1}".format(
                        user.name, bdpkg.name))
                return []
        # Resolve the resource location: an absolute url wins, otherwise
        # a path relative to the package base (or the package url).
        if 'url' in resource:
            resource_url = resource.url
        elif 'path' in resource:
            if 'base' in bdpkg:
                resource_url = urlparse.urljoin(bdpkg.base, resource.path)
            else:
                resource_url = urlparse.urljoin(url, resource.path)
        else:
            log.error('Url not found')
            return []
        # We do not re-add old sources so if we find the same source
        # we don't do anything, else we create the source and append it
        # to the source list (for/else: else runs only without a break).
        for dataset_source in dataset.sources:
            if dataset_source.url == resource_url:
                break
        else:
            source = Source(dataset=dataset, creator=user,
                            url=resource_url)
            db.session.add(source)
            db.session.commit()
            sources.append(source)
    return sources
def get_dataset(name):
    """Load a dataset by name (404 when absent) and enforce read access."""
    ds = obj_or_404(Dataset.by_name(name))
    require.dataset.read(ds)
    return ds
def authz():
    """Return the caller's read/update rights for the requested dataset."""
    obj = Dataset.by_name(request.args.get('dataset'))
    # Cache key is per-user (private) since rights differ between users.
    etag_cache_keygen(obj, private=True)
    if obj is None:
        return jsonify({'read': False, 'update': False})
    return jsonify({'read': dataset.read(obj),
                    'update': dataset.update(obj)})
def cube(self, name, locale=None, namespace=None):
    """Build and link a cubes Cube for the named dataset.

    Raises NoSuchCubeError when no dataset of that name exists.
    """
    dataset = Dataset.by_name(name)
    # Bug fix: guard on the lookup result, not the name argument —
    # Dataset.by_name returns None for an unknown name, and the old
    # `if name is None` check never caught that case.
    if dataset is None:
        raise NoSuchCubeError("Unknown dataset %s" % name, name)
    measures, dimensions, mappings = [], [], {}
    aggregates = [MeasureAggregate('fact_count',
                                   label='Number of entries',
                                   function='count')]
    for measure in dataset.model.measures:
        cubes_measure = Measure(measure.name, label=measure.label)
        measures.append(cubes_measure)
        aggregate = MeasureAggregate(measure.name + '_sum',
                                     label=measure.label,
                                     measure=measure.name,
                                     function='sum')
        aggregates.append(aggregate)
        mappings[measure.name] = measure.column_name
    for dimension in dataset.model.dimensions:
        attributes, last_col = [], None
        for attr in dimension.attributes:
            attributes.append({
                'name': attr.name,
                'label': attr.label
            })
            mappings[attr.ref] = last_col = attr.column_name
        # Workaround because the cubes mapper shortens references
        # for single-attribute dimensions to just the dimension name.
        if len(attributes) == 1:
            mappings[dimension.name] = last_col
        meta = {
            'label': dimension.label,
            'name': dimension.name,
            'cardinality': dimension.cardinality_class,
            'levels': [{
                'name': dimension.name,
                'label': dimension.label,
                'cardinality': dimension.cardinality_class,
                'attributes': attributes
            }]
        }
        if dimension.key_attribute:
            meta['levels'][0]['key'] = dimension.key_attribute.name
        if dimension.label_attribute:
            meta['levels'][0]['label_attribute'] = \
                dimension.label_attribute.name
            meta['levels'][0]['order_attribute'] = \
                dimension.label_attribute.name
        dimensions.append(Dimension.from_metadata(meta))
    cube = Cube(name=dataset.name,
                fact=dataset.fact_table.table.name,
                aggregates=aggregates,
                measures=measures,
                label=dataset.label,
                description=dataset.description,
                dimensions=dimensions,
                store=self.store,
                mappings=mappings)
    link_cube(cube, locale, provider=self, namespace=namespace)
    return cube