def create(self):
    require.dataset.create()
    try:
        dataset = dict(request.params)
        dataset['territories'] = request.params.getall('territories')
        dataset['languages'] = request.params.getall('languages')
        model = {'dataset': dataset}
        schema = dataset_schema(ValidationState(model))
        data = schema.deserialize(dataset)
        if Dataset.by_name(data['name']) is not None:
            raise Invalid(
                SchemaNode(String(), name='dataset.name'),
                _("A dataset with this identifier already exists!"))
        dataset = Dataset({'dataset': data})
        dataset.private = True
        dataset.managers.append(c.account)
        db.session.add(dataset)
        db.session.commit()
        redirect(h.url_for(controller='editor', action='index',
                           dataset=dataset.name))
    except Invalid as i:
        errors = i.asdict()
        return self.new(errors)
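# For reference, a minimal sketch of the error shape the handler above
# relies on, assuming the Invalid/SchemaNode/String names come from
# colander: Invalid.asdict() flattens node errors into a plain
# {node_name: message} dict that self.new(errors) can feed back into
# the form template. The message value here is illustrative.
from colander import Invalid, SchemaNode, String

err = Invalid(SchemaNode(String(), name='dataset.name'),
              "A dataset with this identifier already exists!")
assert err.asdict() == {
    'dataset.name': 'A dataset with this identifier already exists!'}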
def setup(self):
    super(TestAttributeDimension, self).setup()
    self.engine = db.engine
    self.meta = db.metadata
    self.meta.bind = self.engine
    self.ds = Dataset(model_fixture('simple'))
    self.field = self.ds['field']
def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Check if the params are there ('metadata', 'csv_file') if len(request.params) != 2: abort(status_code=400, detail='incorrect number of params') metadata = request.params['metadata'] \ if 'metadata' in request.params \ else abort(status_code=400, detail='metadata is missing') csv_file = request.params['csv_file'] \ if 'csv_file' in request.params \ else abort(status_code=400, detail='csv_file is missing') # We proceed with the dataset try: model = json.load(urllib2.urlopen(metadata)) except: abort(status_code=400, detail='JSON model could not be parsed') try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) abort(status_code=400, detail='Model is not well formed') dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = True # Default value db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=csv_file) log.info(source) for source_ in dataset.sources: if source_.url == csv_file: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def load_with_model_and_csv(self, metadata, csv_file, private):
    """ Load a dataset using a metadata model file and a csv file """

    if metadata is None:
        response.status = 400
        return to_jsonp({'errors': 'metadata is missing'})

    if csv_file is None:
        response.status = 400
        return to_jsonp({'errors': 'csv_file is missing'})

    # We proceed with the dataset
    try:
        model = json.load(urllib2.urlopen(metadata))
    except Exception:
        response.status = 400
        return to_jsonp({'errors': 'JSON model could not be parsed'})

    try:
        log.info("Validating model")
        model = validate_model(model)
    except Invalid as i:
        log.error("Errors occurred during model validation:")
        for field, error in i.asdict().items():
            log.error("%s: %s", field, error)
        response.status = 400
        return to_jsonp({'errors': 'Model is not well formed'})

    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        require.dataset.create()
        dataset.managers.append(c.account)
        dataset.private = private
        db.session.add(dataset)
    else:
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Reuse an existing source with the same URL instead of adding a
    # duplicate
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)

    return to_jsonp(dataset_apply_links(dataset.as_dict()))
def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
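# Usage sketch for csvimport_fixture in a test, assuming a fixture
# directory named 'simple' that ships model.json and data.csv; the
# test name is hypothetical:
def test_csvimport_fixture_creates_source():
    source = csvimport_fixture('simple')
    assert source.url.endswith('data.csv')
    assert source.dataset is not None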
def get_or_create_dataset(model):
    """ Based on a provided model we get the dataset (and if it
    doesn't exist we create it). """

    # Get the dataset by the name provided in the model
    dataset = Dataset.by_name(model['dataset']['name'])

    # If the dataset wasn't found we create it
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
        db.session.commit()

    # Log information about the dataset and return it
    log.info("Dataset: %s", dataset.name)
    return dataset
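# Usage sketch for get_or_create_dataset, reusing the model_fixture
# helper from the tests above; calling it twice with the same model
# returns the existing dataset instead of creating a duplicate:
model = model_fixture('simple')
first = get_or_create_dataset(model)
second = get_or_create_dataset(model)
assert second.name == first.name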
def load_fixture(name, manager=None):
    """ Load fixture data into the database. """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.model.generate()
    data = data_fixture(name)
    reader = csv.DictReader(data)
    for row in reader:
        entry = convert_types(model['mapping'], row)
        dataset.model.load(entry)
    data.close()
    return dataset
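# Usage sketch: load the 'simple' fixture (model plus CSV data, as the
# setup methods above suggest) and attach a manager account via the
# make_account helper used elsewhere in these tests:
user = make_account()
dataset = load_fixture('simple', manager=user)
assert user in dataset.managers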
def setup(self):
    super(TestDatasetLoad, self).setup()
    self.ds = Dataset(model_fixture('simple'))
    self.ds.generate()
    self.engine = db.engine
def setup(self):
    super(TestDataset, self).setup()
    self.model = model_fixture('simple')
    self.ds = Dataset(self.model)
def create(self): """ Adds a new dataset dynamically through a POST request """ # User must be authenticated so we should have a user object in # c.account, if not abort with error message if not c.account: abort(status_code=400, detail='user not authenticated') # Parse the loading api parameters to get them into the right format parser = LoadingAPIParamParser(request.params) params, errors = parser.parse() if errors: response.status = 400 return to_jsonp({'errors': errors}) if params['metadata'] is None: response.status = 400 return to_jsonp({'errors': 'metadata is missing'}) if params['csv_file'] is None: response.status = 400 return to_jsonp({'errors': 'csv_file is missing'}) # We proceed with the dataset try: model = json.load(urllib2.urlopen(params['metadata'])) except: response.status = 400 return to_jsonp({'errors': 'JSON model could not be parsed'}) try: log.info("Validating model") model = validate_model(model) except Invalid as i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) response.status = 400 return to_jsonp({'errors': 'Model is not well formed'}) dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) require.dataset.create() dataset.managers.append(c.account) dataset.private = params['private'] db.session.add(dataset) else: require.dataset.update(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset=dataset, creator=c.account, url=params['csv_file']) log.info(source) for source_ in dataset.sources: if source_.url == params['csv_file']: source = source_ break db.session.add(source) db.session.commit() # Send loading of source into celery queue load_source.delay(source.id) return to_jsonp(dataset_apply_links(dataset.as_dict()))
def create_budget_data_package(url, user, private):
    try:
        bdpkg = BudgetDataPackage(url)
    except Exception as problem:
        # Lots of different types of problems can arise with a
        # BudgetDataPackage, but their message should be understandable
        # so we catch just any Exception and email its message to the
        # user
        log.error("Failed to parse budget data package: {0}".format(
            problem.message))
        return []

    sources = []
    for (idx, resource) in enumerate(bdpkg.resources):
        dataset = Dataset.by_name(bdpkg.name)
        if dataset is None:
            # Get information from the descriptor file for the given
            # resource (at index idx)
            info = get_dataset_info_from_descriptor(bdpkg, idx)
            # Set the dataset name based on the previously computed one
            info['dataset']['name'] = bdpkg.name
            # Create the model from the resource schema
            model = create_model_from_schema(resource.schema)
            # Set the default value for the time to the fiscal year of
            # the resource, because it isn't included in the budget CSV
            # so we won't be able to load it along with the data.
            model['time']['default_value'] = resource.fiscalYear
            # Add the model as the mapping
            info['mapping'] = model
            # Create the dataset
            dataset = Dataset(info)
            dataset.managers.append(user)
            dataset.private = private
            db.session.add(dataset)
            db.session.commit()
        else:
            if not dataset.can_update(user):
                log.error(
                    "User {0} not permitted to update dataset {1}".format(
                        user.name, bdpkg.name))
                return []

        if 'url' in resource:
            resource_url = resource.url
        elif 'path' in resource:
            if 'base' in bdpkg:
                resource_url = urlparse.urljoin(bdpkg.base, resource.path)
            else:
                resource_url = urlparse.urljoin(url, resource.path)
        else:
            log.error('Url not found')
            return []

        # We do not re-add old sources so if we find the same source
        # we don't do anything, else we create the source and append it
        # to the source list
        for dataset_source in dataset.sources:
            if dataset_source.url == resource_url:
                break
        else:
            source = Source(dataset=dataset, creator=user,
                            url=resource_url)
            db.session.add(source)
            db.session.commit()
            sources.append(source)

    return sources
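# Illustrative call to create_budget_data_package; the descriptor URL
# is a placeholder, while make_account and load_source come from the
# helpers and controllers used above:
sources = create_budget_data_package(
    'http://example.org/budget/datapackage.json',
    make_account(), private=True)
for source in sources:
    # Queue each newly registered source for loading, as the
    # controllers above do
    load_source.delay(source.id)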