def import_csv(dataset, url, args):
    """ Import the csv data into the dataset.

    ``url`` is a ``(csv_data_url, source_url)`` pair: the first is where the
    data is actually read from, the second is the URL the source should be
    recorded under (they differ when importing from the local file system).
    """
    csv_data_url, source_url = url
    source = Source(dataset, shell_account(), csv_data_url)

    # Check to see if the dataset already has this source; if so, reuse it
    # instead of adding a duplicate row for the same URL.
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break

    # Analyse the csv data and attach the result to the source we are
    # actually going to persist. If we don't analyse it we'll be left with
    # a weird message.
    # BUG FIX: previously the analysis was assigned to the freshly created
    # Source *before* the duplicate check, so it was thrown away whenever an
    # existing source was reused.
    source.analysis = analyze_csv(csv_data_url)

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source again
        # or else we'll add a new one)
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
def create(self):
    """ Adds a new dataset dynamically through a POST request.

    Expects exactly two request params: ``metadata`` (a URL pointing at a
    JSON model) and ``csv_file`` (the URL of the CSV data). Aborts with
    HTTP 400 on any validation failure.
    """

    # User must be authenticated so we should have a user object in
    # c.account, if not abort with error message
    if not c.account:
        abort(status_code=400, detail='user not authenticated')

    # Check if the params are there ('metadata', 'csv_file')
    if len(request.params) != 2:
        abort(status_code=400, detail='incorrect number of params')

    metadata = request.params['metadata'] \
        if 'metadata' in request.params \
        else abort(status_code=400, detail='metadata is missing')

    csv_file = request.params['csv_file'] \
        if 'csv_file' in request.params \
        else abort(status_code=400, detail='csv_file is missing')

    # We proceed with the dataset
    try:
        # NOTE(review): this fetches a caller-supplied URL server-side,
        # which is open to SSRF — consider validating scheme/host.
        model = json.load(urllib2.urlopen(metadata))
    except Exception:
        # BUG FIX: was a bare ``except:``, which would also swallow
        # SystemExit/KeyboardInterrupt; only trap real runtime failures.
        abort(status_code=400, detail='JSON model could not be parsed')

    try:
        log.info("Validating model")
        model = validate_model(model)
    except Invalid as i:
        log.error("Errors occured during model validation:")
        for field, error in i.asdict().items():
            log.error("%s: %s", field, error)
        abort(status_code=400, detail='Model is not well formed')

    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        require.dataset.create()
        dataset.managers.append(c.account)
        dataset.private = True  # Default value
        db.session.add(dataset)
    else:
        require.dataset.update(dataset)

    log.info("Dataset: %s", dataset.name)
    source = Source(dataset=dataset, creator=c.account, url=csv_file)
    log.info(source)

    # Reuse an existing source with the same URL rather than duplicating it.
    for source_ in dataset.sources:
        if source_.url == csv_file:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Send loading of source into celery queue
    load_source.delay(source.id)
    return to_jsonp(dataset_apply_links(dataset.as_dict()))
def test_view_source(self):
    """ Viewing a source redirects to the source's own URL. """
    source_url = 'http://banana.com/split.csv'
    source = Source(self.dataset, self.user, source_url)
    db.session.add(source)
    db.session.commit()

    view_url = url(controller='source', action='view',
                   dataset='cra', id=source.id)
    response = self.app.get(view_url,
                            extra_environ={'REMOTE_USER': '******'})
    assert response.headers['Location'] == source_url, response.headers
def test_dimensions_edit_mask_with_data(self):
    """ Once a source with data exists, the dimensions editor is masked. """
    cra = Dataset.by_name('cra')
    null_source = Source(cra, self.user, 'file:///dev/null')
    null_source.analysis = {'columns': ['amount', 'etc']}
    db.session.add(null_source)
    db.session.commit()

    edit_url = url(controller='editor', action='dimensions_edit',
                   dataset='cra')
    response = self.app.get(edit_url,
                            extra_environ={'REMOTE_USER': '******'})

    body = response.body
    assert 'cannot edit dimensions' in body
    assert '"amount"' not in body
    assert 'Update' not in body
def csvimport_fixture(name):
    """ Build a committed Source fixture for the named csvimport case.

    Loads the fixture's model (merging in an optional mapping file),
    creates and generates the dataset, and returns a Source pointing at
    the fixture's data.csv.
    """
    model = json.load(csvimport_fixture_file(name, 'model.json'))
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)

    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)

    source = Source(dataset, make_account(),
                    csvimport_fixture_path(name, 'data.csv'))
    db.session.add(source)
    db.session.commit()
    return source
def model(datasetname):
    # if not sourcename then we are saving the defaults for dataset
    dataset = get_dataset(datasetname)

    if not dataset.source:
        # then create one
        existing = Source.by_source_name(dataset.name)
        if existing:
            existing.dataset = dataset
        else:
            db.session.add(Source(name=dataset.name, dataset=dataset))
        db.session.commit()

    # figure out what they need over there?
    return jsonify(dataset.source)
def create(): """ This takes a json format post with label, name, description and creates a private dataset to put sources in The json_errors return a json object """ if not require.dataset.create(): return jsonify( {"errors": ["Can not create new dataset. Permission denied"]}) try: dataset = api_form_data() if not dataset.get("dataorg", None): return jsonify( {"errors": ["You must select the data source organization"]}) model = {'data': dataset} schema = dataset_schema(ValidationState(model)) data = schema.deserialize(dataset) #should have a better place for sluggify if (data.get('name', None)): tempname = slugify(str(data.get('name')), max_length=50) else: tempname = slugify(str(data.get('label')), max_length=50) if Dataset.by_name(tempname) is not None: return jsonify( {"errors": ["A dataset with this name already exists "]}) dataset = Dataset(data=data) dataset.managers.append(current_user) db.session.add(dataset) dataset_source = Source.by_source_name(dataset.name) if not dataset_source: dataset_source = Source(dataset=dataset, name=dataset.name) db.session.add(dataset_source) else: dataset_source.dataset = dataset #creating a new dataset so we have to create a source as well db.session.commit() return jsonify({"success": True, "dataset": dataset.name}) except Exception, e: ex_type, ex, tb = sys.exc_info() print traceback.print_tb(tb) return jsonify({"errors": ['Unknown Error has occurred: ' + str(e)]})
def create(self, dataset):
    """ Create a new source for the dataset from the posted form params.

    On success, queues asynchronous analysis of the source and redirects
    back to the editor index; on validation failure, re-renders the "new
    source" form with the field errors.
    """
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)
    try:
        data = source_schema().deserialize(request.params)
        new_source = Source(c.dataset, c.account, data['url'])
        db.session.add(new_source)
        db.session.commit()

        # Defer analysis slightly so the commit is visible to the worker.
        analyze_source.apply_async(args=[new_source.id], countdown=2)
        h.flash_success(_("The source has been created."))
        redirect(
            h.url_for(controller='editor',
                      action='index',
                      dataset=c.dataset.name))
    except Invalid as i:
        # Strip the 'source.' prefix from field names before re-rendering.
        errors = dict((key[len('source.'):], msg)
                      for key, msg in i.asdict().items())
        return self.new(dataset, errors)
if len(request.files) == 1: upload_source_path = sourcefiles.save(request.files['sourcefile']) sourcefile = SourceFile(rawfile=upload_source_path) db.session.add(sourcefile) if basesource: if basesource.rawfile: basesource.rawfile.delete() basesource.rawfile = sourcefile source = basesource source.reload_openrefine() else: source = Source(dataset=dataset, name=data['name'], url=None, rawfile=sourcefile) db.session.add(source) #handle file elif data.get('url', None): if basesource: source = basesource source.name = data['name'] source.url = data['url'] source.reload_openrefine() #maybe reload the OpenRefine? #trigger reload else: source = Source(dataset=dataset, name=data['name'],
try: log.info("Validating model") model = validate_model(model) except Invalid, i: log.error("Errors occured during model validation:") for field, error in i.asdict().items(): log.error("%s: %s", field, error) return 1 dataset = Dataset.by_name(model['dataset']['name']) if dataset is None: dataset = Dataset(model) db.session.add(dataset) log.info("Dataset: %s", dataset.name) source = Source(dataset, shell_account(), csv_data_url) for source_ in dataset.sources: if source_.url == csv_data_url: source = source_ break db.session.add(source) db.session.commit() dataset.generate() importer = CSVImporter(source) importer.run(**vars(args)) return 0 def _csvimport(args): return csvimport(args.dataset_url, args)
orig_filepath = os.path.join(file_dir, filename) with codecs.open(orig_filepath, 'rb') as fh: wuezfile = FileStorage(stream=fh) #upload_source_path = sourcefiles.save(wuezfile, name=filename, folder=UPLOADED_FILES_DEST) upload_source_path = sourcefiles.save(wuezfile, name=filename) sourcefile = SourceFile(rawfile=upload_source_path) db.session.add(sourcefile) except Exception, e: print "!!!!!Error failed", e return (None, False) try: print sourcefile source = Source(dataset=dataset, name=dataset.name, url=None, rawfile=sourcefile) except Exception, e: traceback.print_exc(e) print "Could not load source rawfile", e return (None, False) else: try: source = Source(dataset=dataset, name=dataset.name, url=sourcejson['fields']['webservice'], rawfile=None) except Exception, e: print "Could not load source webservice", e