Example 1
def import_csv(dataset, url, args):
    """
    Import the csv data into the dataset
    """

    csv_data_url, source_url = url
    source = Source(dataset, shell_account(), csv_data_url)
    # Analyse the csv data and add the result to the source.
    # If we skip the analysis, the source is left with a confusing
    # placeholder message.
    source.analysis = analyze_csv(csv_data_url)
    # Check to see if the dataset already has this source
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source again
        # or else we'll add a new one)
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
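
A minimal invocation sketch, assuming the dataset already exists and that args is an argparse-style namespace whose attributes become keyword arguments for CSVImporter.run() (the option names below are illustrative assumptions, not confirmed importer options):

import argparse

args = argparse.Namespace(dry_run=True, max_errors=10)  # assumed run() options
dataset = Dataset.by_name('cra')  # 'cra' is the dataset name used in the tests below
import_csv(dataset, ('file:///tmp/data.csv', 'http://example.com/data.csv'), args)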
Example 2
    def create(self):
        """
        Adds a new dataset dynamically through a POST request
        """

        # The user must be authenticated, so we should have a user object
        # in c.account; if not, abort with an error message.
        if not c.account:
            abort(status_code=400, detail='user not authenticated')

        # Check if the params are there ('metadata', 'csv_file')
        if len(request.params) != 2:
            abort(status_code=400, detail='incorrect number of params')

        metadata = request.params['metadata'] \
            if 'metadata' in request.params \
            else abort(status_code=400, detail='metadata is missing')

        csv_file = request.params['csv_file'] \
            if 'csv_file' in request.params \
            else abort(status_code=400, detail='csv_file is missing')

        # We proceed with the dataset
        try:
            model = json.load(urllib2.urlopen(metadata))
        except Exception:
            abort(status_code=400, detail='JSON model could not be parsed')
        try:
            log.info("Validating model")
            model = validate_model(model)
        except Invalid as i:
            log.error("Errors occured during model validation:")
            for field, error in i.asdict().items():
                log.error("%s: %s", field, error)
            abort(status_code=400, detail='Model is not well formed')
        dataset = Dataset.by_name(model['dataset']['name'])
        if dataset is None:
            dataset = Dataset(model)
            require.dataset.create()
            dataset.managers.append(c.account)
            dataset.private = True  # Default value
            db.session.add(dataset)
        else:
            require.dataset.update(dataset)

        log.info("Dataset: %s", dataset.name)
        source = Source(dataset=dataset, creator=c.account, url=csv_file)

        log.info(source)
        for source_ in dataset.sources:
            if source_.url == csv_file:
                source = source_
                break
        db.session.add(source)
        db.session.commit()

        # Send loading of source into celery queue
        load_source.delay(source.id)
        return to_jsonp(dataset_apply_links(dataset.as_dict()))
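
A hedged client-side sketch of calling this endpoint with the requests library; the route path and cookie name are assumptions, and an authenticated session is needed since the controller checks c.account:

import requests

resp = requests.post('http://localhost:5000/api/2/new',  # assumed route
                     data={'metadata': 'http://example.com/model.json',
                           'csv_file': 'http://example.com/data.csv'},
                     cookies={'auth_tkt': '...'})  # hypothetical auth cookie
print resp.status_code, resp.json()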
Example 3
    def test_view_source(self):
        url_ = 'http://banana.com/split.csv'
        source = Source(self.dataset, self.user, url_)
        db.session.add(source)
        db.session.commit()
        response = self.app.get(url(controller='source',
                                    action='view',
                                    dataset='cra',
                                    id=source.id),
                                extra_environ={'REMOTE_USER': '******'})
        assert response.headers['Location'] == url_, response.headers
Example 4
    def load_with_model_and_csv(self, metadata, csv_file, private):
        """
        Load a dataset using a metadata model file and a csv file
        """

        if metadata is None:
            response.status = 400
            return to_jsonp({'errors': 'metadata is missing'})

        if csv_file is None:
            response.status = 400
            return to_jsonp({'errors': 'csv_file is missing'})

        # We proceed with the dataset
        try:
            model = json.load(urllib2.urlopen(metadata))
        except Exception:
            response.status = 400
            return to_jsonp({'errors': 'JSON model could not be parsed'})
        try:
            log.info("Validating model")
            model = validate_model(model)
        except Invalid as i:
            log.error("Errors occured during model validation:")
            for field, error in i.asdict().items():
                log.error("%s: %s", field, error)
            response.status = 400
            return to_jsonp({'errors': 'Model is not well formed'})
        dataset = Dataset.by_name(model['dataset']['name'])
        if dataset is None:
            dataset = Dataset(model)
            require.dataset.create()
            dataset.managers.append(c.account)
            dataset.private = private
            db.session.add(dataset)
        else:
            require.dataset.update(dataset)

        log.info("Dataset: %s", dataset.name)
        source = Source(dataset=dataset, creator=c.account, url=csv_file)

        log.info(source)
        for source_ in dataset.sources:
            if source_.url == csv_file:
                source = source_
                break
        db.session.add(source)
        db.session.commit()

        # Send loading of source into celery queue
        load_source.delay(source.id)
        return to_jsonp(dataset_apply_links(dataset.as_dict()))
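
A direct-call sketch with hypothetical argument values, mirroring what the loading API passes through (controller stands in for an instance of the class that defines this method):

controller.load_with_model_and_csv(
    metadata='http://example.com/model.json',
    csv_file='http://example.com/data.csv',
    private=True)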
Example 5
    def test_dimensions_edit_mask_with_data(self):
        cra = Dataset.by_name('cra')
        src = Source(cra, self.user, 'file:///dev/null')
        src.analysis = {'columns': ['amount', 'etc']}
        db.session.add(src)
        db.session.commit()
        response = self.app.get(url(controller='editor',
                                    action='dimensions_edit',
                                    dataset='cra'),
                                extra_environ={'REMOTE_USER': '******'})
        assert 'cannot edit dimensions' in response.body
        assert '"amount"' not in response.body
        assert 'Update' not in response.body
Example 6
def csvimport_fixture(name):
    """Build a dataset and a committed source from the named fixture files."""
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
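
A short usage sketch for tests: build the fixture, then run the importer from example 1 against the returned source (the fixture name 'simple' is an assumption):

source = csvimport_fixture('simple')  # assumed fixture directory name
importer = CSVImporter(source)
importer.run()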
Example 7
    def create(self, dataset):
        self._get_dataset(dataset)
        require.dataset.update(c.dataset)
        try:
            schema = source_schema()
            data = schema.deserialize(request.params)
            source = Source(c.dataset, c.account, data['url'])
            db.session.add(source)
            db.session.commit()
            analyze_source.apply_async(args=[source.id], countdown=2)
            h.flash_success(_("The source has been created."))
            redirect(
                h.url_for(controller='editor',
                          action='index',
                          dataset=c.dataset.name))
        except Invalid as i:
            errors = i.asdict()
            errors = [(k[len('source.'):], v) for k, v in errors.items()]
            return self.new(dataset, dict(errors))
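
A sketch of the deserialization step in isolation, assuming source_schema() validates at least a 'url' field (the real schema may check more):

schema = source_schema()
data = schema.deserialize({'url': 'http://example.com/data.csv'})
assert data['url'] == 'http://example.com/data.csv'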
Example 8
    def create(self):
        """
        Adds a new dataset dynamically through a POST request
        """

        # The user must be authenticated, so we should have a user object
        # in c.account; if not, abort with an error message.
        if not c.account:
            abort(status_code=400, detail='user not authenticated')

        # Parse the loading api parameters to get them into the right format
        parser = LoadingAPIParamParser(request.params)
        params, errors = parser.parse()

        if errors:
            response.status = 400
            return to_jsonp({'errors': errors})

        if params['metadata'] is None:
            response.status = 400
            return to_jsonp({'errors': 'metadata is missing'})

        if params['csv_file'] is None:
            response.status = 400
            return to_jsonp({'errors': 'csv_file is missing'})

        # We proceed with the dataset
        try:
            model = json.load(urllib2.urlopen(params['metadata']))
        except Exception:
            response.status = 400
            return to_jsonp({'errors': 'JSON model could not be parsed'})
        try:
            log.info("Validating model")
            model = validate_model(model)
        except Invalid as i:
            log.error("Errors occured during model validation:")
            for field, error in i.asdict().items():
                log.error("%s: %s", field, error)
            response.status = 400
            return to_jsonp({'errors': 'Model is not well formed'})
        dataset = Dataset.by_name(model['dataset']['name'])
        if dataset is None:
            dataset = Dataset(model)
            require.dataset.create()
            dataset.managers.append(c.account)
            dataset.private = params['private']
            db.session.add(dataset)
        else:
            require.dataset.update(dataset)

        log.info("Dataset: %s", dataset.name)
        source = Source(dataset=dataset,
                        creator=c.account,
                        url=params['csv_file'])

        log.info(source)
        for source_ in dataset.sources:
            if source_.url == params['csv_file']:
                source = source_
                break
        db.session.add(source)
        db.session.commit()

        # Send loading of source into celery queue
        load_source.delay(source.id)
        return to_jsonp(dataset_apply_links(dataset.as_dict()))
Example 9
def create_budget_data_package(url, user, private):
    try:
        bdpkg = BudgetDataPackage(url)
    except Exception as problem:
        # Lots of different types of problems can arise with a
        # BudgetDataPackage, but their messages should be understandable,
        # so we catch any Exception and log its message for the user
        log.error("Failed to parse budget data package: {0}".format(
            problem))
        return []

    sources = []
    for (idx, resource) in enumerate(bdpkg.resources):
        dataset = Dataset.by_name(bdpkg.name)
        if dataset is None:
            # Get information from the descriptor file for the given
            # resource (at index idx)
            info = get_dataset_info_from_descriptor(bdpkg, idx)
            # Set the dataset name based on the previously computed one
            info['dataset']['name'] = bdpkg.name
            # Create the model from the resource schema
            model = create_model_from_schema(resource.schema)
            # Set the default value for the time to the fiscal year of the
            # resource, because it isn't included in the budget CSV so we
            # won't be able to load it along with the data.
            model['time']['default_value'] = resource.fiscalYear
            # Add the model as the mapping
            info['mapping'] = model

            # Create the dataset
            dataset = Dataset(info)
            dataset.managers.append(user)
            dataset.private = private
            db.session.add(dataset)
            db.session.commit()
        else:
            if not dataset.can_update(user):
                log.error(
                    "User {0} not permitted to update dataset {1}".format(
                        user.name, bdpkg.name))
                return []

        if 'url' in resource:
            resource_url = resource.url
        elif 'path' in resource:
            if 'base' in bdpkg:
                resource_url = urlparse.urljoin(bdpkg.base, resource.path)
            else:
                resource_url = urlparse.urljoin(url, resource.path)
        else:
            log.error('Resource has no url or path')
            return []

        # We do not re-add old sources so if we find the same source
        # we don't do anything, else we create the source and append it
        # to the source list
        for dataset_source in dataset.sources:
            if dataset_source.url == resource_url:
                break
        else:
            source = Source(dataset=dataset, creator=user, url=resource_url)
            db.session.add(source)
            db.session.commit()
            sources.append(source)

    return sources
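
A hedged usage sketch that queues each returned source for loading, as the API examples above do; the account lookup helper is an assumption:

user = Account.by_name('test')  # assumed account helper
sources = create_budget_data_package(
    'http://example.com/datapackage.json', user, private=True)
for source in sources:
    load_source.delay(source.id)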