class DatasetTestCase(unittest.TestCase):
    """Checks that a Dataset built from SIMPLE_MODEL exposes the expected
    properties, dimensions, metrics and generated database schema."""

    def setUp(self):
        make_test_app()
        self.ds = Dataset(SIMPLE_MODEL)

    def tearDown(self):
        tear_down_test_app()

    def test_load_model_properties(self):
        # Name and label must be copied verbatim from the model dict.
        assert self.ds.name == SIMPLE_MODEL["dataset"]["name"], self.ds.name
        assert self.ds.label == SIMPLE_MODEL["dataset"]["label"], self.ds.label

    def test_load_model_dimensions(self):
        # SIMPLE_MODEL declares four dimensions (two value, two complex)
        # and exactly one metric.
        assert len(self.ds.dimensions) == 4, self.ds.dimensions
        assert isinstance(self.ds["time"], ValueDimension), self.ds["time"]
        assert isinstance(self.ds["field"], ValueDimension), self.ds["field"]
        assert isinstance(self.ds["to"], ComplexDimension), self.ds["to"]
        assert isinstance(self.ds["function"], ComplexDimension), self.ds["function"]
        assert len(self.ds.metrics) == 1, self.ds.metrics
        assert isinstance(self.ds["amount"], Metric), self.ds["amount"]

    def test_value_dimensions_as_attributes(self):
        self.ds.generate()
        dim = self.ds["field"]
        assert isinstance(dim.column.type, UnicodeText), dim.column
        assert "field" == dim.column.name, dim.column
        assert dim.name == "field", dim.name
        assert dim.source_column == SIMPLE_MODEL["mapping"]["field"]["column"], dim.source_column
        assert dim.label == SIMPLE_MODEL["mapping"]["field"]["label"], dim.label
        # Fix: identity comparison for None (PEP 8) instead of '== None'.
        assert dim.default is None, dim.default
        assert dim.dataset == self.ds, dim.dataset
        assert dim.datatype == "string", dim.datatype
        # Value dimensions live on the entry table itself, so they get
        # no dedicated table or alias of their own.
        assert not hasattr(dim, "table")
        assert not hasattr(dim, "alias")

    def test_generate_db_entry_table(self):
        self.ds.generate()
        assert self.ds.table.name == "test_entry", self.ds.table.name
        cols = self.ds.table.c
        assert "id" in cols
        assert isinstance(cols["id"].type, Integer)
        # NOTE(review): 'time' is stored as text rather than a date/time
        # column type — the empty TODO here suggested a pending change;
        # confirm whether that is still intended.
        assert "time" in cols
        assert isinstance(cols["time"].type, UnicodeText)
        assert "amount" in cols
        assert isinstance(cols["amount"].type, Float)
        assert "field" in cols
        assert isinstance(cols["field"].type, UnicodeText)
        # Complex dimensions are represented by foreign-key id columns.
        assert "to_id" in cols
        assert isinstance(cols["to_id"].type, Integer)
        assert "function_id" in cols
        assert isinstance(cols["function_id"].type, Integer)
        self.assertRaises(KeyError, cols.__getitem__, "foo")
Exemple #2
0
def create():
    """Create a new dataset from the request payload.

    Requires dataset-creation permission; validates the payload and
    rejects duplicate names before persisting.
    """
    require.dataset.create()
    dataset = request_data()
    data = validate_dataset(dataset)
    if Dataset.by_name(data['name']) is not None:
        # Fix: corrected the typo "identifer" in the user-facing message.
        raise Invalid(SchemaNode(String(), name='name'),
                      _("A dataset with this identifier already exists!"))
    dataset = Dataset({'dataset': data, 'model': {}})
    # The creating user becomes the first manager of the dataset.
    dataset.managers.append(current_user)
    db.session.add(dataset)
    db.session.commit()
    return view(dataset.name)
Exemple #3
0
def get_or_create_dataset(model):
    """Return the dataset named in *model*, creating and persisting it
    when no dataset with that name exists yet."""
    dataset = Dataset.by_name(model['dataset']['name'])

    if dataset is None:
        # Not found: build a fresh dataset from the model and commit it.
        dataset = Dataset(model)
        db.session.add(dataset)
        db.session.commit()

    log.info("Dataset: %s", dataset.name)
    return dataset
Exemple #4
0
 def test_delete_dataset_requires_auth(self):
     """An unauthenticated DELETE is refused and the dataset survives."""
     name = self.cra.name
     res = self.client.delete(url_for('datasets_api.delete', name=name),
                              query_string={})
     assert '403' in res.status, res.status
     ds = Dataset.by_name(name)
     assert ds is not None, ds
Exemple #5
0
    def test_create_dataset(self):
        """The create endpoint rejects an empty body and accepts a full one."""
        url = url_for('datasets_api.create')
        headers = {'content-type': 'application/json'}
        # An empty payload must fail validation with a 400.
        res = self.client.post(url,
                               data=json.dumps({}),
                               query_string=self.auth_qs,
                               headers=headers)
        assert '400' in res.status, res.status
        assert 'errors' in res.json, res.json

        params = {'name': 'testds',
                  'label': 'Test Dataset',
                  'category': 'budget',
                  'description': 'I\'m a banana!',
                  'currency': 'EUR'}
        # A complete payload is accepted and echoes the dataset name.
        res = self.client.post(url,
                               data=json.dumps(params),
                               query_string=self.auth_qs,
                               headers=headers)
        assert "200" in res.status, res.status
        assert res.json['name'] == 'testds', res.json

        ds = Dataset.by_name('testds')
        assert ds.label == params['label'], ds
Exemple #6
0
 def list_cubes(self):
     """Return name/label descriptors for every dataset that has a model."""
     return [{'name': ds.name, 'label': ds.label}
             for ds in Dataset.all_by_account(None)
             if ds.has_model]
Exemple #7
0
def query_index():
    """Build the dataset index query for the current user, applying any
    language/territory filters from the request, and compute facet counts.

    Returns a (pager, languages, territories) tuple where the facet lists
    contain dicts with 'code', 'count' and 'label' keys.
    """
    q = Dataset.all_by_account(current_user, order=False)
    q = q.order_by(Dataset.updated_at.desc())

    # Filter by languages if they have been provided
    for language in request.args.getlist('languages'):
        # Renamed from 'l': single-letter 'l' is ambiguous (PEP 8 / E741).
        lang_alias = aliased(DatasetLanguage)
        q = q.join(lang_alias, Dataset._languages)
        q = q.filter(lang_alias.code == language)

    # Filter by territories if they have been provided
    for territory in request.args.getlist('territories'):
        terr_alias = aliased(DatasetTerritory)
        q = q.join(terr_alias, Dataset._territories)
        q = q.filter(terr_alias.code == territory)

    # Return a list of languages as dicts with code, count, url and label
    languages = [{'code': code, 'count': count, 'label': LANGUAGES.get(code)}
                 for (code, count) in DatasetLanguage.dataset_counts(q)]

    territories = [{'code': code, 'count': count, 'label': COUNTRIES.get(code)}
                   for (code, count) in DatasetTerritory.dataset_counts(q)]

    pager = Pager(q, limit=15)
    return pager, languages, territories
Exemple #8
0
 def list_cubes(self):
     """Enumerate all modelled datasets as cube descriptors."""
     descriptors = []
     for ds in Dataset.all_by_account(None):
         if ds.has_model:
             # Only datasets with a model can be served as cubes.
             descriptors.append({"name": ds.name, "label": ds.label})
     return descriptors
Exemple #9
0
 def test_delete_dataset(self):
     """An authorized DELETE removes the dataset and answers 410."""
     name = self.cra.name
     res = self.client.delete(url_for('datasets_api.delete', name=name),
                              query_string=self.auth_qs)
     assert '410' in res.status, res.status
     ds = Dataset.by_name(name)
     assert ds is None, ds
Exemple #10
0
 def test_delete_dataset_requires_auth(self):
     """Deleting without credentials yields 403; the dataset remains."""
     dataset_name = self.cra.name
     url = url_for('datasets_api.delete', name=dataset_name)
     res = self.client.delete(url, query_string={})
     assert '403' in res.status, res.status
     assert Dataset.by_name(dataset_name) is not None
Exemple #11
0
 def test_delete_dataset(self):
     """Authorized deletion responds 410 Gone and removes the record."""
     dataset_name = self.cra.name
     url = url_for('datasets_api.delete', name=dataset_name)
     res = self.client.delete(url, query_string=self.auth_qs)
     assert '410' in res.status, res.status
     assert Dataset.by_name(dataset_name) is None
Exemple #12
0
def query_index():
    """Build the dataset index query for the current user with optional
    language, territory and account filters, plus facet counts.

    Returns a (pager, languages, territories) tuple.
    """
    q = Dataset.all_by_account(current_user, order=False)
    q = q.order_by(Dataset.updated_at.desc())

    # Filter by languages if they have been provided
    for language in request.args.getlist('languages'):
        # Renamed from 'l': single-letter 'l' is ambiguous (PEP 8 / E741).
        lang_alias = aliased(DatasetLanguage)
        q = q.join(lang_alias, Dataset._languages)
        q = q.filter(lang_alias.code == language)

    # Filter by territories if they have been provided
    for territory in request.args.getlist('territories'):
        terr_alias = aliased(DatasetTerritory)
        q = q.join(terr_alias, Dataset._territories)
        q = q.filter(terr_alias.code == territory)

    # Filter by account if one has been provided
    for account in request.args.getlist('account'):
        acct_alias = aliased(Account)
        q = q.join(acct_alias, Dataset.managers)
        q = q.filter(acct_alias.name == account)

    # Return a list of languages as dicts with code, count, url and label
    languages = [{'code': code, 'count': count, 'label': LANGUAGES.get(code)}
                 for (code, count) in DatasetLanguage.dataset_counts(q)]

    territories = [{'code': code, 'count': count, 'label': COUNTRIES.get(code)}
                   for (code, count) in DatasetTerritory.dataset_counts(q)]

    pager = Pager(q, limit=15)
    return pager, languages, territories
Exemple #13
0
 def test_view_fields_empty(self):
     """With an empty fields mapping, the structure view omits columns."""
     cra = Dataset.by_name('cra')
     cra.fields = {}
     db.session.commit()
     res = self.client.get(url_for('datasets_api.structure', name='cra'))
     fields = res.json.get('fields')
     assert 'cap_or_cur' not in fields, res.json
Exemple #14
0
 def setUp(self):
     # Fixture: reset the search index and load the 'cra' model so each
     # test starts from a committed dataset plus its CSV source path.
     super(TestLoad, self).setUp()
     # Clear any index left over from a previous test run.
     data_manager._index = None
     model = meta_fixture('cra')
     self.ds = Dataset(model)
     db.session.add(self.ds)
     db.session.commit()
     # Path to the raw CSV data used by the load tests.
     self.cra_url = csvimport_fixture_path('../data', 'cra.csv')
Exemple #15
0
 def test_view_fields_empty(self):
     """Clearing the dataset's fields removes them from the structure API."""
     dataset = Dataset.by_name('cra')
     dataset.fields = {}
     db.session.commit()
     url = url_for('datasets_api.structure', name='cra')
     res = self.client.get(url)
     assert 'cap_or_cur' not in res.json.get('fields'), res.json
Exemple #16
0
def load_from_url(dataset_name, url):
    """Extract *url* as a source for the named dataset and queue its load."""
    with flask_app.app_context():
        ds = Dataset.by_name(dataset_name)
        if ds is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        source = tasks.extract_url(ds, url)
        if source is None:
            # Extraction failed; nothing to load.
            return
        load_from_source.delay(dataset_name, source.name)
Exemple #17
0
 def test_publish(self):
     """The owner can flip a private dataset back to public via the API."""
     cra = Dataset.by_name('cra')
     cra.private = True
     db.session.commit()
     url = url_for('datasets_api.view', name='cra')
     # Anonymous access to a private dataset is forbidden.
     res = self.client.get(url)
     assert '403' in res.status, res.status
     auth = {'api_key': self.user.api_key}
     res = self.client.get(url, query_string=auth)
     assert '200' in res.status, res.status
     data = res.json.copy()
     data['category'] = 'budget'
     data['private'] = False
     response = self.client.post(url, data=json.dumps(data),
                                 headers={'content-type': 'application/json'},
                                 query_string=auth)
     assert '200' in response.status, response.json
     cra = Dataset.by_name('cra')
     assert cra.private is False, cra.private
Exemple #18
0
def load_from_source(dataset_name, source_name):
    """Transform *source_name* for the named dataset and load the result."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            # Fix: guard against an unknown dataset — previously a None
            # dataset was passed straight into tasks.transform_source.
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            return
        artifact = tasks.transform_source(dataset, source_name)
        if artifact is None:
            # Transformation produced nothing; skip the load step.
            return
        tasks.load(dataset, source_name=source_name)
Exemple #19
0
def load_from_url(dataset_name, url):
    """Pull *url* into the named dataset, then enqueue the source load."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        source = tasks.extract_url(dataset, url)
        if source is not None:
            # Hand off to the async loader once extraction succeeded.
            load_from_source.delay(dataset_name, source.name)
 def setUp(self):
     # Fixture: build the test app, bind the shared metadata to its
     # engine, and prepare the SIMPLE_MODEL dataset plus a CSV reader.
     make_test_app()
     self.engine = core.db.engine 
     # Reuse the application's metadata rather than a fresh MetaData().
     self.meta = core.db.metadata #MetaData()
     self.meta.bind = self.engine
     self.ds = Dataset(SIMPLE_MODEL)
     self.reader = csv.DictReader(StringIO(TEST_DATA))
     # Convenience handles for the two complex dimensions under test.
     self.entity = self.ds['to']
     self.classifier = self.ds['function']
Exemple #21
0
 def list_cubes(self):
     """List cubes for every dataset whose model has at least one axis."""
     return [
         {'name': ds.name, 'label': ds.label}
         for ds in Dataset.all_by_account(None)
         if len(ds.model.axes)
     ]
def import_fixture(name):
    """Load a CSV-import fixture by name.

    Builds a Dataset from the fixture's meta.json (merging model.json when
    present) and returns (dataset, path-to-data.csv).
    """
    meta = json.load(csvimport_fixture_file(name, 'meta.json'))
    model_fp = csvimport_fixture_file(name, 'model.json')
    if model_fp:
        # A separate model file overrides/extends the metadata.
        meta['model'] = json.load(model_fp)
    dataset = Dataset(meta)
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    db.session.commit()
    return dataset, data_path
Exemple #23
0
 def test_publish(self):
     """Publishing flow: private dataset 403s anonymously, owner republishes."""
     cra = Dataset.by_name('cra')
     cra.private = True
     db.session.commit()
     url = url_for('datasets_api.view', name='cra')
     res = self.client.get(url)
     assert '403' in res.status, res.status
     res = self.client.get(url, query_string={'api_key': self.user.api_key})
     assert '200' in res.status, res.status
     # Re-post the dataset with private flipped off.
     data = res.json.copy()
     data['category'] = 'budget'
     data['private'] = False
     response = self.client.post(
         url,
         data=json.dumps(data),
         headers={'content-type': 'application/json'},
         query_string={'api_key': self.user.api_key})
     assert '200' in response.status, response.json
     cra = Dataset.by_name('cra')
     assert cra.private is False, cra.private
Exemple #24
0
def authz():
    """Report read/update permissions for the dataset named in the query."""
    obj = Dataset.by_name(request.args.get('dataset'))
    # Seed the ETag before the permission checks; private=True because the
    # answer depends on the requesting user.
    etag_cache_keygen(obj, private=True)
    if obj is None:
        return jsonify({'read': False, 'update': False})
    return jsonify({
        'read': dataset.read(obj),
        'update': dataset.update(obj),
    })
Exemple #25
0
def create():
    """Create a dataset from the posted payload, rejecting duplicates.

    Raises Invalid when a dataset with the requested name already exists.
    """
    require.dataset.create()
    dataset = request_data()
    data = validate_dataset(dataset)
    if Dataset.by_name(data['name']) is not None:
        # Fix: corrected the typo "identifer" in the user-facing message.
        raise Invalid(SchemaNode(String(), name='name'),
                      _("A dataset with this identifier already exists!"))
    dataset = Dataset({'dataset': data, 'model': {}})
    dataset.managers.append(current_user)
    db.session.add(dataset)
    db.session.commit()
    return view(dataset.name)
Exemple #26
0
 def test_update_invalid_label(self):
     """An empty label fails validation and the stored label is kept."""
     payload = {'name': 'cra', 'label': '',
                'description': 'I\'m a banana',
                'currency': 'GBP'}
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert '400' in res.status, res.status
     assert 'Shorter than' in res.data, res.json
     cra = Dataset.by_name('cra')
     assert cra.label != '', cra.label
Exemple #27
0
def load_from_source(dataset_name, source_name):
    """Transform the named source for a dataset, then load the result."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        source = tasks.transform_source(dataset, source_name)
        if source is not None:
            # Only load when transformation produced an artifact.
            tasks.load(dataset, source_name=source_name)
Exemple #28
0
 def test_update_invalid_currency(self):
     """An unknown currency is rejected and the stored value is unchanged."""
     data = {'name': 'cra',
             'label': 'Common Rough Act',
             'description': 'I\'m a banana',
             'category': 'budget',
             'currency': 'glass pearls'}
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(data),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert 'not one of' in res.data, res.json
     cra = Dataset.by_name('cra')
     # Fix: the failure message now shows the currency (the value under
     # test), not the unrelated label.
     assert cra.currency == 'GBP', cra.currency
Exemple #29
0
 def test_update(self):
     """A valid update changes both the label and the currency."""
     payload = {'name': 'cra', 'label': 'Common Rough Act',
                'description': 'I\'m a banana',
                'currency': 'EUR', 'languages': ['en'],
                'territories': ['GB'],
                'category': 'budget'}
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     cra = Dataset.by_name('cra')
     assert cra.label == 'Common Rough Act', (cra.label, res.json)
     assert cra.currency == 'EUR', (cra.currency, res.json)
Exemple #30
0
def load_from_source(dataset_name, source_name):
    """Run the transform step for a dataset source and load its artifact."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        artifact = tasks.transform_source(dataset, source_name)
        if artifact is not None:
            # A transform artifact exists; perform the actual load.
            tasks.load(dataset, source_name=source_name)
Exemple #31
0
 def test_update_invalid_territory(self):
     """An unknown territory code is rejected and not stored."""
     payload = {'name': 'cra', 'label': 'CRA',
                'territories': ['su'],
                'description': 'I\'m a banana',
                'currency': 'GBP'}
     response = self.client.post(url_for('datasets_api.update', name='cra'),
                                 data=json.dumps(payload),
                                 headers={'content-type': 'application/json'},
                                 query_string={'api_key': self.user.api_key})
     assert '400' in response.status, response.status
     assert 'updated' not in response.data
     cra = Dataset.by_name('cra')
     assert 'su' not in cra.territories
Exemple #32
0
 def test_update_invalid_category(self):
     """An unknown category is rejected and no fields are updated."""
     payload = {'name': 'cra',
                'label': 'Common Rough Act',
                'description': 'I\'m a banana',
                'currency': 'EUR', 'languages': ['en'],
                'territories': ['GB'], 'category': 'foo'}
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert '400' in res.status, res.status
     assert 'not one of' in res.data, res.json
     cra = Dataset.by_name('cra')
     assert cra.label != 'Common Rough Act', cra.label
Exemple #33
0
 def test_update_invalid_currency(self):
     """Rejecting a bogus currency leaves the original value in place."""
     data = {
         'name': 'cra',
         'label': 'Common Rough Act',
         'description': 'I\'m a banana',
         'category': 'budget',
         'currency': 'glass pearls'
     }
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(data),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert 'not one of' in res.data, res.json
     cra = Dataset.by_name('cra')
     # Fix: show the currency in the failure message, not the label.
     assert cra.currency == 'GBP', cra.currency
Exemple #34
0
 def test_update_invalid_label(self):
     """An empty label triggers a length validation error."""
     payload = {
         'name': 'cra',
         'label': '',
         'description': 'I\'m a banana',
         'currency': 'GBP'
     }
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert '400' in res.status, res.status
     assert 'Shorter than' in res.data, res.json
     cra = Dataset.by_name('cra')
     assert cra.label != '', cra.label
Exemple #35
0
    def cube(self, name, locale=None):
        """Build a cubes Cube for the dataset called *name*.

        Raises NoSuchCubeError when no dataset with that name exists.
        """
        dataset = Dataset.by_name(name)
        if dataset is None:
            # Fix: the guard previously tested `name`, which is never None
            # at this point — a missing dataset slipped through and
            # crashed on attribute access below.
            raise NoSuchCubeError("Unknown dataset %s" % name, name)

        measures, dimensions, mappings = [], [], {}
        # Always provide a row-count aggregate.
        # Fix: corrected "Numer" typo in the aggregate label (the other
        # provider in this codebase uses "Number of entries").
        aggregates = [MeasureAggregate('num_entries',
                                       label='Number of entries',
                                       function='count')]

        for measure in dataset.model.measures:
            cubes_measure = Measure(measure.name, label=measure.label)
            measures.append(cubes_measure)
            # Every measure gets a sum aggregate mapped to its column.
            aggregate = MeasureAggregate(measure.name,
                                         label=measure.label,
                                         measure=measure.name,
                                         function='sum')
            aggregates.append(aggregate)
            mappings[measure.name] = measure.column

        for dimension in dataset.model.dimensions:
            attributes = []
            for attr in dimension.attributes:
                attributes.append(attr.name)
                mappings[attr.path] = attr.column

            # Each dataset dimension becomes a single-level cubes dimension.
            meta = {
                'label': dimension.label,
                'name': dimension.name,
                'levels': [{
                    'name': dimension.name,
                    'label': dimension.label,
                    # 'key': 'name',
                    'attributes': attributes
                }]
            }
            dimensions.append(create_dimension(meta))

        return Cube(name=dataset.name,
                    fact=dataset.fact_table.table.name,
                    aggregates=aggregates,
                    measures=measures,
                    label=dataset.label,
                    description=dataset.description,
                    dimensions=dimensions,
                    store=self.store,
                    mappings=mappings)
Exemple #36
0
 def test_update(self):
     """A complete, valid payload updates label and currency."""
     payload = {
         'name': 'cra',
         'label': 'Common Rough Act',
         'description': 'I\'m a banana',
         'currency': 'EUR',
         'languages': ['en'],
         'territories': ['GB'],
         'category': 'budget'
     }
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     cra = Dataset.by_name('cra')
     assert cra.label == 'Common Rough Act', (cra.label, res.json)
     assert cra.currency == 'EUR', (cra.currency, res.json)
Exemple #37
0
 def test_update_invalid_territory(self):
     """An invalid territory code is rejected with a 400."""
     payload = {
         'name': 'cra',
         'label': 'CRA',
         'territories': ['su'],
         'description': 'I\'m a banana',
         'currency': 'GBP'
     }
     response = self.client.post(
         url_for('datasets_api.update', name='cra'),
         data=json.dumps(payload),
         headers={'content-type': 'application/json'},
         query_string={'api_key': self.user.api_key})
     assert '400' in response.status, response.status
     assert 'updated' not in response.data
     cra = Dataset.by_name('cra')
     assert 'su' not in cra.territories
Exemple #38
0
 def test_update_invalid_category(self):
     """A category outside the allowed set is rejected; nothing changes."""
     payload = {
         'name': 'cra',
         'label': 'Common Rough Act',
         'description': 'I\'m a banana',
         'currency': 'EUR',
         'languages': ['en'],
         'territories': ['GB'],
         'category': 'foo'
     }
     res = self.client.post(url_for('datasets_api.update', name='cra'),
                            data=json.dumps(payload),
                            headers={'content-type': 'application/json'},
                            query_string={'api_key': self.user.api_key})
     assert '400' in res.status, res.status
     assert 'not one of' in res.data, res.json
     cra = Dataset.by_name('cra')
     assert cra.label != 'Common Rough Act', cra.label
Exemple #39
0
    def test_create_dataset(self):
        """Creation fails on an empty body and succeeds on a full payload."""
        url = url_for('datasets_api.create')
        headers = {'content-type': 'application/json'}
        res = self.client.post(url, data=json.dumps({}),
                               query_string=self.auth_qs,
                               headers=headers)
        assert '400' in res.status, res.status
        assert 'errors' in res.json, res.json

        params = {
            'name': 'testds',
            'label': 'Test Dataset',
            'category': 'budget',
            'description': 'I\'m a banana!',
            'currency': 'EUR',
        }
        res = self.client.post(url, data=json.dumps(params),
                               query_string=self.auth_qs,
                               headers=headers)
        assert "200" in res.status, res.status
        assert res.json['name'] == 'testds', res.json

        ds = Dataset.by_name('testds')
        assert ds.label == params['label'], ds
class ComplexDimensionTestCase(unittest.TestCase):
    """Exercises complex dimensions ('to' and 'function') of SIMPLE_MODEL:
    their properties, generated tables and attribute access."""

    def setUp(self):
        make_test_app()
        self.engine = core.db.engine
        # Reuse the application's metadata rather than a fresh MetaData().
        self.meta = core.db.metadata
        self.meta.bind = self.engine
        self.ds = Dataset(SIMPLE_MODEL)
        self.reader = csv.DictReader(StringIO(TEST_DATA))
        self.entity = self.ds['to']
        self.classifier = self.ds['function']

    def tearDown(self):
        tear_down_test_app()

    def test_basic_properties(self):
        self.ds.generate()
        assert self.entity.name == 'to', self.entity.name
        assert self.classifier.name == 'function', self.classifier.name
        assert self.entity.scheme == 'entity', self.entity.scheme
        assert self.classifier.scheme == 'funny', self.classifier.scheme

    def test_generated_tables(self):
        # The dimension table only exists after generate() has run.
        assert not hasattr(self.entity, 'table'), self.entity
        self.ds.generate()
        assert hasattr(self.entity, 'table'), self.entity
        expected_name = 'test_' + self.entity.scheme
        assert self.entity.table.name == expected_name, self.entity.table.name
        assert hasattr(self.entity, 'alias')
        assert self.entity.alias.name == self.entity.name, self.entity.alias.name
        cols = self.entity.table.c
        assert 'id' in cols
        # Value-dimension columns do not leak onto the entity table.
        self.assertRaises(KeyError, cols.__getitem__, 'field')

    def test_attributes_exist_on_object(self):
        assert len(self.entity.attributes) == 3, self.entity.attributes
        self.assertRaises(KeyError, self.entity.__getitem__, 'field')
        assert self.entity['name'].name == 'name'
        assert self.entity['name'].datatype == 'string'
        assert self.entity['const'].default == 'true'

    def test_attributes_exist_on_table(self):
        self.ds.generate()
        assert hasattr(self.entity, 'table'), self.entity
        assert 'name' in self.entity.table.c, self.entity.table.c
        assert 'label' in self.entity.table.c, self.entity.table.c
Exemple #41
0
 def has_cube(self, name):
     """True when a dataset called *name* exists and carries a model."""
     ds = Dataset.by_name(name)
     return ds is not None and ds.has_model
Exemple #42
0
def authz():
    """Return read/update authorization flags for the requested dataset."""
    obj = Dataset.by_name(request.args.get("dataset"))
    if obj is None:
        # Unknown dataset: no permissions at all.
        return jsonify({"read": False, "update": False})
    read_ok = dataset.read(obj)
    update_ok = dataset.update(obj)
    return jsonify({"read": read_ok, "update": update_ok})
Exemple #43
0
def authz():
    """Answer whether the caller may read/update the named dataset."""
    obj = Dataset.by_name(request.args.get('dataset'))
    if obj is None:
        return jsonify({'read': False, 'update': False})
    return jsonify({
        'read': dataset.read(obj),
        'update': dataset.update(obj),
    })
 def setUp(self):
     """Build the app, generate the dataset schema, and open the CSV fixture."""
     make_test_app()
     self.ds = Dataset(SIMPLE_MODEL)
     self.engine = core.db.engine
     self.ds.generate()
     self.reader = csv.DictReader(StringIO(TEST_DATA))
Exemple #45
0
def create_budget_data_package(url, user, private):
    """Create (or update) a dataset and its sources from a Budget Data
    Package at *url*, owned by *user*.

    Returns the list of newly created Source objects; an empty list on
    any failure (unparseable package, missing permissions, bad resource).
    """
    try:
        bdpkg = BudgetDataPackage(url)
    except Exception as problem:
        # Lots of different types of problems can arise with a
        # BudgetDataPackage, but their message should be understandable
        # so we catch just any Exception and email it's message to the user
        # Fix: use str(problem) — BaseException has no '.message'
        # attribute on Python 3, so the old code raised AttributeError
        # inside this handler.
        log.error("Failed to parse budget data package: {0}".format(
            str(problem)))
        return []

    sources = []
    for (idx, resource) in enumerate(bdpkg.resources):
        dataset = Dataset.by_name(bdpkg.name)
        if dataset is None:
            # Get information from the descriptior file for the given
            # resource (at index idx)
            info = get_dataset_info_from_descriptor(bdpkg, idx)
            # Set the dataset name based on the previously computed one
            info['dataset']['name'] = bdpkg.name
            # Create the model from the resource schema
            model = create_model_from_schema(resource.schema)
            # Set the default value for the time to the fiscal year of the
            # resource, because it isn't included in the budget CSV so we
            # won't be able to load it along with the data.
            model['time']['default_value'] = resource.fiscalYear
            # Add the model as the mapping
            info['mapping'] = model

            # Create the dataset
            dataset = Dataset(info)
            dataset.managers.append(user)
            dataset.private = private
            db.session.add(dataset)
            db.session.commit()
        else:
            if not dataset.can_update(user):
                log.error(
                    "User {0} not permitted to update dataset {1}".format(
                        user.name, bdpkg.name))
                return []

        # Resolve the resource's data URL: explicit url, or a path joined
        # against the package base (or the package URL itself).
        if 'url' in resource:
            resource_url = resource.url
        elif 'path' in resource:
            if 'base' in bdpkg:
                resource_url = urlparse.urljoin(bdpkg.base, resource.path)
            else:
                resource_url = urlparse.urljoin(url, resource.path)
        else:
            log.error('Url not found')
            return []

        # We do not re-add old sources so if we find the same source
        # we don't do anything, else we create the source and append it
        # to the source list
        for dataset_source in dataset.sources:
            if dataset_source.url == resource_url:
                break
        else:
            source = Source(dataset=dataset, creator=user,
                            url=resource_url)
            db.session.add(source)
            db.session.commit()
            sources.append(source)

    return sources
Exemple #46
0
    def cube(self, name, locale=None, namespace=None):
        """Build and link a cubes Cube for the dataset called *name*.

        Raises NoSuchCubeError when no dataset with that name exists.
        """
        dataset = Dataset.by_name(name)
        if dataset is None:
            # Fix: the guard previously tested `name`, which is never None
            # at this point — a missing dataset slipped through and
            # crashed on attribute access below.
            raise NoSuchCubeError("Unknown dataset %s" % name, name)

        measures, dimensions, mappings = [], [], {}
        # Always expose a row-count aggregate.
        aggregates = [MeasureAggregate("fact_count", label="Number of entries", function="count")]

        for measure in dataset.model.measures:
            cubes_measure = Measure(measure.name, label=measure.label)
            measures.append(cubes_measure)
            # Each measure also gets a sum aggregate over its column.
            aggregate = MeasureAggregate(
                measure.name + "_sum", label=measure.label, measure=measure.name, function="sum"
            )
            aggregates.append(aggregate)
            mappings[measure.name] = measure.column

        for dimension in dataset.model.dimensions:
            attributes, last_col = [], None
            for attr in dimension.attributes:
                attributes.append({"name": attr.name, "label": attr.label})
                mappings[attr.path] = last_col = attr.column

            # Workaround because the cubes mapper shortens references
            # for single-attribute dimensions to just the dimension name.
            if len(attributes) == 1:
                mappings[dimension.name] = last_col

            # Translate into cubes' categories
            cardinality = "high"
            if dimension.cardinality:
                if dimension.cardinality < 6:
                    cardinality = "tiny"
                elif dimension.cardinality < 51:
                    cardinality = "low"
                elif dimension.cardinality < 1001:
                    cardinality = "medium"

            meta = {
                "label": dimension.label,
                "name": dimension.name,
                "cardinality": cardinality,
                "levels": [
                    {
                        "name": dimension.name,
                        "label": dimension.label,
                        "cardinality": cardinality,
                        "attributes": attributes,
                    }
                ],
            }
            # Propagate key/label attributes when the model declares them.
            if dimension.key_attribute:
                meta["levels"][0]["key"] = dimension.key_attribute.name
            if dimension.label_attribute:
                meta["levels"][0]["label_attribute"] = dimension.label_attribute.name
                meta["levels"][0]["order_attribute"] = dimension.label_attribute.name
            dimensions.append(Dimension.from_metadata(meta))

        cube = Cube(
            name=dataset.name,
            fact=dataset.fact_table.table.name,
            aggregates=aggregates,
            measures=measures,
            label=dataset.label,
            description=dataset.description,
            dimensions=dimensions,
            store=self.store,
            mappings=mappings,
        )

        link_cube(cube, locale, provider=self, namespace=namespace)
        return cube
Exemple #47
0
def get_dataset(name):
    """Look up the dataset called *name*, enforcing read access.

    Aborts with a 404 (via ``obj_or_404``) when no such dataset exists,
    and raises an authorization error when the caller may not read it.
    """
    found = Dataset.by_name(name)
    dataset = obj_or_404(found)
    require.dataset.read(dataset)
    return dataset
 def setUp(self):
     # Build a fresh test application, then a Dataset from the shared
     # fixture model, so every test starts from a known state.
     make_test_app()
     self.ds = Dataset(SIMPLE_MODEL)
# Example #49 (score: 0) — scraper artifact marker, kept as a comment
    def cube(self, name, locale=None, namespace=None):
        """Construct a cubes ``Cube`` for the dataset named *name*.

        Translates the dataset's model (measures and dimensions) into the
        cubes metadata vocabulary: a default ``fact_count`` aggregate plus a
        ``<measure>_sum`` aggregate per measure, and one single-level
        dimension per model dimension. The cube is linked to this provider
        before being returned.

        :param name: name of the dataset backing the cube.
        :param locale: optional locale, passed through to ``link_cube``.
        :param namespace: optional namespace, passed through to ``link_cube``.
        :raises NoSuchCubeError: if no dataset with that name exists.
        """
        dataset = Dataset.by_name(name)
        # Bug fix: the lookup result must be checked, not ``name`` (which is
        # never None here) — otherwise a missing dataset crashed later with
        # an AttributeError instead of raising NoSuchCubeError.
        if dataset is None:
            raise NoSuchCubeError('Unknown dataset %s' % name, name)

        measures, dimensions, mappings = [], [], {}
        # Every cube gets a row-count aggregate, regardless of its measures.
        aggregates = [
            MeasureAggregate('fact_count',
                             label='Number of entries',
                             function='count')
        ]

        for measure in dataset.model.measures:
            cubes_measure = Measure(measure.name, label=measure.label)
            measures.append(cubes_measure)
            aggregate = MeasureAggregate(measure.name + '_sum',
                                         label=measure.label,
                                         measure=measure.name,
                                         function='sum')
            aggregates.append(aggregate)
            mappings[measure.name] = measure.column

        for dimension in dataset.model.dimensions:
            attributes, last_col = [], None
            for attr in dimension.attributes:
                attributes.append({'name': attr.name, 'label': attr.label})
                mappings[attr.path] = last_col = attr.column

            # Workaround because the cubes mapper shortens references
            # for single-attribute dimensions to just the dimension name.
            if len(attributes) == 1:
                mappings[dimension.name] = last_col

            # Translate the numeric cardinality into cubes' category names.
            cardinality = 'high'
            if dimension.cardinality:
                if dimension.cardinality < 6:
                    cardinality = 'tiny'
                elif dimension.cardinality < 51:
                    cardinality = 'low'
                elif dimension.cardinality < 1001:
                    cardinality = 'medium'

            # Dimensions are flat: a single level named after the dimension.
            meta = {
                'label': dimension.label,
                'name': dimension.name,
                'cardinality': cardinality,
                'levels': [{
                    'name': dimension.name,
                    'label': dimension.label,
                    'cardinality': cardinality,
                    'attributes': attributes
                }]
            }
            if dimension.key_attribute:
                meta['levels'][0]['key'] = dimension.key_attribute.name
            if dimension.label_attribute:
                # The label attribute doubles as the default sort order.
                meta['levels'][0]['label_attribute'] = \
                    dimension.label_attribute.name
                meta['levels'][0]['order_attribute'] = \
                    dimension.label_attribute.name
            dimensions.append(Dimension.from_metadata(meta))

        cube = Cube(name=dataset.name,
                    fact=dataset.fact_table.table.name,
                    aggregates=aggregates,
                    measures=measures,
                    label=dataset.label,
                    description=dataset.description,
                    dimensions=dimensions,
                    store=self.store,
                    mappings=mappings)

        link_cube(cube, locale, provider=self, namespace=namespace)
        return cube
# Example #50 (score: 0) — scraper artifact marker, kept as a comment
def authz():
    """Return the caller's read/update permissions on a dataset as JSON."""
    name = request.args.get('dataset')
    obj = Dataset.by_name(name)
    # ETag is computed even for a missing dataset (obj may be None here).
    etag_cache_keygen(obj, private=True)
    if obj is None:
        return jsonify({'read': False, 'update': False})
    perms = {'read': dataset.read(obj), 'update': dataset.update(obj)}
    return jsonify(perms)
class DatasetLoadTestCase(unittest.TestCase):
    def setUp(self):
        # Fresh app and generated dataset schema for every test; the
        # reader streams the CSV fixture rows that tests pass to
        # load_all() themselves.
        make_test_app()
        self.ds = Dataset(SIMPLE_MODEL)
        self.engine = core.db.engine
        self.ds.generate()
        self.reader = csv.DictReader(StringIO(TEST_DATA))

    def tearDown(self):
        # Undo make_test_app() so tests remain isolated from each other.
        tear_down_test_app()

    def test_load_all(self):
        """Loading the fixture persists six rows with the expected values."""
        self.ds.load_all(self.reader)
        rows = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(rows) == 6, rows
        first = rows[0]
        assert first["time"] == "2010", first.items()
        assert first["amount"] == 200, first.items()
        assert first["field"] == "foo", first.items()

    def test_flush(self):
        """flush() empties the entry table without dropping it."""
        self.ds.load_all(self.reader)
        query = self.ds.table.select()
        before = self.engine.execute(query).fetchall()
        assert len(before) == 6, before
        self.ds.flush()
        after = self.engine.execute(query).fetchall()
        assert len(after) == 0, after

    def test_drop(self):
        """drop() removes all three tables belonging to the dataset."""
        expected = ("test_entry", "test_entity", "test_funny")
        names = self.engine.table_names()
        for table in expected:
            assert table in names, names
        self.ds.drop()
        names = self.engine.table_names()
        for table in expected:
            assert table not in names, names

    def test_aggregate_simple(self):
        """An uncut aggregate reports totals across all six entries."""
        self.ds.load_all(self.reader)
        result = self.ds.aggregate()
        summary = result["summary"]
        assert summary["num_entries"] == 6, result
        assert summary["amount"] == 2690.0, result

    def test_aggregate_basic_cut(self):
        """A single cut restricts the aggregate to matching entries."""
        self.ds.load_all(self.reader)
        result = self.ds.aggregate(cuts=[("field", u"foo")])
        summary = result["summary"]
        assert summary["num_entries"] == 3, result
        assert summary["amount"] == 1000, result

    def test_aggregate_or_cut(self):
        """Two cuts on the same dimension are combined with OR."""
        self.ds.load_all(self.reader)
        cuts = [("field", u"foo"), ("field", u"bar")]
        result = self.ds.aggregate(cuts=cuts)
        summary = result["summary"]
        assert summary["num_entries"] == 4, result
        assert summary["amount"] == 1190, result

    def test_aggregate_dimensions_drilldown(self):
        """Drilling down on one dimension yields one cell per value."""
        self.ds.load_all(self.reader)
        result = self.ds.aggregate(drilldowns=["function"])
        summary = result["summary"]
        assert summary["num_entries"] == 6, result
        assert summary["amount"] == 2690, result
        assert len(result["drilldown"]) == 2, result["drilldown"]

    def test_aggregate_two_dimensions_drilldown(self):
        """Drilling down on two dimensions yields one cell per value pair."""
        self.ds.load_all(self.reader)
        result = self.ds.aggregate(drilldowns=["function", "field"])
        summary = result["summary"]
        assert summary["num_entries"] == 6, result
        assert summary["amount"] == 2690, result
        assert len(result["drilldown"]) == 5, result["drilldown"]

    def test_materialize_table(self):
        self.ds.load_all(self.reader)
        itr = self.ds.materialize()
        tbl = list(itr)
        assert len(tbl) == 6, tbl
        row = tbl[0]
        assert isinstance(row["field"], unicode), row
        assert isinstance(row["function"], dict), row
        assert isinstance(row["to"], dict), row