class TestCompoundDimension(DatabaseTestCase):
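    """Exercise the compound 'to' dimension generated from SIMPLE_MODEL:
    its flags, generated tables, attributes and member loading."""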

    def setup(self):
        super(TestCompoundDimension, self).setup()
        self.engine = db.engine
        self.meta = db.metadata
        self.meta.bind = self.engine
        self.ds = Dataset(SIMPLE_MODEL)
        self.entity = self.ds['to']
        self.classifier = self.ds['function']

    def test_is_compound(self):
        h.assert_true(self.entity.is_compound)

    def test_basic_properties(self):
        assert self.entity.name == 'to', self.entity.name
        assert self.classifier.name == 'function', self.classifier.name

    def test_generated_tables(self):
        assert hasattr(self.entity, 'table'), self.entity
        assert self.entity.table.name == 'test__' + self.entity.taxonomy, \
            self.entity.table.name
        assert hasattr(self.entity, 'alias')
        assert self.entity.alias.name == self.entity.name, self.entity.alias.name
        cols = self.entity.table.c
        assert 'id' in cols
        assert_raises(KeyError, cols.__getitem__, 'field')

    def test_attributes_exist_on_object(self):
        assert len(self.entity.attributes) == 2, self.entity.attributes
        assert_raises(KeyError, self.entity.__getitem__, 'field')
        assert self.entity['name'].name == 'name'
        assert self.entity['name'].datatype == 'id'

    def test_attributes_exist_on_table(self):
        assert hasattr(self.entity, 'table'), self.entity
        assert 'name' in self.entity.table.c, self.entity.table.c
        assert 'label' in self.entity.table.c, self.entity.table.c

    def test_members(self):
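        # generate() creates the backing tables; each load() call then
        # inserts one member row into the dimension's table.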
        self.ds.generate()
        self.entity.load(self.ds.bind, {'name': 'one', 'label': 'Label One'})
        self.entity.load(self.ds.bind, {'name': 'two', 'label': 'Label Two'})

        members = list(self.entity.members())
        h.assert_equal(len(members), 2)

        members = list(self.entity.members(self.entity.alias.c.name == 'one'))
        h.assert_equal(len(members), 1)
Example #2
def csvimport_fixture(name):
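    """Build a Source fixture: model.json (plus an optional mapping.json),
    a generated Dataset, and the fixture's data.csv for a test account."""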
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = h.make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
Example #3
def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
Example #4
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    data = data_fixture(name)
    reader = csv.DictReader(data)
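    # convert_types coerces the raw CSV strings using the model's mapping.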
    for row in reader:
        entry = convert_types(model["mapping"], row)
        dataset.load(entry)
    data.close()
    dataset.commit()
    return dataset
Example #5
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    data = data_fixture(name)
    reader = csv.DictReader(data)
    for row in reader:
        entry = convert_types(model['mapping'], row)
        dataset.load(entry)
    data.close()
    dataset.commit()
    return dataset
Example #6
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.
    """
    from openspending.validation.data import convert_types
    fh = open(fixture_path('%s.js' % name), 'r')
    data = json.load(fh)
    fh.close()
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    fh = open(fixture_path('%s.csv' % name), 'r')
    reader = csv.DictReader(fh)
    for row in reader:
        entry = convert_types(data['mapping'], row)
        dataset.load(entry)
    fh.close()
    dataset.commit()
    return dataset
Example #7
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.
    """
    from openspending.validation.data import convert_types
    fh = fixture_file('%s.js' % name)
    data = json.load(fh)
    fh.close()
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    fh = fixture_file('%s.csv' % name)
    reader = csv.DictReader(fh)
    for row in reader:
        entry = convert_types(data['mapping'], row)
        dataset.load(entry)
    fh.close()
    dataset.commit()
    return dataset
Example #8
class TestDatasetLoad(DatabaseTestCase):
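    """Load the SIMPLE_MODEL fixture and check table lifecycle, counting,
    aggregation (cuts and drilldowns) and entry materialisation."""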
    def setup(self):
        super(TestDatasetLoad, self).setup()
        self.ds = Dataset(SIMPLE_MODEL)
        self.ds.generate()
        self.engine = db.engine

    def test_load_all(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        row0 = resn[0]
        assert row0["amount"] == 200, row0.items()
        assert row0["field"] == "foo", row0.items()

    def test_flush(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        self.ds.flush()
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 0, resn

    def test_drop(self):
        tn = self.engine.table_names()
        assert "test__entry" in tn, tn
        assert "test__to" in tn, tn
        assert "test__function" in tn, tn
        self.ds.drop()
        tn = self.engine.table_names()
        assert "test__entry" not in tn, tn
        assert "test__to" not in tn, tn
        assert "test__function" not in tn, tn

    def test_dataset_count(self):
        load_dataset(self.ds)
        assert len(self.ds) == 6, len(self.ds)

    def test_aggregate_simple(self):
        load_dataset(self.ds)
        res = self.ds.aggregate()
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690.0, res

    def test_aggregate_basic_cut(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(cuts=[("field", u"foo")])
        assert res["summary"]["num_entries"] == 3, res
        assert res["summary"]["amount"] == 1000, res

    def test_aggregate_or_cut(self):
        load_dataset(self.ds)
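        # Cuts on the same dimension are OR-ed: 'foo' (3 entries) or
        # 'bar' (1 entry) matches 4 of the 6 rows.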
        res = self.ds.aggregate(cuts=[("field", u"foo"), ("field", u"bar")])
        assert res["summary"]["num_entries"] == 4, res
        assert res["summary"]["amount"] == 1190, res

    def test_aggregate_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function"])
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690, res
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_aggregate_two_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function", "field"])
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690, res
        assert len(res["drilldown"]) == 5, res["drilldown"]

    def test_aggregate_by_attribute(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function.label"])
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_aggregate_two_attributes_same_dimension(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function.name", "function.label"])
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_materialize_table(self):
        load_dataset(self.ds)
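        # entries() yields one dict per row; compound dimensions come back
        # as nested dicts rather than flat values.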
        itr = self.ds.entries()
        tbl = list(itr)
        assert len(tbl) == 6, len(tbl)
        row = tbl[0]
        assert isinstance(row["field"], unicode), row
        assert isinstance(row["function"], dict), row
        assert isinstance(row["to"], dict), row
Example #9
class TestDatasetLoad(DatabaseTestCase):

    def setup(self):
        super(TestDatasetLoad, self).setup()
        self.ds = Dataset(SIMPLE_MODEL)
        self.ds.generate()
        self.engine = db.engine
    
    def test_load_all(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        row0 = resn[0]
        assert row0['amount'] == 200, row0.items()
        assert row0['field'] == 'foo', row0.items()
    
    def test_flush(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        self.ds.flush()
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 0, resn
    
    def test_drop(self):
        tn = self.engine.table_names()
        assert 'test__entry' in tn, tn
        assert 'test__to' in tn, tn
        assert 'test__function' in tn, tn
        self.ds.drop()
        tn = self.engine.table_names()
        assert 'test__entry' not in tn, tn
        assert 'test__to' not in tn, tn
        assert 'test__function' not in tn, tn

    def test_dataset_count(self):
        load_dataset(self.ds)
        assert len(self.ds) == 6, len(self.ds)

    def test_aggregate_simple(self):
        load_dataset(self.ds)
        res = self.ds.aggregate()
        assert res['summary']['num_entries'] == 6, res
        assert res['summary']['amount'] == 2690.0, res

    def test_aggregate_basic_cut(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(cuts=[('field', u'foo')])
        assert res['summary']['num_entries'] == 3, res
        assert res['summary']['amount'] == 1000, res

    def test_aggregate_or_cut(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(cuts=[('field', u'foo'),
                                      ('field', u'bar')])
        assert res['summary']['num_entries'] == 4, res
        assert res['summary']['amount'] == 1190, res

    def test_aggregate_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=['function'])
        assert res['summary']['num_entries'] == 6, res
        assert res['summary']['amount'] == 2690, res
        assert len(res['drilldown']) == 2, res['drilldown']

    def test_aggregate_two_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=['function', 'field'])
        assert res['summary']['num_entries'] == 6, res
        assert res['summary']['amount'] == 2690, res
        assert len(res['drilldown']) == 5, res['drilldown']
    
    def test_aggregate_by_attribute(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=['function.label'])
        assert len(res['drilldown']) == 2, res['drilldown']

    def test_aggregate_two_attributes_same_dimension(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=['function.name', 'function.label'])
        assert len(res['drilldown']) == 2, res['drilldown']

    def test_materialize_table(self):
        load_dataset(self.ds)
        itr = self.ds.entries()
        tbl = list(itr)
        assert len(tbl) == 6, len(tbl)
        row = tbl[0]
        assert isinstance(row['field'], unicode), row
        assert isinstance(row['function'], dict), row
        assert isinstance(row['to'], dict), row
Example #10
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)

    source = Source(dataset, shell_account(), csv_data_url)
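    # Re-use an existing source for this URL if the dataset already has one.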
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0


def _csvimport(args):
    return csvimport(args.dataset_url, args)


def configure_parser(subparser):
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    p.add_argument('--model', action="store", dest='model',
                   default=None, metavar='url',
                   help="URL of JSON format model (metadata and mapping).")
Example #11
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)

    source = Source(dataset, shell_account(), 
                    csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()
    
    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0

def _csvimport(args):
    return csvimport(args.dataset_url, args)

def configure_parser(subparser):
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    p.add_argument('--model', action="store", dest='model',
                   default=None, metavar='url',
                   help="URL of JSON format model (metadata and mapping).")