class TestCompoundDimension(DatabaseTestCase):

    def setup(self):
        super(TestCompoundDimension, self).setup()
        self.engine = db.engine
        self.meta = db.metadata
        self.meta.bind = self.engine
        self.ds = Dataset(SIMPLE_MODEL)
        self.entity = self.ds['to']
        self.classifier = self.ds['function']

    def test_is_compound(self):
        h.assert_true(self.entity.is_compound)

    def test_basic_properties(self):
        assert self.entity.name == 'to', self.entity.name
        assert self.classifier.name == 'function', self.classifier.name

    def test_generated_tables(self):
        #assert not hasattr(self.entity, 'table'), self.entity
        #self.ds.generate()
        assert hasattr(self.entity, 'table'), self.entity
        assert self.entity.table.name == 'test__' + self.entity.taxonomy, \
            self.entity.table.name
        assert hasattr(self.entity, 'alias')
        assert self.entity.alias.name == self.entity.name, self.entity.alias.name
        cols = self.entity.table.c
        assert 'id' in cols
        assert_raises(KeyError, cols.__getitem__, 'field')

    def test_attributes_exist_on_object(self):
        assert len(self.entity.attributes) == 2, self.entity.attributes
        assert_raises(KeyError, self.entity.__getitem__, 'field')
        assert self.entity['name'].name == 'name'
        assert self.entity['name'].datatype == 'id'

    def test_attributes_exist_on_table(self):
        assert hasattr(self.entity, 'table'), self.entity
        assert 'name' in self.entity.table.c, self.entity.table.c
        assert 'label' in self.entity.table.c, self.entity.table.c

    def test_members(self):
        self.ds.generate()
        self.entity.load(self.ds.bind, {'name': 'one', 'label': 'Label One'})
        self.entity.load(self.ds.bind, {'name': 'two', 'label': 'Label Two'})

        members = list(self.entity.members())
        h.assert_equal(len(members), 2)

        members = list(self.entity.members(self.entity.alias.c.name == 'one'))
        h.assert_equal(len(members), 1)

def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')

    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)

    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)

    data_path = csvimport_fixture_path(name, 'data.csv')
    user = h.make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()

    return source

def csvimport_fixture(name):
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')

    model = json.load(model_fp)
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)

    dataset = Dataset(model)
    dataset.generate()
    db.session.add(dataset)

    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()

    return source

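# Hedged usage sketch (not part of the project code above): how a test might
# consume the Source returned by csvimport_fixture(). The fixture name
# 'example', the no-argument run() call and the source.dataset attribute are
# illustrative assumptions.
def test_csvimport_runs_to_completion():
    source = csvimport_fixture('example')   # builds Dataset + Source from fixture files
    importer = CSVImporter(source)          # same importer as in the CLI code further down
    importer.run()                          # load data.csv into the generated tables
    assert len(source.dataset) > 0, 'expected at least one loaded entry'
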
def load_fixture(name, manager=None):
    """ Load fixture data into the database. """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()

    dataset.generate()
    data = data_fixture(name)
    reader = csv.DictReader(data)
    for row in reader:
        entry = convert_types(model["mapping"], row)
        dataset.load(entry)
    data.close()
    dataset.commit()
    return dataset

def load_fixture(name, manager=None):
    """ Load fixture data into the database. """
    from openspending.validation.data import convert_types
    fh = open(fixture_path('%s.js' % name), 'r')
    data = json.load(fh)
    fh.close()
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()

    dataset.generate()
    fh = open(fixture_path('%s.csv' % name), 'r')
    reader = csv.DictReader(fh)
    for row in reader:
        entry = convert_types(data['mapping'], row)
        dataset.load(entry)
    fh.close()
    dataset.commit()
    return dataset

def load_fixture(name, manager=None):
    """ Load fixture data into the database. """
    from openspending.validation.data import convert_types
    fh = fixture_file('%s.js' % name)
    data = json.load(fh)
    fh.close()
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()

    dataset.generate()
    fh = fixture_file('%s.csv' % name)
    reader = csv.DictReader(fh)
    for row in reader:
        entry = convert_types(data['mapping'], row)
        dataset.load(entry)
    fh.close()
    dataset.commit()
    return dataset

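# Hedged usage sketch (assumption, not project code): a database test built on
# load_fixture(). The fixture name 'cra' is illustrative only; any fixture with
# a model and CSV file of the same name would do.
class TestLoadFixtureUsage(DatabaseTestCase):

    def setup(self):
        super(TestLoadFixtureUsage, self).setup()
        self.ds = load_fixture('cra')

    def test_entries_loaded(self):
        # the loader should have created and populated the dataset tables
        assert len(self.ds) > 0, len(self.ds)

    def test_aggregate_runs(self):
        res = self.ds.aggregate()
        assert 'summary' in res, res
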
class TestDatasetLoad(DatabaseTestCase):

    def setup(self):
        super(TestDatasetLoad, self).setup()
        self.ds = Dataset(SIMPLE_MODEL)
        self.ds.generate()
        self.engine = db.engine

    def test_load_all(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        row0 = resn[0]
        assert row0["amount"] == 200, row0.items()
        assert row0["field"] == "foo", row0.items()

    def test_flush(self):
        load_dataset(self.ds)
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 6, resn
        self.ds.flush()
        resn = self.engine.execute(self.ds.table.select()).fetchall()
        assert len(resn) == 0, resn

    def test_drop(self):
        tn = self.engine.table_names()
        assert "test__entry" in tn, tn
        assert "test__to" in tn, tn
        assert "test__function" in tn, tn

        self.ds.drop()

        tn = self.engine.table_names()
        assert "test__entry" not in tn, tn
        assert "test__to" not in tn, tn
        assert "test__function" not in tn, tn

    def test_dataset_count(self):
        load_dataset(self.ds)
        assert len(self.ds) == 6, len(self.ds)

    def test_aggregate_simple(self):
        load_dataset(self.ds)
        res = self.ds.aggregate()
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690.0, res

    def test_aggregate_basic_cut(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(cuts=[("field", u"foo")])
        assert res["summary"]["num_entries"] == 3, res
        assert res["summary"]["amount"] == 1000, res

    def test_aggregate_or_cut(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(cuts=[("field", u"foo"), ("field", u"bar")])
        assert res["summary"]["num_entries"] == 4, res
        assert res["summary"]["amount"] == 1190, res

    def test_aggregate_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function"])
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690, res
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_aggregate_two_dimensions_drilldown(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function", "field"])
        assert res["summary"]["num_entries"] == 6, res
        assert res["summary"]["amount"] == 2690, res
        assert len(res["drilldown"]) == 5, res["drilldown"]

    def test_aggregate_by_attribute(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function.label"])
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_aggregate_two_attributes_same_dimension(self):
        load_dataset(self.ds)
        res = self.ds.aggregate(drilldowns=["function.name", "function.label"])
        assert len(res["drilldown"]) == 2, res["drilldown"]

    def test_materialize_table(self):
        load_dataset(self.ds)
        itr = self.ds.entries()
        tbl = list(itr)
        assert len(tbl) == 6, len(tbl)
        row = tbl[0]
        assert isinstance(row["field"], unicode), row
        assert isinstance(row["function"], dict), row
        assert isinstance(row["to"], dict), row

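# load_dataset() is referenced by the tests above but not shown in this
# listing. A minimal sketch of what it presumably does, reusing the fixture
# helpers from load_fixture(); the fixture name 'simple_model' is an
# assumption, and the real helper may differ.
def load_dataset(dataset):
    data = data_fixture('simple_model')
    reader = csv.DictReader(data)
    for row in reader:
        entry = convert_types(SIMPLE_MODEL['mapping'], row)
        dataset.load(entry)
    data.close()
    dataset.commit()
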
def csvimport(csv_data_url, args):
    # (excerpt: earlier in this function the model JSON is presumably fetched
    # from the --model URL and bound to `model`)
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)

    source = Source(dataset, shell_account(), csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0


def _csvimport(args):
    return csvimport(args.dataset_url, args)


def configure_parser(subparser):
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    p.add_argument('--model', action="store", dest='model',
                   default=None, metavar='url',
                   help="URL of JSON format model (metadata and mapping).")

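# Hedged sketch of how the csvimport subcommand above might be wired into a
# command-line entry point. The program name, the dispatch via a 'command'
# attribute and the assumption that import_parser supplies the positional
# dataset_url argument are all illustrative; the project's real CLI module is
# not shown here.
import argparse

def main():
    parser = argparse.ArgumentParser(prog='ostool')
    subparser = parser.add_subparsers(dest='command')
    configure_parser(subparser)
    args = parser.parse_args()
    if args.command == 'csvimport':
        return _csvimport(args)
    return 1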