def test_load_data(self):
    """Extract, transform and load the ``cra.csv`` fixture, then check that
    selecting from the fact table yields 36 rows."""
    fixture = csvimport_fixture_file('../data', 'cra.csv')
    extracted = tasks.extract_fileobj(self.ds, fixture, file_name='cra2.csv')
    tasks.transform_source(self.ds, extracted.name)
    tasks.load(self.ds, extracted.name)

    # Query the fact table directly through the engine.
    result = db.engine.execute(self.ds.fact_table.table.select())
    fetched = result.fetchall()
    assert len(fetched) == 36, fetched
def test_load_data(self):
    """Extract, transform and load the ``cra.csv`` fixture, then check that
    the fact table reports and yields 36 entries."""
    fixture = csvimport_fixture_file('../data', 'cra.csv')
    extracted = tasks.extract_fileobj(self.ds, fixture, file_name='cra2.csv')
    tasks.transform_source(self.ds, extracted.name)
    tasks.load(self.ds, extracted.name)

    # First the reported count ...
    assert self.ds.fact_table.num_entries() == 36, (
        self.ds.fact_table.num_entries())

    # ... then the number of entries actually iterated.
    entry_iter = self.ds.fact_table.entries()
    loaded = list(entry_iter)
    assert len(loaded) == 36, loaded
def _test_import(self, name, lines=None):
    """Run extract/transform/load for the fixture ``name`` and verify it.

    :param name: fixture name handed to ``import_fixture``.
    :param lines: expected number of loaded entries. When ``None`` it is
        derived from the fixture itself: the line count of the stream at
        the fixture URL minus one for the header row.

    Asserts that every ``Run`` in the session has status
    ``Run.STATUS_COMPLETE`` and that the first dataset's fact table holds
    exactly ``lines`` entries.
    """
    dataset, url = import_fixture(name)

    if lines is None:
        # Only open the URL when we actually need to count its lines, and
        # always release the handle afterwards.
        data = urllib.urlopen(url)
        try:
            lines = self.count_lines_in_stream(data) - 1  # -1 for header row
        finally:
            data.close()

    source = tasks.extract_url(dataset, url)
    tasks.transform_source(dataset, source.name)
    tasks.load(dataset, source_name=source.name)

    for run in db.session.query(Run).all():
        assert run.status == Run.STATUS_COMPLETE, run

    # check correct number of entries
    dataset = db.session.query(Dataset).first()
    entries = list(dataset.fact_table.entries())
    assert len(entries) == lines, len(entries)
def test_transform_source(self):
    """Transform the extracted ``cra.csv`` fixture and check the result:
    same name as the extracted source, 36 rows, and ``cofog1_label`` rather
    than ``cofog1.label`` as a key in the second row."""
    fixture = csvimport_fixture_file('../data', 'cra.csv')
    extracted = tasks.extract_fileobj(self.ds, fixture, file_name='cra2.csv')
    transformed = tasks.transform_source(self.ds, extracted.name)
    assert transformed.name == extracted.name, transformed.name

    table_rows = list(tasks.load_table(transformed))
    assert len(table_rows) == 36, table_rows

    second_row = table_rows[1]
    assert 'cofog1_label' in second_row, second_row
    assert 'cofog1.label' not in second_row, second_row
def load_from_source(dataset_name, source_name):
    """Transform the named source of the named dataset and load the result.

    :param dataset_name: name used to look the dataset up via
        ``Dataset.by_name``.
    :param source_name: name of the source to transform and load.

    Runs inside the Flask application context. Returns ``None`` in every
    case. Nothing is loaded when the dataset cannot be found, when no
    source name is given, or when the transform step yields ``None``.
    """
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            # Without this guard ``None`` would be passed on to the
            # transform step.
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        artifact = tasks.transform_source(dataset, source_name)
        if artifact is None:
            return
        tasks.load(dataset, source_name=source_name)
def test_field_detection(self):
    """Transform the ``cra.csv`` fixture and check the ``fields`` metadata:
    34 fields, including an ``amount`` field of type ``integer``."""
    fixture = csvimport_fixture_file('../data', 'cra.csv')
    extracted = tasks.extract_fileobj(self.ds, fixture, file_name='cra2.csv')
    transformed = tasks.transform_source(self.ds, extracted.name)

    detected = transformed.meta.get('fields')
    assert len(detected) == 34, len(detected)
    assert 'amount' in detected, detected

    amount_field = detected.get('amount')
    assert amount_field['type'] == 'integer', amount_field
def test_transform_source(self):
    """Transform the extracted ``cra.csv`` fixture and check the artifact:
    named ``tasks.ARTIFACT_NAME``, 36 records, and ``cofog1_label`` rather
    than ``cofog1.label`` as a key in the second record."""
    fixture = csvimport_fixture_file('../data', 'cra.csv')
    extracted = tasks.extract_fileobj(self.ds, fixture, file_name='cra2.csv')
    artifact = tasks.transform_source(self.ds, extracted.name)
    assert artifact.name == tasks.ARTIFACT_NAME, artifact.name

    records = list(artifact.records())
    assert len(records) == 36, records

    second_record = records[1]
    assert 'cofog1_label' in second_record, second_record
    assert 'cofog1.label' not in second_record, second_record
def load_from_source(dataset_name, source_name):
    """Look up the dataset by name, transform the given source and load it.

    Runs inside the Flask application context. An error is logged and
    nothing is loaded when the dataset is missing or no source name is
    given; loading is also skipped when the transform step returns
    ``None``.
    """
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
        elif source_name is None:
            log.error("No source specified: %s", dataset_name)
        elif tasks.transform_source(dataset, source_name) is not None:
            # Only load once the transform step produced something.
            tasks.load(dataset, source_name=source_name)
def load_from_source(dataset_name, source_name):
    """Transform and then load one source of a dataset, both given by name.

    The work happens inside the Flask application context. A missing
    dataset or a missing source name is logged as an error and ends the
    call early; a ``None`` result from the transform step also ends it
    before anything is loaded.
    """
    with flask_app.app_context():
        found = Dataset.by_name(dataset_name)

        # Validate the inputs up front.
        if found is None:
            log.error("Dataset not found: %s", dataset_name)
            return None
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return None

        transformed = tasks.transform_source(found, source_name)
        if transformed is not None:
            tasks.load(found, source_name=source_name)
        return None