def load_from_source(dataset_name, source_name):
    """Transform one source of the named dataset and load its output.

    Silently returns when the dataset or source is missing, or when
    the transform produces no artifact to load.
    """
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        # Bug fix: Dataset.by_name returns None for unknown names; without
        # this guard tasks.transform_source would be handed None. Matches
        # the guard used by the other variants of this task in the file.
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            return
        artifact = tasks.transform_source(dataset, source_name)
        if artifact is None:
            # Transform produced nothing; there is nothing to load.
            return
        tasks.load(dataset, source_name=source_name)
def test_load_data(self):
    """Run extract/transform/load on the CRA fixture and count the
    rows that land in the dataset's fact table."""
    fileobj = csvimport_fixture_file('../data', 'cra.csv')
    src = tasks.extract_fileobj(self.ds, fileobj, file_name='cra2.csv')
    tasks.transform_source(self.ds, src.name)
    tasks.load(self.ds, src.name)
    # Query the fact table directly and verify the expected row count.
    query = self.ds.fact_table.table.select()
    rows = db.engine.execute(query).fetchall()
    assert len(rows) == 36, rows
def test_load_data(self):
    """ETL the CRA fixture and verify the fact table's entry count
    via both num_entries() and the entries() iterator."""
    data = csvimport_fixture_file('../data', 'cra.csv')
    src = tasks.extract_fileobj(self.ds, data, file_name='cra2.csv')
    tasks.transform_source(self.ds, src.name)
    tasks.load(self.ds, src.name)
    # Both count APIs must agree on the fixture's 36 data rows.
    assert self.ds.fact_table.num_entries() == 36, \
        self.ds.fact_table.num_entries()
    loaded = list(self.ds.fact_table.entries())
    assert len(loaded) == 36, loaded
def load_from_source(dataset_name, source_name):
    """Transform a dataset source and, when the transform yields
    output, load it into the dataset."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
        elif source_name is None:
            log.error("No source specified: %s", dataset_name)
        elif tasks.transform_source(dataset, source_name) is not None:
            # Only load when the transform actually produced something.
            tasks.load(dataset, source_name=source_name)
def load_from_source(dataset_name, source_name):
    """Transform the given source of a dataset and load the
    resulting artifact, logging and bailing out on missing inputs."""
    with flask_app.app_context():
        dataset = Dataset.by_name(dataset_name)
        # Guard clauses: unknown dataset or absent source name aborts early.
        if dataset is None:
            log.error("Dataset not found: %s", dataset_name)
            return
        if source_name is None:
            log.error("No source specified: %s", dataset_name)
            return
        result = tasks.transform_source(dataset, source_name)
        if result is not None:
            tasks.load(dataset, source_name=source_name)
def _test_import(self, name, lines=None):
    """Fetch a named fixture by URL, run the full ETL pipeline, and
    verify every run completed and the fact table holds *lines* rows.

    When *lines* is None it is derived from the downloaded stream,
    minus one for the header row.
    """
    dataset, url = import_fixture(name)
    stream = urllib.urlopen(url)
    if lines is None:
        # -1 for header row
        lines = self.count_lines_in_stream(stream) - 1
    source = tasks.extract_url(dataset, url)
    tasks.transform_source(dataset, source.name)
    tasks.load(dataset, source_name=source.name)
    # Every recorded run must have finished successfully.
    for run in db.session.query(Run).all():
        assert run.status == Run.STATUS_COMPLETE, run
    # check correct number of entries
    dataset = db.session.query(Dataset).first()
    loaded = list(dataset.fact_table.entries())
    assert len(loaded) == lines, len(loaded)