def view(dataset, id):
    """Return a single run as JSON, with its logged messages attached."""
    dataset = get_dataset(dataset)
    run = obj_or_404(Run.by_id(dataset, id))
    result = run.to_dict()
    # Log messages live in the dataset's package store, keyed by run id.
    pkg = data_manager.package(dataset.name)
    result['messages'] = list(logger.load(pkg, run.id))
    return jsonify(result)
def test_load_from_url(self):
    """Loading from a URL should ingest exactly one source named cra.csv."""
    tasks.load_from_url(self.dsn, self.cra_url)
    pkg = data_manager.package(self.dsn)
    found = list(pkg.all(Source))
    assert len(found) == 1, found
    first = found[0]
    assert first.meta['name'] == 'cra.csv', first.meta.items()
def serve(dataset, name):
    """Serve a source file: redirect to its URL if remote, else stream it."""
    dataset = get_dataset(dataset)
    pkg = data_manager.package(dataset.name)
    src = Source(pkg, name)
    if src.url is None:
        # Locally stored artifact — stream the file handle directly.
        return send_file(src.fh(), mimetype=src.meta.get('mime_type'))
    return redirect(src.url)
def csvimport_table(name):
    """Ingest and validate a CSV fixture; return (field metadata, rows).

    Imports are local to avoid importing the app stack at module load.
    """
    from spendb.core import data_manager
    from spendb.etl.extract import validate_table, load_table

    # A throwaway package name keeps fixture runs isolated from each other.
    pkg = data_manager.package(uuid.uuid4().hex)
    src = validate_table(pkg.ingest(data_fixture(name)))
    return src.meta.get('fields'), list(load_table(src))
def load(dataset, name):
    """Queue an asynchronous ETL load of an existing source for a dataset."""
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    pkg = data_manager.package(dataset.name)
    src = Source(pkg, name)
    if not src.exists():
        raise BadRequest('Source does not exist.')
    # Fire-and-forget: the actual load runs in the task queue.
    load_from_source.delay(dataset.name, src.name)
    return jsonify({'status': 'ok'})
def test_extract_missing_url(self):
    """Extracting a nonexistent URL yields no source but records log errors."""
    missing = csvimport_fixture_path('../data', 'xcra.csv')
    result = tasks.extract_url(self.ds, missing)
    assert result is None, result
    run = db.session.query(Run).first()
    pkg = data_manager.package(self.ds.name)
    logged = list(logger.load(pkg, run.id))
    assert len(logged) > 2, logged
def start(self):
    """Begin a tracked operation: persist a Run row and start log capture."""
    self.run = Run(self.operation, Run.STATUS_RUNNING, self.dataset)
    db.session.add(self.run)
    # Commit first so the run id exists before log messages reference it.
    db.session.commit()
    self.package = data_manager.package(self.dataset.name)
    self.log_handler = capture(self.package, self.run.id,
                               [self.log, 'loadkit'])
    self.log.info("Starting: %s", self.operation)
def index(dataset):
    """List a dataset's sources as paged JSON, most recently updated first."""
    dataset = get_dataset(dataset)
    pkg = data_manager.package(dataset.name)
    ordered = sorted(pkg.all(Source),
                     key=lambda s: s.meta.get('updated_at'),
                     reverse=True)

    def convert(batch):
        return [source_to_dict(dataset, s) for s in batch]

    return jsonify(Pager(ordered, dataset=dataset.name, limit=5,
                         results_converter=convert))
def index(dataset):
    """List the sources of a dataset, most recently updated first.

    Returns a paged JSON listing (5 per page); each entry is rendered
    via ``source_to_dict``.
    """
    dataset = get_dataset(dataset)
    package = data_manager.package(dataset.name)
    sources = list(package.all(Source))
    # 'updated_at' may be missing from a source's metadata; default the
    # key to '' so every key is a string — comparing None with str raises
    # TypeError under Python 3 and would break the whole listing.
    sources = sorted(sources, key=lambda s: s.meta.get('updated_at') or '',
                     reverse=True)

    # Named function instead of an assigned lambda (PEP 8 / E731).
    def rc(ss):
        return [source_to_dict(dataset, s) for s in ss]

    return jsonify(
        Pager(sources, dataset=dataset.name, limit=5, results_converter=rc))
def test_manager(self):
    """The data manager exposes a collection and hands out packages by name."""
    assert data_manager.collection is not None, data_manager.collection
    pkg = data_manager.package('cra')
    assert pkg.id == 'cra', pkg
def view(dataset, name):
    """Return JSON metadata for a single named source of a dataset."""
    ds = get_dataset(dataset)
    src = Source(data_manager.package(ds.name), name)
    return jsonify(source_to_dict(ds, src))