def test_duplicate_file(self):
    """Re-uploading under an existing source name yields a de-duplicated name."""
    # First upload claims the requested name verbatim.
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    assert 'cra2.csv' == source.name, source.name
    # Second upload with the same name gets a "-2" suffix inserted.
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    assert 'cra2-2.csv' == source.name, source.name
def upload(dataset):
    """Accept a file upload for *dataset* and queue it for asynchronous loading.

    Raises BadRequest when no file (or no filename) was submitted.
    """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    uploaded = request.files.get('file')
    if not uploaded or not uploaded.filename:
        raise BadRequest("You need to upload a file")
    source = extract_fileobj(dataset, fh=uploaded,
                             file_name=uploaded.filename)
    # Kick off the load as a background task; respond immediately.
    load_from_source.delay(dataset.name, source.name)
    return jsonify(source_to_dict(dataset, source))
def test_transform_source(self):
    """Transforming a source keeps its name and flattens dotted column names."""
    fh = csvimport_fixture_file('../data', 'cra.csv')
    uploaded = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    transformed = tasks.transform_source(self.ds, uploaded.name)
    assert transformed.name == uploaded.name, transformed.name
    rows = list(tasks.load_table(transformed))
    assert len(rows) == 36, rows
    # Dotted headers ("cofog1.label") are rewritten with underscores.
    assert 'cofog1_label' in rows[1], rows[1]
    assert 'cofog1.label' not in rows[1], rows[1]
def test_load_data(self):
    """Loading a transformed source writes all 36 rows into the fact table."""
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    tasks.transform_source(self.ds, source.name)
    tasks.load(self.ds, source.name)
    # Query the fact table directly to confirm the row count.
    query = self.ds.fact_table.table.select()
    resn = db.engine.execute(query).fetchall()
    assert len(resn) == 36, resn
def test_field_detection(self):
    """Transformation detects the field list and infers column types."""
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    source = tasks.transform_source(self.ds, source.name)
    fields = source.meta.get('fields')
    assert len(fields) == 34, len(fields)
    assert 'amount' in fields, fields
    # The monetary column should be sniffed as an integer type.
    amt = fields.get('amount')
    assert amt['type'] == 'integer', amt
def test_transform_source(self):
    """Transforming a source produces the artifact with flattened column names."""
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    art = tasks.transform_source(self.ds, source.name)
    # The transform result is stored under the fixed artifact name.
    assert art.name == tasks.ARTIFACT_NAME, art.name
    rows = list(art.records())
    assert len(rows) == 36, rows
    # Dotted headers ("cofog1.label") are rewritten with underscores.
    assert 'cofog1_label' in rows[1], rows[1]
    assert 'cofog1.label' not in rows[1], rows[1]
def test_load_data(self):
    """Loading a transformed source makes all 36 entries queryable."""
    fh = csvimport_fixture_file('../data', 'cra.csv')
    source = tasks.extract_fileobj(self.ds, fh, file_name='cra2.csv')
    tasks.transform_source(self.ds, source.name)
    tasks.load(self.ds, source.name)
    # Check both the entry count and the iterable of entries.
    assert self.ds.fact_table.num_entries() == 36, \
        self.ds.fact_table.num_entries()
    entries = list(self.ds.fact_table.entries())
    assert len(entries) == 36, entries
def upload(dataset):
    """Accept a file upload for *dataset* and queue it for asynchronous loading.

    Raises BadRequest when no file (or no filename) was submitted.
    """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    uploaded = request.files.get('file')
    if not uploaded or not uploaded.filename:
        raise BadRequest("You need to upload a file")
    # TODO: consider copying this into a tempfile before upload to make
    # boto happy (it appears to be whacky in it's handling of flask uploads)
    source = extract_fileobj(dataset, fh=uploaded,
                             file_name=uploaded.filename)
    # Kick off the load as a background task; respond immediately.
    load_from_source.delay(dataset.name, source.name)
    return jsonify(source_to_dict(dataset, source))
def sign(dataset):
    """Issue an S3 upload policy so the client can upload the file directly.

    Raises BadRequest when no file name is supplied in the request data.
    """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    data = request_data()
    if not data.get('file_name'):
        raise BadRequest("You need to give a file name")
    data['mime_type'] = data.get('mime_type') or 'application/octet-stream'
    # Register an empty stub source now; the client replaces its content
    # by uploading straight to S3 with the signed policy below.
    source = extract_fileobj(dataset, fh=StringIO(),
                             file_name=data['file_name'],
                             mime_type=data['mime_type'])
    policy = generate_s3_upload_policy(source, data['file_name'],
                                       data['mime_type'])
    return jsonify(policy)