def _test_file_with_model(self, data_filename, model, checks):
    """Dry-run import of the named CSV fixture, then apply each check callback."""
    fixture = csv_fixture(data_filename)
    csv_importer = CSVImporter(fixture, model)
    csv_importer.run(dry_run=True)
    for verify in checks:
        verify(self, csv_importer)
def test_error_with_empty_additional_date(self):
    """An empty additional date cell must surface exactly one import error."""
    fixture_data, fixture_model = csvimport_fixture('empty_additional_date')
    imp = CSVImporter(fixture_data, fixture_model)
    imp.run()
    # We are currently not able to import date cells without a value. See:
    # http://trac.openspending.org/ticket/170
    h.assert_equal(len(imp.errors), 1)
def command(self):
    """Import a CSV dataset from a URL.

    The model is supplied either whole via --model, or assembled from
    --mapping plus --metadata. Returns 0 on success, 1 on error.
    """
    super(CSVImportCommand, self).command()
    self._check_args_length(1)

    def fetch_json(url):
        # Download a URL and parse its body as JSON.
        return json.load(urllib2.urlopen(url))

    source_url = self.args.pop(0)

    opts = self.options
    # Either a complete model, or both of its pieces, must be given.
    if not (opts.model or (opts.mapping and opts.metadata)):
        print("You must provide --model OR (--mapping AND --metadata)!",
              file=sys.stderr)
        return 1

    if opts.model:
        model = fetch_json(opts.model)
    else:
        from openspending.ui.lib.mappingimporter import MappingImporter
        model = {
            "mapping": MappingImporter().import_from_url(opts.mapping),
            "dataset": fetch_json(opts.metadata),
        }

    lines = util.urlopen_lines(source_url)
    loader = CSVImporter(lines, model, source_url)
    try:
        loader.run(**self.get_args())
    except ImporterError as err:
        log.error(err)
        return 1
    return 0
def test_import_errors(self):
    """A broken fixture should yield errors locating the first bad line."""
    imp = CSVImporter(csv_fixture("import_errors"), csv_fixture_model())
    imp.run(dry_run=True)
    h.assert_true(len(imp.errors) > 1, "Should have errors")
    h.assert_equal(imp.errors[0].line_number, 1,
                   "Should detect missing date colum in line 1")
def test_erroneous_values(self):
    """A badly formatted date yields exactly one error, pointing at line 5."""
    imp = CSVImporter(csv_fixture("erroneous_values"), csv_fixture_model())
    imp.run(dry_run=True)
    h.assert_equal(len(imp.errors), 1)
    h.assert_true("date" in imp.errors[0].message,
                  "Should find badly formatted date")
    h.assert_equal(imp.errors[0].line_number, 5)
def test_no_dimensions_for_measures(self):
    """Measure columns must not appear among the dataset's dimensions."""
    fixture_data, fixture_model = csvimport_fixture('simple')
    CSVImporter(fixture_data, fixture_model).run()
    dataset = db.session.query(Dataset).first()
    names = sorted(str(dim.name) for dim in dataset.dimensions)
    h.assert_equal(names, ['entry_id', 'from', 'time', 'to'])
def csv_import(resource_url, model_url, **kwargs):
    """Fetch a model and a CSV resource by URL and run an import.

    Extra keyword arguments are forwarded to CSVImporter.run().
    """
    import urllib

    from openspending.lib import json
    from openspending.etl import util
    from openspending.etl.importer import CSVImporter

    model = json.load(urllib.urlopen(model_url))
    lines = util.urlopen_lines(resource_url)
    CSVImporter(lines, model, resource_url).run(**kwargs)
def test_empty_csv(self):
    """An empty input stream produces two errors, both reported at line 0."""
    imp = CSVImporter(StringIO(""), csv_fixture_model())
    imp.run(dry_run=True)
    h.assert_equal(len(imp.errors), 2)
    for err in imp.errors:
        h.assert_equal(err.line_number, 0)
    h.assert_true("Didn't read any lines of data" in str(imp.errors[1].message))
def test_successful_import(self):
    """A clean fixture imports fully: named dataset, 4 entries, known amount."""
    imp = CSVImporter(csv_fixture("successful_import"), csv_fixture_model())
    imp.run()
    dataset = Dataset.find_one()
    h.assert_true(dataset is not None, "Dataset should not be None")
    h.assert_equal(dataset.name, "test-csv")
    imported = list(Entry.find({"dataset.name": dataset.name}))
    h.assert_equal(len(imported), 4)
    second_row = Entry.find_one({"provenance.line": 2})
    h.assert_true(second_row is not None, "Entry with name could not be found")
    h.assert_equal(second_row.amount, 130000.0)
def _test_import(self, name):
    """Import the named fixture and verify the entry count matches the CSV."""
    fixture_data, fixture_model = csvimport_fixture(name)
    expected = self.count_lines_in_stream(fixture_data) - 1  # -1 for header row
    imp = CSVImporter(fixture_data, fixture_model)
    imp.run()
    h.assert_equal(len(imp.errors), 0)
    # check correct number of entries
    dataset = db.session.query(Dataset).first()
    h.assert_equal(len(list(dataset.entries())), expected)
def test_successful_import(self):
    """A clean fixture imports: correct dataset name, 4 entries, known amount."""
    fixture_data, fixture_model = csvimport_fixture('successful_import')
    CSVImporter(fixture_data, fixture_model).run()
    dataset = db.session.query(Dataset).first()
    h.assert_true(dataset is not None, "Dataset should not be None")
    h.assert_equal(dataset.name, "test-csv")
    h.assert_equal(len(list(dataset.entries())), 4)
    # TODO: provenance
    second = list(dataset.entries(limit=1, offset=1)).pop()
    h.assert_true(second is not None, "Entry with name could not be found")
    h.assert_equal(second['amount'], 66097.77)
def test_successful_import_with_simple_testdata(self):
    """The 'simple' fixture imports 5 entries with the expected field values."""
    fixture_data, fixture_model = csvimport_fixture('simple')
    imp = CSVImporter(fixture_data, fixture_model)
    imp.run()
    h.assert_equal(imp.errors, [])
    dataset = db.session.query(Dataset).first()
    h.assert_true(dataset is not None, "Dataset should not be None")
    rows = list(dataset.entries())
    h.assert_equal(len(rows), 5)
    first = rows[0]
    h.assert_equal(first['from']['label'], 'Test From')
    h.assert_equal(first['to']['label'], 'Test To')
    h.assert_equal(first['time']['name'], '2010-01-01')
    h.assert_equal(first['amount'], 100.00)
def csvimport(csv_data_url, args):
    """Import CSV data from a URL; the model must be supplied via --model.

    Returns 0 on success, 1 when no model option was given.
    """
    def fetch_json(url):
        # Download a URL and parse its body as JSON.
        return json.load(urllib2.urlopen(url))

    if not args.model:
        print("You must provide --model!", file=sys.stderr)
        return 1
    model = fetch_json(args.model)

    lines = util.urlopen_lines(csv_data_url)
    CSVImporter(lines, model, csv_data_url).run(**vars(args))
    return 0
def test_successful_import_with_simple_testdata(self):
    """The 'simple' fixture imports 5 entries with the expected field values."""
    imp = CSVImporter(csv_fixture("simple"), csv_fixture_model(name="simple"))
    imp.run()
    h.assert_equal(imp.errors, [])
    dataset = Dataset.find_one()
    h.assert_true(dataset is not None, "Dataset should not be None")
    rows = list(Entry.find({"dataset.name": dataset.name}))
    h.assert_equal(len(rows), 5)
    first = rows[0]
    h.assert_equal(first["from"]["label"], "Test From")
    h.assert_equal(first["to"]["label"], "Test To")
    h.assert_equal(first["time"]["unparsed"], "2010-01-01")
    h.assert_equal(first["amount"], 100.00)
def _test_dataset_dir(self, dir):
    """Import a fixture directory's data.csv using its mapping.json."""
    data_csv = h.fixture_file("csv_import/%s/data.csv" % dir)
    mapping_json = h.fixture_file("csv_import/%s/mapping.json" % dir)
    dataset_name = unicode(dir)

    model = csv_fixture_model()
    model["mapping"] = json.load(mapping_json)
    model["dataset"]["name"] = dataset_name

    expected = self.count_lines_in_stream(data_csv) - 1  # minus header row
    imp = CSVImporter(data_csv, model)
    imp.run()
    assert len(imp.errors) == 0, "Import should not throw errors"

    # check correct number of entries
    matched = Entry.find({"dataset.name": dataset_name})
    assert matched.count() == expected
def test_malformed_csv(self):
    """Malformed CSV input is reported as a single import error."""
    fixture_data, fixture_model = csvimport_fixture('malformed')
    imp = CSVImporter(fixture_data, fixture_model)
    imp.run(dry_run=True)
    h.assert_equal(len(imp.errors), 1)