def test_successful_import(self):
    # Runs the "successful_import" CSV fixture through CSVImporter, then
    # checks the stored dataset, the number of entries and one known amount.
    importer = CSVImporter(csv_fixture("successful_import"),
                           csv_fixture_model())
    importer.run()

    imported = Dataset.find_one()
    h.assert_true(imported is not None, "Dataset should not be None")
    h.assert_equal(imported.name, "test-csv")

    # Every entry belonging to the freshly imported dataset.
    dataset_entries = list(Entry.find({"dataset.name": imported.name}))
    h.assert_equal(len(dataset_entries), 4)

    # Spot-check the entry recorded with provenance line 2.
    second_line = Entry.find_one({"provenance.line": 2})
    h.assert_true(second_line is not None,
                  "Entry with name could not be found")
    h.assert_equal(second_line.amount, 130000.0)
def test_successful_import_with_simple_testdata(self):
    # Imports the "simple" CSV fixture with the matching "simple" model and
    # checks that no errors were collected and the first entry's fields.
    importer = CSVImporter(csv_fixture("simple"),
                           csv_fixture_model(name="simple"))
    importer.run()
    h.assert_equal(importer.errors, [])

    imported = Dataset.find_one()
    h.assert_true(imported is not None, "Dataset should not be None")

    dataset_entries = list(Entry.find({"dataset.name": imported.name}))
    h.assert_equal(len(dataset_entries), 5)

    # Field-level checks on the first entry returned by the query.
    first = dataset_entries[0]
    h.assert_equal(first["from"]["label"], "Test From")
    h.assert_equal(first["to"]["label"], "Test To")
    h.assert_equal(first["time"]["unparsed"], "2010-01-01")
    h.assert_equal(first["amount"], 100.00)
def command(self):
    """Write a dataset's entries to a GraphML file as a directed graph.

    Expects exactly two positional arguments in ``self.args``: the dataset
    name and the output file name. Each entry contributes its ``from`` and
    ``to`` parties as 'entity' nodes, and the entry amounts are summed into
    one weighted edge per (from, to) pair. Any other dict-valued field of
    an entry is added as a node of its own, without edges.

    Returns 1 when networkx is unavailable or the argument count is wrong;
    otherwise falls through and returns None after writing the file.
    """
    super(GraphCommand, self).command()

    if not nx:
        print("Could not load 'networkx' module, which is needed for graph"
              " command.\nHave you tried `pip install networkx`?")
        return 1

    if len(self.args) != 2:
        GraphCommand.parser.print_help()
        return 1

    dataset_name, file_name = self.args
    graph = nx.DiGraph()
    weights = {}  # (from name, to name) -> summed amount

    def _ensure_entity(entity):
        # Add an 'entity' node for this party unless the graph already has
        # a node under that name; hand back the name either way.
        name = entity.get('name')
        if name not in graph:
            graph.add_node(name, label=entity.get('label'), type='entity',
                           country=entity.get('country', ''))
        return name

    # Entry fields that are never turned into classifier nodes.
    skipped_keys = ['time', 'dataset', 'from', 'to']

    for entry in Entry.find({"dataset.name": dataset_name}):
        # The recipient is registered before the sender.
        target = _ensure_entity(entry.get('to'))
        source = _ensure_entity(entry.get('from'))

        pair = (source, target)
        weights[pair] = weights.get(pair, 0.0) + entry.get('amount')

        for key, value in entry.items():
            if key in skipped_keys or not isinstance(value, dict):
                continue
            node_name = value.get('name')
            if node_name in graph:
                continue
            # A dict-valued 'ref' supplies the node type through its '$ref'
            # key; everything else is a plain classifier.
            ref = value.get('ref')
            if isinstance(ref, dict):
                node_type = ref.get('$ref')
            else:
                node_type = 'classifier'
            graph.add_node(node_name, label=value.get('label', node_name),
                           type=node_type)
            # NOTE: these nodes are intentionally left unconnected; no edges
            # are created between them and the from/to entities.

    for (source, target), weight in weights.items():
        graph.add_edge(source, target, weight=weight)

    nx.write_graphml(graph, file_name)
def _test_dataset_dir(self, dir):
    """Import the fixture under ``csv_import/<dir>`` and verify the result.

    Loads ``data.csv`` and ``mapping.json`` from that fixture directory,
    runs a CSVImporter with the mapping plugged into the default fixture
    model, and asserts that no errors were collected and that the number
    of stored entries equals the CSV line count minus one.
    """
    csv_stream = h.fixture_file("csv_import/%s/data.csv" % dir)
    mapping_stream = h.fixture_file("csv_import/%s/mapping.json" % dir)
    name = unicode(dir)

    # Start from the default fixture model and override mapping and name.
    model = csv_fixture_model()
    model["mapping"] = json.load(mapping_stream)
    model["dataset"]["name"] = name

    # One less than the number of lines in the stream -- presumably to
    # discount the header row (TODO confirm against the fixtures).
    expected_entries = self.count_lines_in_stream(csv_stream) - 1

    importer = CSVImporter(csv_stream, model)
    importer.run()
    assert len(importer.errors) == 0, "Import should not throw errors"

    # check correct number of entries
    stored = Entry.find({"dataset.name": name})
    assert stored.count() == expected_entries
def times(dataset, time_axis):
    """Return the sorted distinct values of ``time_axis`` for a dataset.

    ``dataset`` is matched against the ``dataset.name`` field of the
    entries; ``time_axis`` is the field whose distinct values are
    collected.
    """
    query = {'dataset.name': dataset}
    distinct_values = Entry.find(query).distinct(time_axis)
    return sorted(distinct_values)