Ejemplo n.º 1
0
 def test_successful_import(self):
     """Run the CSV importer on the "successful_import" fixture.

     Checks that a dataset named "test-csv" exists afterwards, that four
     entries reference it, and that the entry whose ``provenance.line``
     is 2 carries an amount of 130000.0.
     """
     fixture = csv_fixture("successful_import")
     mapping = csv_fixture_model()
     CSVImporter(fixture, mapping).run()

     imported = Dataset.find_one()
     h.assert_true(imported is not None, "Dataset should not be None")
     h.assert_equal(imported.name, "test-csv")

     # Materialise the query result so its length can be checked.
     matching = list(Entry.find({"dataset.name": imported.name}))
     h.assert_equal(len(matching), 4)

     second_line = Entry.find_one({"provenance.line": 2})
     h.assert_true(second_line is not None,
                   "Entry with name could not be found")
     h.assert_equal(second_line.amount, 130000.0)
Ejemplo n.º 2
0
    def test_successful_import_with_simple_testdata(self):
        """Run the CSV importer on the "simple" fixture and inspect the result.

        The import must report no errors and leave a dataset with five
        entries; the first entry returned by the query is then checked
        field by field.
        """
        fixture = csv_fixture("simple")
        mapping = csv_fixture_model(name="simple")
        csv_importer = CSVImporter(fixture, mapping)
        csv_importer.run()
        h.assert_equal(csv_importer.errors, [])

        imported = Dataset.find_one()
        h.assert_true(imported is not None, "Dataset should not be None")

        found = list(Entry.find({"dataset.name": imported.name}))
        h.assert_equal(len(found), 5)

        # Only the first entry of the result list is inspected in detail.
        first = found[0]
        h.assert_equal(first["from"]["label"], "Test From")
        h.assert_equal(first["to"]["label"], "Test To")
        h.assert_equal(first["time"]["unparsed"], "2010-01-01")
        h.assert_equal(first["amount"], 100.00)
Ejemplo n.º 3
0
    def command(self):
        """Write the money flows of a dataset to a GraphML file.

        ``self.args`` must hold exactly two items: the dataset name and the
        output file name. Every entry of the dataset contributes its ``to``
        and ``from`` dicts as nodes of type ``'entity'`` and its ``amount``
        to the weight of the directed edge from -> to. Every other
        dict-valued field of an entry is added as a node as well.

        Returns:
            1 if the ``networkx`` module is unavailable or the number of
            arguments is wrong (the parser help is printed in the latter
            case); otherwise ``None`` once the file has been written.
        """
        super(GraphCommand, self).command()

        if not nx:
            # Single-argument call form: prints the same text on Python 2
            # and, unlike the print statement, is valid Python 3 syntax.
            print("Could not load 'networkx' module, which is needed for graph"
                  " command.\nHave you tried `pip install networkx`?")
            return 1

        if len(self.args) != 2:
            GraphCommand.parser.print_help()
            return 1

        dataset_name, file_name = self.args

        graph = nx.DiGraph()
        # (from name, to name) -> summed amount over all entries.
        weights = {}

        def _add_entity(entity):
            """Add *entity* as an 'entity' node if new; return its name."""
            name = entity.get('name')
            if name not in graph:
                graph.add_node(name, label=entity.get('label'),
                               type='entity',
                               country=entity.get('country', ''))
            return name

        for entry in Entry.find({"dataset.name": dataset_name}):
            target = _add_entity(entry.get('to'))
            source = _add_entity(entry.get('from'))

            flow = (source, target)
            weights[flow] = weights.get(flow, 0.0) + entry.get('amount')

            for key, value in entry.items():
                if key in ('time', 'dataset', 'from', 'to') \
                        or not isinstance(value, dict):
                    continue
                name = value.get('name')
                if name in graph:
                    continue
                # Nodes carrying a dict-valued 'ref' take their type from
                # its '$ref' key; everything else counts as a classifier.
                ref = value.get('ref')
                node_type = 'classifier'
                if isinstance(ref, dict):
                    node_type = ref.get('$ref')
                # These nodes get no edges: only from -> to flows are
                # written as weighted edges below.
                graph.add_node(name, label=value.get('label', name),
                               type=node_type)

        for (source, target), weight in weights.items():
            graph.add_edge(source, target, weight=weight)
        nx.write_graphml(graph, file_name)
Ejemplo n.º 4
0
    def _test_dataset_dir(self, dir):
        """Import the CSV fixture stored under ``csv_import/<dir>`` and verify it.

        The fixture directory supplies ``data.csv`` and ``mapping.json``; the
        dataset is named after *dir*. The import must finish without errors
        and produce as many entries as the data stream has lines minus one
        (presumably the header row -- confirm against the fixtures).
        """
        fixture_dir = "csv_import/%s" % dir
        csv_stream = h.fixture_file(fixture_dir + "/data.csv")
        mapping_stream = h.fixture_file(fixture_dir + "/mapping.json")

        name = unicode(dir)

        # Start from the default fixture model, then swap in this
        # directory's mapping and dataset name.
        fixture_model = csv_fixture_model()
        fixture_model["mapping"] = json.load(mapping_stream)
        fixture_model["dataset"]["name"] = name

        expected_entries = self.count_lines_in_stream(csv_stream) - 1

        csv_importer = CSVImporter(csv_stream, fixture_model)
        csv_importer.run()

        assert len(csv_importer.errors) == 0, "Import should not throw errors"

        # The number of stored entries must match the expected count.
        stored = Entry.find({"dataset.name": name})
        assert stored.count() == expected_entries
Ejemplo n.º 5
0
def times(dataset, time_axis):
    """Return the distinct *time_axis* values of a dataset in sorted order.

    Only entries whose ``dataset.name`` equals *dataset* are considered.
    """
    query = {'dataset.name': dataset}
    distinct_values = Entry.find(query).distinct(time_axis)
    return sorted(distinct_values)