Beispiel #1
0
def import_csv(dataset, url, args):
    """
    Import the csv data into the dataset.

    ``url`` is a ``(csv_data_url, source_url)`` pair: the first element is
    where the data is actually read from, the second is the url recorded on
    the source (they differ when importing from the local file system).

    :param dataset: target Dataset model instance
    :param url: tuple of (csv_data_url, source_url)
    :param args: argparse.Namespace forwarded to ``CSVImporter.run``
    """

    csv_data_url, source_url = url

    # Reuse an existing source for this url if the dataset already has one,
    # otherwise create a fresh source owned by the shell account.
    source = None
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    if source is None:
        source = Source(dataset, shell_account(), csv_data_url)

    # Analyse the csv data and add it to the *selected* source.
    # If we don't analyse it we'll be left with a weird message.
    # (Previously the analysis was computed on a throwaway Source object and
    # silently dropped whenever an existing source was reused.)
    source.analysis = analyze_csv(csv_data_url)

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source again
        # or else we'll add a new one)
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
Beispiel #2
0
    def setup(self):
        """Import an error-producing fixture so every test has a run."""
        super(TestRunController, self).setup()
        fixture = csvimport_fixture('import_errors')
        fixture.dataset.managers.append(Account.by_name('test'))
        self.source = fixture
        self.importer = CSVImporter(fixture)
        self.importer.run()
Beispiel #3
0
class TestRunController(ControllerTestCase):
    """Controller tests for viewing import runs."""

    def setup(self):
        super(TestRunController, self).setup()
        # Import a fixture known to produce errors so a run record exists.
        fixture = csvimport_fixture('import_errors')
        fixture.dataset.managers.append(Account.by_name('test'))
        self.source = fixture
        self.importer = CSVImporter(fixture)
        self.importer.run()

    def test_view_run(self):
        # The run view should mention the (readable) source url.
        view_url = url(controller='run', action='view',
                       dataset=self.source.dataset.name,
                       source=self.source.id,
                       id=self.importer._run.id)
        response = self.app.get(view_url,
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert readable_url(self.source.url).encode('utf-8') in response.body

    def test_view_run_does_not_exist(self):
        # An unknown run id must yield a 404.
        view_url = url(controller='run', action='view',
                       dataset=self.source.dataset.name,
                       source=self.source.id,
                       id=47347893)
        response = self.app.get(view_url,
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert '404' in response.status, response.status
Beispiel #4
0
    def test_delete_successfully_loaded_source(self):
        """
        Test source removal with a source that has been successfully loaded.
        Removing a source that has been successfully loaded should not be
        possible.
        """

        # Import a clean fixture.  The dataset is called 'test-csv', but we
        # rely on source.dataset.name in case the fixture ever changes.
        source = csvimport_fixture('successful_import')
        source.dataset.managers.append(Account.by_name('test'))
        CSVImporter(source).run()

        # The source must exist after the import.
        found = db.session.query(Source).filter_by(id=source.id).count()
        assert found == 1, \
            "Import of csv failed. Source not found"

        # Attempt to delete the source.
        delete_url = url(controller='source', action='delete',
                         dataset=source.dataset.name, id=source.id)
        response = self.app.post(delete_url,
                                 extra_environ={'REMOTE_USER': '******'})

        # The loaded source must survive the delete attempt.
        found = db.session.query(Source).filter_by(id=source.id).count()
        assert found == 1, \
            "Deleting source succeeded. The source is gone."
Beispiel #5
0
    def test_delete_source(self):
        """
        Test source removal with a source that includes errors
        """

        # Import a fixture with errors (we want to be able to remove it).
        # The dataset is called 'test-csv', but we rely on
        # source.dataset.name in case the fixture ever changes.
        source = csvimport_fixture('import_errors')
        source.dataset.managers.append(Account.by_name('test'))
        CSVImporter(source).run()

        # The source must exist after the import.
        found = db.session.query(Source).filter_by(id=source.id).count()
        assert found == 1, \
            "Import of csv failed. Source not found"

        # Delete the source.
        delete_url = url(controller='source', action='delete',
                         dataset=source.dataset.name, id=source.id)
        response = self.app.post(delete_url,
                                 extra_environ={'REMOTE_USER': '******'})

        # The erroneous source must now be gone.
        found = db.session.query(Source).filter_by(id=source.id).count()
        assert found == 0, \
            "Deleting source unsuccessful. Source still exists."
Beispiel #6
0
class TestRunController(ControllerTestCase):
    """Controller tests for the run view (requires a real solr backend)."""

    def setup(self):
        # These tests need a real solr instance, not the stub.
        h.skip_if_stubbed_solr()

        super(TestRunController, self).setup()
        fixture = csvimport_fixture('import_errors')
        fixture.dataset.managers.append(Account.by_name('test'))
        self.source = fixture
        self.importer = CSVImporter(fixture)
        self.importer.run()

    def test_view_run(self):
        # The run view should mention the source's name.
        view_url = url(controller='run', action='view',
                       dataset=self.source.dataset.name,
                       source=self.source.id,
                       id=self.importer._run.id)
        response = self.app.get(view_url,
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert self.source.name.encode('utf-8') in response.body

    def test_view_run_does_not_exist(self):
        # Unknown run ids must 404.
        view_url = url(controller='run', action='view',
                       dataset=self.source.dataset.name,
                       source=self.source.id,
                       id=47347893)
        response = self.app.get(view_url,
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert '404' in response.status, response.status
Beispiel #7
0
def import_csv(dataset, url, args):
    """
    Import the csv data into the dataset.

    ``url`` is a ``(csv_data_url, source_url)`` pair; the two urls differ
    when the data was loaded from the local file system.

    :param dataset: target Dataset model instance
    :param url: tuple of (csv_data_url, source_url)
    :param args: argparse.Namespace forwarded to ``CSVImporter.run``
    """

    csv_data_url, source_url = url

    # Prefer an already-registered source for this url; otherwise create one.
    source = None
    for existing in dataset.sources:
        if existing.url == csv_data_url:
            source = existing
            break
    if source is None:
        source = Source(dataset, shell_account(), csv_data_url)

    # Analyse the csv data and add it to the *selected* source.
    # If we don't analyse it we'll be left with a weird message.
    # (The old code analysed a scratch Source that was thrown away whenever
    # an existing source matched, so reused sources never got an analysis.)
    source.analysis = analyze_csv(csv_data_url)

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # Re-fetch the source so we update the stored row rather than
        # adding a new one, then record the real source url.
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
Beispiel #8
0
 def test_error_with_empty_additional_date(self):
     """Date cells without a value are currently rejected with one error."""
     loader = CSVImporter(csvimport_fixture('empty_additional_date'))
     loader.run()
     # We are currently not able to import date cells without a value. See:
     # http://trac.openspending.org/ticket/170
     h.assert_equal(loader.errors, 1)
Beispiel #9
0
    def test_delete_successfully_loaded_source(self):
        """
        Test source removal with a source that has been successfully loaded.
        Removing a source that has been successfully loaded should not be
        possible.
        """

        # Add and import a source without errors.  The dataset is called
        # 'test-csv', but we use source.dataset.name in case it changes.
        src = csvimport_fixture('successful_import')
        src.dataset.managers.append(Account.by_name('test'))
        importer = CSVImporter(src)
        importer.run()

        # The import must have registered the source.
        assert db.session.query(Source).filter_by(id=src.id).count() == 1, \
            "Import of csv failed. Source not found"

        # Try to delete the source.
        response = self.app.post(
            url(controller='source', action='delete',
                dataset=src.dataset.name, id=src.id),
            extra_environ={'REMOTE_USER': '******'})

        # A successfully loaded source must not be deletable.
        assert db.session.query(Source).filter_by(id=src.id).count() == 1, \
            "Deleting source succeeded. The source is gone."
Beispiel #10
0
    def test_delete_source(self):
        """
        Test source removal with a source that includes errors
        """

        # Add and import a source with errors (we want to remove it).
        # The dataset is called 'test-csv', but we use source.dataset.name
        # in case it changes.
        src = csvimport_fixture('import_errors')
        src.dataset.managers.append(Account.by_name('test'))
        importer = CSVImporter(src)
        importer.run()

        # The import must have registered the source.
        assert db.session.query(Source).filter_by(id=src.id).count() == 1, \
            "Import of csv failed. Source not found"

        # Delete the source.
        response = self.app.post(
            url(controller='source', action='delete',
                dataset=src.dataset.name, id=src.id),
            extra_environ={'REMOTE_USER': '******'})

        # An erroneous source must be removable.
        assert db.session.query(Source).filter_by(id=src.id).count() == 0, \
            "Deleting source unsuccessful. Source still exists."
Beispiel #11
0
 def test_error_with_empty_additional_date(self):
     """An empty date cell should be counted as exactly one import error."""
     fixture = csvimport_fixture('empty_additional_date')
     importer = CSVImporter(fixture)
     importer.run()
     # We are currently not able to import date cells without a value. See:
     # http://trac.openspending.org/ticket/170
     h.assert_equal(importer.errors, 1)
Beispiel #12
0
    def test_no_dimensions_for_measures(self):
        """Measure columns must not show up as dataset dimensions."""
        loader = CSVImporter(csvimport_fixture('simple'))
        loader.run()
        dataset = db.session.query(Dataset).first()

        names = sorted(str(dim.name) for dim in dataset.dimensions)
        assert names == ['entry_id', 'from', 'time', 'to']
Beispiel #13
0
    def test_no_dimensions_for_measures(self):
        """Measure columns must not show up as dataset dimensions."""
        loader = CSVImporter(csvimport_fixture('simple'))
        loader.run()
        dataset = db.session.query(Dataset).first()

        names = sorted(str(dim.name) for dim in dataset.dimensions)
        h.assert_equal(names, ['entry_id', 'from', 'time', 'to'])
Beispiel #14
0
 def test_erroneous_values(self):
     """A dry run over the bad-values fixture reports two errors."""
     loader = CSVImporter(csvimport_fixture('erroneous_values'))
     loader.run(dry_run=True)
     h.assert_equal(loader.errors, 2)
     error_records = list(loader._run.records)
     # The second error record should point at the bad date cell.
     h.assert_true("time" in error_records[1].attribute,
                   "Should find badly formatted date")
     h.assert_equal(error_records[1].row, 5)
Beispiel #15
0
    def test_import_errors(self):
        """The 'import_errors' fixture must fail starting at its first line."""
        fixture = csvimport_fixture('import_errors')

        loader = CSVImporter(fixture)
        loader.run(dry_run=True)
        h.assert_true(loader.errors > 1, "Should have errors")
        error_records = list(loader._run.records)
        h.assert_equal(error_records[0].row, 1,
                       "Should detect missing date colum in line 1")
Beispiel #16
0
    def test_import_errors(self):
        """Dry-running the erroneous fixture flags the first data line."""
        src = csvimport_fixture('import_errors')

        imp = CSVImporter(src)
        imp.run(dry_run=True)
        h.assert_true(imp.errors > 1, "Should have errors")
        recorded = list(imp._run.records)
        h.assert_equal(recorded[0].row, 1,
                       "Should detect missing date colum in line 1")
Beispiel #17
0
 def test_erroneous_values(self):
     """Two errors are expected from the erroneous-values fixture."""
     src = csvimport_fixture('erroneous_values')
     imp = CSVImporter(src)
     imp.run(dry_run=True)
     h.assert_equal(imp.errors, 2)
     recorded = list(imp._run.records)
     # Record 1 should be the badly formatted date on row 5.
     h.assert_true("time" in recorded[1].attribute,
                   "Should find badly formatted date")
     h.assert_equal(recorded[1].row, 5)
Beispiel #18
0
    def test_empty_csv(self):
        """An empty data file should yield two errors, both on row 0."""
        fixture = csvimport_fixture('default')
        fixture.url = 'file:///dev/null'
        loader = CSVImporter(fixture)
        loader.run(dry_run=True)

        h.assert_equal(loader.errors, 2)
        error_records = list(loader._run.records)
        h.assert_equal(error_records[0].row, 0)
        h.assert_equal(error_records[1].row, 0)
        h.assert_true("Didn't read any lines of data" in str(error_records[1].message))
Beispiel #19
0
    def test_empty_csv(self):
        """Importing /dev/null produces two row-0 errors and a clear message."""
        src = csvimport_fixture('default')
        src.url = 'file:///dev/null'
        imp = CSVImporter(src)
        imp.run(dry_run=True)

        h.assert_equal(imp.errors, 2)
        recorded = list(imp._run.records)
        h.assert_equal(recorded[0].row, 0)
        h.assert_equal(recorded[1].row, 0)
        h.assert_true("Didn't read any lines of data" in str(recorded[1].message))
Beispiel #20
0
    def test_successful_import(self):
        """A clean fixture imports fully: dataset present, 4 entries."""
        fixture = csvimport_fixture('successful_import')
        CSVImporter(fixture).run()
        dataset = db.session.query(Dataset).first()
        h.assert_true(dataset is not None, "Dataset should not be None")
        h.assert_equal(dataset.name, "test-csv")
        h.assert_equal(len(list(dataset.entries())), 4)

        # TODO: provenance
        entry = list(dataset.entries(limit=1, offset=1)).pop()
        h.assert_true(entry is not None, "Entry with name could not be found")
        h.assert_equal(entry['amount'], 66097.77)
Beispiel #21
0
    def _test_import(self, name):
        """
        Import fixture *name* and check that every data row becomes exactly
        one entry and that no errors occur.
        """
        source = csvimport_fixture(name)
        # Use a context manager so the fixture file is closed promptly
        # (the original leaked the open file handle).
        with open(source.url) as data:
            lines = self.count_lines_in_stream(data) - 1  # -1 for header row

        importer = CSVImporter(source)
        importer.run()

        h.assert_equal(importer.errors, 0)

        # check correct number of entries
        dataset = db.session.query(Dataset).first()
        entries = list(dataset.entries())
        h.assert_equal(len(entries), lines)
Beispiel #22
0
    def _test_import(self, name):
        """
        Import fixture *name*; every data row must become one entry and the
        import must produce no errors.
        """
        source = csvimport_fixture(name)
        # Use a context manager so the fixture file is closed promptly
        # (the original leaked the open file handle).
        with open(source.url) as data:
            lines = self.count_lines_in_stream(data) - 1  # -1 for header row

        importer = CSVImporter(source)
        importer.run()

        assert importer.errors == 0

        # check correct number of entries
        dataset = db.session.query(Dataset).first()
        entries = list(dataset.entries())
        assert len(entries) == lines
Beispiel #23
0
    def test_successful_import(self):
        """The clean fixture imports a 'test-csv' dataset with 4 entries."""
        src = csvimport_fixture('successful_import')
        imp = CSVImporter(src)
        imp.run()
        dataset = db.session.query(Dataset).first()
        h.assert_true(dataset is not None, "Dataset should not be None")
        h.assert_equal(dataset.name, "test-csv")
        loaded = list(dataset.entries())
        h.assert_equal(len(loaded), 4)

        # TODO: provenance
        entry = list(dataset.entries(limit=1, offset=1)).pop()
        h.assert_true(entry is not None,
                      "Entry with name could not be found")
        h.assert_equal(entry['amount'], 66097.77)
Beispiel #24
0
    def test_erroneous_values(self):
        """A dry run over bad values should produce exactly four errors."""
        loader = CSVImporter(csvimport_fixture('erroneous_values'))
        loader.run(dry_run=True)

        # Expected failures:
        # * unique key constraint not met (2x)
        # * amount cannot be parsed
        # * time cannot be parse
        h.assert_equal(loader.errors, 4)
        error_records = list(loader._run.records)
        # The fourth record should be about badly formed date
        h.assert_true("time" in error_records[3].attribute,
                      "Should find badly formatted date")
        # The row number of the badly formed date should be 5
        h.assert_equal(error_records[3].row, 5)
Beispiel #25
0
    def setup(self):
        """Prepare a source with import errors and run the importer once."""
        super(TestRunController, self).setup()
        src = csvimport_fixture('import_errors')
        src.dataset.managers.append(Account.by_name('test'))
        self.source = src
        self.importer = CSVImporter(src)
        self.importer.run()
Beispiel #26
0
    def test_successful_import_with_simple_testdata(self):
        """The 'simple' fixture imports 5 entries with the expected values."""
        loader = CSVImporter(csvimport_fixture('simple'))
        loader.run()
        h.assert_equal(loader.errors, 0)

        dataset = db.session.query(Dataset).first()
        h.assert_true(dataset is not None, "Dataset should not be None")

        loaded = list(dataset.entries())
        h.assert_equal(len(loaded), 5)

        # Spot-check the first entry's dimensions and measure.
        first = loaded[0]
        h.assert_equal(first['from']['label'], 'Test From')
        h.assert_equal(first['to']['label'], 'Test To')
        h.assert_equal(first['time']['name'], '2010-01-01')
        h.assert_equal(first['amount'], 100.00)
Beispiel #27
0
    def test_successful_import_with_simple_testdata(self):
        """The 'simple' fixture imports cleanly with five known entries."""
        loader = CSVImporter(csvimport_fixture('simple'))
        loader.run()
        assert loader.errors == 0

        dataset = db.session.query(Dataset).first()
        assert dataset is not None, "Dataset should not be None"

        loaded = list(dataset.entries())
        assert len(loaded) == 5

        # Spot-check the first entry's dimensions and measure.
        first = loaded[0]
        assert first['from']['label'] == 'Test From'
        assert first['to']['label'] == 'Test To'
        assert first['time']['name'] == '2010-01-01'
        assert first['amount'] == 100.00
Beispiel #28
0
    def test_successful_import_with_simple_testdata(self):
        """Simple test data should import without errors (5 entries)."""
        src = csvimport_fixture('simple')
        imp = CSVImporter(src)
        imp.run()
        assert imp.errors == 0

        dataset = db.session.query(Dataset).first()
        assert dataset is not None, "Dataset should not be None"

        rows = list(dataset.entries())
        assert len(rows) == 5

        sample = rows[0]
        assert sample['from']['label'] == 'Test From'
        assert sample['to']['label'] == 'Test To'
        assert sample['time']['name'] == '2010-01-01'
        assert sample['amount'] == 100.00
Beispiel #29
0
    def test_successful_import_with_simple_testdata(self):
        """Simple test data should import without errors (5 entries)."""
        src = csvimport_fixture('simple')
        imp = CSVImporter(src)
        imp.run()
        h.assert_equal(imp.errors, 0)

        dataset = db.session.query(Dataset).first()
        h.assert_true(dataset is not None, "Dataset should not be None")

        rows = list(dataset.entries())
        h.assert_equal(len(rows), 5)

        sample = rows[0]
        h.assert_equal(sample['from']['label'], 'Test From')
        h.assert_equal(sample['to']['label'], 'Test To')
        h.assert_equal(sample['time']['name'], '2010-01-01')
        h.assert_equal(sample['amount'], 100.00)
Beispiel #30
0
    def test_erroneous_values(self):
        """Dry-running the bad-values fixture yields exactly four errors."""
        loader = CSVImporter(csvimport_fixture('erroneous_values'))
        loader.run(dry_run=True)

        # Expected failures:
        # * unique key constraint not met (2x)
        # * amount cannot be parsed
        # * time cannot be parse
        assert loader.errors == 4

        error_records = list(loader._run.records)
        # The fourth record should be about badly formed date
        assert "time" in error_records[3].attribute, \
            "Should find badly formatted date"

        # The row number of the badly formed date should be 5
        assert error_records[3].row == 5
Beispiel #31
0
def load_source(source_id, sample=False):
    """
    Load the source with the given id.

    When *sample* is True, only a bounded dry run is performed; otherwise
    the full import runs and the dataset is queued for (re)indexing.

    :param source_id: id of the Source to load
    :param sample: if True, dry-run at most 1000 lines / 1000 errors
    """
    from openspending.model.source import Source
    from openspending.importer import CSVImporter
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Bail out: continuing would raise AttributeError on None below.
        return

    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
        index_dataset.delay(source.dataset.name)
Beispiel #32
0
def load_source(source_id, sample=False):
    """
    Load the source with the given id.

    When *sample* is True the run is bounded to 1000 lines / 1000 errors;
    in both cases the dataset is queued for (re)indexing afterwards.

    :param source_id: id of the Source to load
    :param sample: if True, limit the run to 1000 lines / 1000 errors
    """
    from openspending.model import Source
    from openspending.importer import CSVImporter
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Bail out: continuing would raise AttributeError on None below.
        return

    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(max_lines=1000, max_errors=1000)
    else:
        importer.run()
    index_dataset.delay(source.dataset.name)
Beispiel #33
0
 def test_error_with_empty_additional_date(self):
     """An empty date cell should be counted as exactly one error."""
     loader = CSVImporter(csvimport_fixture('empty_additional_date'))
     loader.run()
     assert loader.errors == 1
Beispiel #34
0
    # NOTE(review): fragment — the enclosing function's def is not visible in
    # this chunk; `model`, `csv_data_url` and `args` come from it.
    # Fetch the dataset named by the model, creating it if necessary.
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)

    # Reuse an existing source with the same url, if the dataset has one.
    source = Source(dataset, shell_account(), csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    # Materialize the dataset model and run the import with the CLI args.
    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0


def _csvimport(args):
    """CLI shim: forward the parsed arguments to ``csvimport``."""
    dataset_url = args.dataset_url
    return csvimport(dataset_url, args)


def configure_parser(subparser):
    """Register the 'csvimport' subcommand on *subparser*."""
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    # NOTE(review): this add_argument call is truncated in this chunk;
    # the remaining keyword arguments are not visible here.
    p.add_argument('--model',
                   action="store",
Beispiel #35
0
 def test_quoting(self):
     """Quoted CSV fields should import without any errors."""
     loader = CSVImporter(csvimport_fixture('quoting'))
     loader.run()
     h.assert_equal(loader.errors, 0)
Beispiel #36
0
 def test_malformed_csv(self):
     """A malformed CSV file should be rejected with a single error."""
     loader = CSVImporter(csvimport_fixture('malformed'))
     loader.run(dry_run=True)
     assert loader.errors == 1
Beispiel #37
0
 def test_quoting(self):
     """The quoting fixture must import cleanly (zero errors)."""
     fixture = csvimport_fixture('quoting')
     imp = CSVImporter(fixture)
     imp.run()
     h.assert_equal(imp.errors, 0)
Beispiel #38
0
 def test_error_with_empty_additional_date(self):
     """An empty additional-date cell should yield exactly one error."""
     fixture = csvimport_fixture('empty_additional_date')
     imp = CSVImporter(fixture)
     imp.run()
     assert imp.errors == 1
Beispiel #39
0
 def test_malformed_csv(self):
     """Malformed CSV input should be counted as one error on a dry run."""
     fixture = csvimport_fixture('malformed')
     imp = CSVImporter(fixture)
     imp.run(dry_run=True)
     h.assert_equal(imp.errors, 1)
Beispiel #40
0
 def test_quoting(self):
     """Quoted CSV fields should import without any errors."""
     loader = CSVImporter(csvimport_fixture('quoting'))
     loader.run()
     assert loader.errors == 0
Beispiel #41
0
 def test_quoting(self):
     """The quoting fixture must import cleanly (zero errors)."""
     fixture = csvimport_fixture('quoting')
     imp = CSVImporter(fixture)
     imp.run()
     assert imp.errors == 0
Beispiel #42
0
    # NOTE(review): fragment — `dataset`, `model`, `csv_data_url` and `args`
    # are defined earlier in the enclosing function, outside this chunk.
    # Create the dataset from the model if it does not exist yet.
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)

    # Reuse an existing source with the same url, if the dataset has one.
    source = Source(dataset, shell_account(), 
                    csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()
    
    # Materialize the dataset model and run the import with the CLI args.
    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0

def _csvimport(args):
    """CLI shim: hand the parsed arguments over to ``csvimport``."""
    target = args.dataset_url
    return csvimport(target, args)

def configure_parser(subparser):
    """Register the 'csvimport' subcommand and its arguments."""
    parser = subparser.add_parser('csvimport',
                                  help='Load a CSV dataset',
                                  description='You must specify --model.',
                                  parents=[import_parser])
    # Optional model url, then the positional dataset url.
    parser.add_argument('--model', action="store", dest='model',
                        default=None, metavar='url',
                        help="URL of JSON format model (metadata and mapping).")
    parser.add_argument('dataset_url', help="Dataset file URL")