def data(args):
    from colander import Invalid
    from openspending.validation.data import convert_types
    return_code = 0
    model = _validate_model(args.model)
    if model is None:
        return 1
    try:
        fh = open(args.csv_file, 'rb')
        for line in UnicodeDictReader(fh):
            try:
                convert_types(model['mapping'], line)
            except Invalid as errors:
                return_code = 1
                for error in errors.children:
                    value = error.value
                    if value and len(value) > 70:
                        value = value[:66] + ' ...'
                    message = "[Column '%s' -> Attribute '%s' " \
                        "(%s)]\n\t%s\n\t(Value: %s)\n" % (
                            error.column, error.node.name,
                            error.datatype, error.msg,
                            value)
                    print message.encode('utf-8')
    except Exception as ex:
        print unicode(ex).encode('utf-8')
        return 1
    if not return_code:
        print "OK: data validates for the model."
    return return_code
Example #2
0
    def process_line(self, line):
        """Type-convert one source row and load it into the model — or,
        in dry-run mode, only verify the uniqueness of its key columns."""
        # Periodic progress report while importing.
        if self.row_number % 1000 == 0:
            log.info('Imported %s lines' % self.row_number)

        try:
            mapping = self.dataset.mapping.get('mapping', {})
            data = convert_types(mapping, line)
            if self.dry_run:
                # Check uniqueness
                unique_value = ', '.join([unicode(data[k]) for k in self.key])
                if unique_value in self.unique_check:
                    # Log the error (with the unique key represented as
                    # a dictionary)
                    self.log_exception(
                        ValueError("Unique key constraint not met"),
                        error="%s is not a unique key" % unique_value)
                self.unique_check[unique_value] = True
            else:
                self.source.model.load(data)
        except Invalid as invalid:
            # One log entry per failing attribute.
            for child in invalid.children:
                self.log_invalid_data(child)
            if self.raise_errors:
                raise
        except Exception as ex:
            self.log_exception(ex)
            if self.raise_errors:
                raise
Example #3
0
def load_dataset(dataset):
    """Load every type-converted row of the 'simple' fixture into
    *dataset*'s model."""
    simple_model = model_fixture('simple')
    data = data_fixture('simple')
    for record in csv.DictReader(data):
        converted = convert_types(simple_model['mapping'], record)
        dataset.model.load(converted)
    data.close()
Example #4
0
def load_dataset(dataset):
    """Parse the in-memory TEST_DATA CSV and load each converted row
    into *dataset*."""
    from StringIO import StringIO
    import csv
    from openspending.validation.data import convert_types
    for record in csv.DictReader(StringIO(TEST_DATA)):
        dataset.load(convert_types(SIMPLE_MODEL['mapping'], record))
Example #5
0
def load_dataset(dataset):
    """Feed every row of the TEST_DATA CSV through type conversion and
    into *dataset*."""
    import csv
    from StringIO import StringIO
    from openspending.validation.data import convert_types
    rows = csv.DictReader(StringIO(TEST_DATA))
    mapping = SIMPLE_MODEL['mapping']
    for row in rows:
        dataset.load(convert_types(mapping, row))
Example #6
0
def load_dataset(dataset):
    """Load the 'simple' fixture rows, type-converted, into *dataset*."""
    simple_model = model_fixture('simple')
    mapping = simple_model['mapping']
    data = data_fixture('simple')
    for record in csv.DictReader(data):
        dataset.load(convert_types(mapping, record))
    data.close()
 def test_convert_dates_custom_format(self):
     """A custom strftime-style 'format' entry is honoured for dates."""
     mapping = {"foo": {"column": "foo",
                        "format": "%d.%m.%Y",
                        "datatype": "date"}}
     result = convert_types(mapping, {"foo": "7.5.2010"})
     assert result['foo'] == datetime.date(2010, 5, 7)
 def test_convert_types_value(self):
     """A plain string column passes through conversion unchanged."""
     mapping = {"foo": {"column": "foo", "datatype": "string"}}
     result = convert_types(mapping, {"foo": "bar"})
     assert isinstance(result, dict), result
     assert 'foo' in result, result
     assert result['foo'] == 'bar'
 def test_convert_types_casting(self):
     """A 'float' datatype casts the raw string value to a float."""
     mapping = {"foo": {"column": "foo", "datatype": "float"}}
     result = convert_types(mapping, {"foo": "5.0"})
     assert isinstance(result, dict), result
     assert 'foo' in result, result
     assert result['foo'] == 5.0
    def test_convert_dates(self):
        """Partial and full ISO-ish date strings all parse to datetime.date."""
        mapping = {"foo": {"column": "foo", "datatype": "date"}}
        # (raw input, expected parsed date) — missing month/day default to 1.
        cases = [
            ("2010", datetime.date(2010, 1, 1)),
            ("2010-02", datetime.date(2010, 2, 1)),
            ("2010-02-03", datetime.date(2010, 2, 3)),
            ("2010-02-03Z", datetime.date(2010, 2, 3)),
        ]
        for raw, expected in cases:
            out = convert_types(mapping, {"foo": raw})
            assert out['foo'] == expected
 def test_convert_types_compound(self):
     """A compound mapping yields a nested dict of attribute values."""
     mapping = {"foo": {"attributes": {
         "name": {"column": "foo_name", "datatype": "string"},
         "label": {"column": "foo_label", "datatype": "string"},
     }}}
     result = convert_types(mapping, {"foo_name": "bar", "foo_label": "qux"})
     assert isinstance(result, dict), result
     assert 'foo' in result, result
     assert isinstance(result['foo'], dict), result
     assert result['foo']['name'] == 'bar'
     assert result['foo']['label'] == 'qux'
Example #12
0
    def process_line(self, line):
        """Type-convert one row and, unless dry-running, load it into the
        dataset."""
        # Periodic progress report.
        if self.row_number % 1000 == 0:
            log.info('Imported %s lines' % self.row_number)

        try:
            converted = convert_types(self.dataset.mapping, line)
            if not self.dry_run:
                self.dataset.load(converted)
        except Invalid as invalid:
            # Report each failing attribute separately.
            for child in invalid.children:
                self.log_invalid_data(child)
            if self.raise_errors:
                raise
        except Exception as ex:
            self.log_exception(ex)
            if self.raise_errors:
                raise
Example #13
0
    def process_line(self, line):
        """Convert a single source row; load it unless this is a dry run."""
        row_no = self.row_number
        if row_no % 1000 == 0:
            log.info('Imported %s lines' % row_no)

        try:
            data = convert_types(self.dataset.mapping, line)
            # Dry runs stop after conversion: that alone is the check.
            if not self.dry_run:
                self.dataset.load(data)
        except Invalid as invalid:
            for child in invalid.children:
                self.log_invalid_data(child)
            if self.raise_errors:
                raise
        except Exception as ex:
            self.log_exception(ex)
            if self.raise_errors:
                raise
Example #14
0
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.

    Creates a Dataset from the named model fixture, optionally attaches
    *manager*, generates the model tables and loads every CSV row.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.model.generate()
    data = data_fixture(name)
    for row in csv.DictReader(data):
        dataset.model.load(convert_types(model['mapping'], row))
    data.close()
    return dataset
Example #15
0
def load_fixture(name, manager=None):
    """
    Load fixture data into the database and return the new dataset.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.model.generate()
    source = data_fixture(name)
    mapping = model['mapping']
    for record in csv.DictReader(source):
        entry = convert_types(mapping, record)
        dataset.model.load(entry)
    source.close()
    return dataset
Example #16
0
    def process_line(self, line):
        """Convert one row, attach its geometry/time id, then load it —
        or, in dry-run mode, only verify key uniqueness.

        Validation failures are logged per failing attribute; any error
        re-raises when self.raise_errors is set.
        """
        if self.row_number % 1000 == 0:
            log.info('Imported %s lines' % self.row_number)

        try:
            # Example of a converted row:
            # {u'geom_time_id': u'0',
            #  u'country_level0': {u'countryid': ..., u'name': ..., u'label': ...},
            #  u'amount': 27.13..., u'theid': u'71',
            #  u'time': datetime.date(1977, 1, 1)}
            data = convert_types(self.dataset.mapping.get('mapping', {}), line)
            gid = self._match_country_id(data['country_level0'])
            data['geom_time_id'] = str(self._match_time_geom(
                gid, data['time']))

            if not self.dry_run:
                self.source.model.load(data)
            else:
                # Check uniqueness of the configured key columns.
                unique_value = ', '.join([unicode(data[k]) for k in self.key])
                if unique_value in self.unique_check:
                    # Bug fix: removed a leftover debug print statement
                    # ("unique error probelm...") that polluted stdout.
                    self.log_exception(
                        ValueError("Unique key constraint not met"),
                        error="%s is not a unique key" % unique_value)
                self.unique_check[unique_value] = True
        except Invalid as invalid:
            for child in invalid.children:
                self.log_invalid_data(child)
            if self.raise_errors:
                raise
        except Exception as ex:
            self.log_exception(ex)
            if self.raise_errors:
                raise
Example #17
0
    def process_line(self, line):
        """Convert one row, resolve its country/time geometry id and load
        it into the model (dry-run mode only checks key uniqueness).

        Per-attribute validation errors are logged; errors re-raise when
        self.raise_errors is set.
        """
        if self.row_number % 1000 == 0:
            log.info('Imported %s lines' % self.row_number)

        try:
            # A converted row looks like:
            # {u'geom_time_id': u'0',
            #  u'country_level0': {u'countryid': ..., u'name': ..., u'label': ...},
            #  u'amount': 27.13..., u'theid': u'71',
            #  u'time': datetime.date(1977, 1, 1)}
            data = convert_types(self.dataset.mapping.get('mapping', {}), line)
            gid = self._match_country_id(data['country_level0'])
            data['geom_time_id'] = str(self._match_time_geom(gid, data['time']))

            if not self.dry_run:
                self.source.model.load(data)
            else:
                # Check uniqueness of the configured key columns.
                unique_value = ', '.join([unicode(data[k]) for k in self.key])
                if unique_value in self.unique_check:
                    # Bug fix: dropped a stray debug print statement
                    # ("unique error probelm...") left in by mistake.
                    self.log_exception(
                        ValueError("Unique key constraint not met"),
                        error="%s is not a unique key" % unique_value)
                self.unique_check[unique_value] = True
        except Invalid as invalid:
            for child in invalid.children:
                self.log_invalid_data(child)
            if self.raise_errors:
                raise
        except Exception as ex:
            self.log_exception(ex)
            if self.raise_errors:
                raise
Example #18
0
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.

    Reads the '<name>.js' model, builds and commits a Dataset, then
    loads every row of '<name>.csv' after type conversion.
    """
    from openspending.validation.data import convert_types
    fh = fixture_file('%s.js' % name)
    data = json.load(fh)
    fh.close()
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    fh = fixture_file('%s.csv' % name)
    mapping = data['mapping']
    for record in csv.DictReader(fh):
        dataset.load(convert_types(mapping, record))
    fh.close()
    dataset.commit()
    return dataset
Example #19
0
def load_fixture(name, manager=None):
    """
    Load fixture data into the database.

    Reads the '<name>.js' model fixture, creates and commits a Dataset,
    generates its tables, then loads every row of '<name>.csv' after
    type conversion. Returns the committed dataset.
    """
    from openspending.validation.data import convert_types
    # Use context managers so the fixture files are closed even when
    # parsing or loading raises (the original leaked handles on error).
    with open(fixture_path('%s.js' % name), 'r') as fh:
        data = json.load(fh)
    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()
    with open(fixture_path('%s.csv' % name), 'r') as fh:
        for row in csv.DictReader(fh):
            dataset.load(convert_types(data['mapping'], row))
    dataset.commit()
    return dataset