Esempio n. 1
0
def test_cast_row_wrong_type_multiple_errors_handled():
    """Two uncastable cells in one row reach the handler as one CastError.

    With ``exc_handler`` supplied, ``cast_row`` returns a row that compares
    equal to the input, the two bad cells come back as ``FailedCast``
    instances, and the handler is called exactly once with an exception
    carrying both individual errors.
    """
    field_names = ('id', 'height', 'age', 'name', 'occupation')
    raw_row = ['string', 'notdecimal', '10.6', 'string', 'string']
    expected_row = ['string', 'notdecimal', '10.6', 'string', 'string']

    # Expected handler payload: the whole row keyed by field name, and the
    # subset of cells that could not be cast.
    expected_row_data = OrderedDict(zip(field_names, expected_row))
    expected_error_data = OrderedDict(
        (name, expected_row_data[name]) for name in ('height', 'age'))

    captured = []

    def collect(exc, row_number, row_data, error_data):
        captured.append((exc, row_number, row_data, error_data))

    result = Schema(DESCRIPTOR_MAX).cast_row(raw_row, exc_handler=collect)

    assert result == expected_row
    for bad_index in (1, 2):
        assert isinstance(result[bad_index], FailedCast)

    assert len(captured) == 1
    only_call = captured[0]
    _check_error(only_call,
                 expect_exc_class=exceptions.CastError,
                 expect_exc_str='There are 2 cast errors',
                 expect_row_number=None,
                 expect_row_data=expected_row_data,
                 expect_error_data=expected_error_data)
    aggregated_exc = only_call[0]
    assert len(aggregated_exc.errors) == 2
Esempio n. 2
0
def test_cast_row_handled():
    """A fully valid row is cast and the error handler is never called."""
    reported = []

    def on_error(exc, row_number, row_data, error_data):
        reported.append((exc, row_number, row_data, error_data))

    raw_row = ['string', '10.0', '1', 'string', 'string']
    expected = ['string', Decimal(10.0), 1, 'string', 'string']

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row, exc_handler=on_error)

    assert cast == expected
    assert not reported
Esempio n. 3
0
def test_cast_row_null_values_handled():
    """The cells '', '-' and 'null' cast to None without invoking the handler."""
    reported = []

    def on_error(exc, row_number, row_data, error_data):
        reported.append((exc, row_number, row_data, error_data))

    raw_row = ['string', '', '-', 'string', 'null']
    expected = ['string', None, None, 'string', None]

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row, exc_handler=on_error)

    assert cast == expected
    assert not reported
Esempio n. 4
0
class ResourceIterator(object):
    """Iterator over rows read line by line from ``infile``.

    Each non-empty line is decoded with ``json.loadl``. Iteration ends at
    the first line that is empty after stripping whitespace, which is also
    what a file-like object's ``readline`` yields at end of input. When
    ``validate`` is true, every decoded row is additionally cast against the
    schema built from ``orig_spec['schema']``.
    """

    def __init__(self, infile, spec, orig_spec, validate=False, debug=False):
        """Store the input stream and prepare the schema used for validation.

        Args:
            infile: Object exposing ``readline()`` that returns text lines.
            spec: Resource spec; stored on the instance as ``self.spec``.
            orig_spec: Mapping whose ``'schema'`` entry is passed to
                ``Schema`` and whose ``'schema'['fields']`` names define the
                column order used for validation.
            validate: When true, cast every row against the schema.
            debug: When true, log progress around each read.
        """
        self.spec = spec
        self.table_schema = Schema(orig_spec['schema'])
        self.field_names = [f['name'] for f in orig_spec['schema']['fields']]
        self.validate = validate
        self.infile = infile
        self.debug = debug
        self.stopped = False

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """Read, decode and optionally validate the next row.

        Returns:
            The value produced by ``json.loadl`` for the next line.

        Raises:
            StopIteration: On an empty line, and on every later call.
            ValueError: If validation is enabled and casting the row raises
                ``CastError`` or ``TypeError``; the original exception is
                chained as the cause.
        """
        if self.stopped:
            raise StopIteration()
        if self.debug:
            logging.error('WAITING')
        line = self.infile.readline().strip()
        if self.debug:
            logging.error('INGESTING: %r', line)
        if line == '':
            # Remember the end marker so later calls do not read past it.
            self.stopped = True
            raise StopIteration()
        line = json.loadl(line)
        if self.validate:
            # Order the values by schema field; absent keys become None.
            to_validate = [line.get(f) for f in self.field_names]
            try:
                self.table_schema.cast_row(to_validate)
            except CastError as e:
                logging.error('Failed to validate row: %s', e)
                for i, err in enumerate(e.errors, start=1):
                    # Python 3 exceptions carry no ``message`` attribute by
                    # default; fall back to the error itself so an
                    # AttributeError cannot replace the ValueError below.
                    logging.error('%d) %s', i, getattr(err, 'message', err))
                raise ValueError('Casting failed for row %r' % line) from e
            except TypeError as e:
                raise ValueError('Validation failed for row %r' % line) from e

        return line

    def next(self):
        """Alias of ``__next__`` for Python 2 style callers."""
        return self.__next__()
Esempio n. 5
0
def test_cast_row_too_short_handled():
    """A row with a missing cell is padded with None and reported once."""
    reported = []

    def on_error(exc, row_number, row_data, error_data):
        reported.append((exc, row_number, row_data, error_data))

    raw_row = ['string', '10.0', '1', 'string']
    # Missing values get substituted by None
    expected = ['string', Decimal(10.0), 1, 'string', None]

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row, exc_handler=on_error)
    assert cast == expected
    assert len(reported) == 1

    # The handler is expected to get the padded row both as row data and as
    # error data.
    keyed_row = OrderedDict()
    keyed_row['id'] = 'string'
    keyed_row['height'] = '10.0'
    keyed_row['age'] = '1'
    keyed_row['name'] = 'string'
    keyed_row['occupation'] = None
    _check_error(
        reported[0],
        expect_exc_class=exceptions.CastError,
        expect_exc_str='Row length',
        expect_row_number=None,
        expect_row_data=keyed_row,
        expect_error_data=keyed_row)
Esempio n. 6
0
def test_cast_row_too_long_handled():
    """A row with an extra cell is truncated and reported once."""
    reported = []

    def on_error(exc, row_number, row_data, error_data):
        reported.append((exc, row_number, row_data, error_data))

    raw_row = ['string', '10.0', '1', 'string', 'string', 'string']
    # superfluous values are left out
    expected = ['string', Decimal(10.0), 1, 'string', 'string']

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row, exc_handler=on_error)
    assert cast == expected
    assert len(reported) == 1

    # superfluous values are keyed with col num for error reporting
    keyed_row = OrderedDict()
    keyed_row['id'] = 'string'
    keyed_row['height'] = '10.0'
    keyed_row['age'] = '1'
    keyed_row['name'] = 'string'
    keyed_row['occupation'] = 'string'
    keyed_row['tableschema-cast-error-extra-col-6'] = 'string'
    _check_error(
        reported[0],
        expect_exc_class=exceptions.CastError,
        expect_exc_str='Row length',
        expect_row_number=None,
        expect_row_data=keyed_row,
        expect_error_data=keyed_row)
Esempio n. 7
0
    def _iter_rows(self):
        """Yield the rows held in ``self.raw_data``.

        Nothing is yielded when ``self._schema`` is ``None``. If
        ``raw_data`` has a ``'results'`` entry, each binding is cast with
        the schema and yielded as an ``OrderedDict`` keyed by field name.
        Otherwise, if it has a ``'boolean'`` entry (ASK query), a single
        ``{'boolean': ...}`` dict is yielded.
        """
        if self._schema is None:  # Empty results
            return

        schema_obj = Schema(self._schema)

        if 'results' not in self.raw_data:
            if 'boolean' in self.raw_data:
                # Results of an ASK query
                yield {'boolean': self.raw_data['boolean']}
            return

        column_names = [field.name for field in schema_obj.fields]
        variables = self.raw_data['head']['vars']

        for binding in self.raw_data['results']['bindings']:
            ordered_terms = table_schema.order_terms_in_binding(
                variables, binding)

            # Unbound variables stay None; bound ones contribute their value.
            raw_values = [
                None if term is None else term['value']
                for term in ordered_terms
            ]

            cast_values = schema_obj.cast_row(raw_values)

            # For string columns the jsontableschema library maps several
            # literal strings ('null', 'none', '-', etc.) to ``None``,
            # which makes those strings unrepresentable.  Work around it
            # here: whenever a non-empty original value of a string field
            # was cast to a falsy value, put the original value back.
            restored = [
                raw
                if (not cast) and raw and field.type == 'string'
                else cast
                for field, raw, cast in zip(
                    schema_obj.fields, raw_values, cast_values)
            ]

            yield OrderedDict(zip(column_names, restored))
Esempio n. 8
0
def test_cast_row():
    """A valid row of strings is cast to the types declared by the schema."""
    raw_row = ['string', '10.0', '1', 'string', 'string']
    expected = ['string', Decimal(10.0), 1, 'string', 'string']

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row)

    assert cast == expected
Esempio n. 9
0
def test_cast_row_wrong_type_multiple_errors():
    """The raised CastError lists one error per uncastable cell (two here)."""
    bad_row = ['string', 'notdecimal', '10.6', 'string', 'string']
    descriptor_schema = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError) as raised:
        descriptor_schema.cast_row(bad_row)

    individual_errors = raised.value.errors
    assert len(individual_errors) == 2
Esempio n. 10
0
def test_cast_row_wrong_type():
    """Casting a row with uncastable cells raises CastError."""
    bad_row = ['string', 'notdecimal', '10.6', 'string', 'string']
    descriptor_schema = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError):
        descriptor_schema.cast_row(bad_row)
Esempio n. 11
0
def test_cast_row_wrong_type_multiple_errors():
    """A row with two bad cells raises a CastError holding two errors."""
    invalid_cells = ['string', 'notdecimal', '10.6', 'string', 'string']
    model = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError) as caught:
        model.cast_row(invalid_cells)

    failure = caught.value
    assert len(failure.errors) == 2
Esempio n. 12
0
def test_cast_row_wrong_type():
    """``cast_row`` raises CastError when cells do not match their types."""
    invalid_cells = ['string', 'notdecimal', '10.6', 'string', 'string']
    model = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError):
        model.cast_row(invalid_cells)
Esempio n. 13
0
def test_cast_row_too_long():
    """A row with one cell more than the five expected raises CastError."""
    oversized_row = ['string', '10.0', '1', 'string', 'string', 'string']
    descriptor_schema = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError):
        descriptor_schema.cast_row(oversized_row)
Esempio n. 14
0
def test_cast_row():
    """``cast_row`` converts each string cell to its schema-declared type."""
    cells = ['string', '10.0', '1', 'string', 'string']
    wanted = ['string', Decimal(10.0), 1, 'string', 'string']
    model = Schema(DESCRIPTOR_MAX)

    converted = model.cast_row(cells)

    assert converted == wanted
Esempio n. 15
0
def test_cast_row_null_values():
    """The cells '', '-' and 'null' are cast to None."""
    raw_row = ['string', '', '-', 'string', 'null']
    expected = ['string', None, None, 'string', None]

    cast = Schema(DESCRIPTOR_MAX).cast_row(raw_row)

    assert cast == expected
Esempio n. 16
0
def test_cast_row_null_values():
    """Null markers ('', '-', 'null') in a row come back as None."""
    cells = ['string', '', '-', 'string', 'null']
    wanted = ['string', None, None, 'string', None]
    model = Schema(DESCRIPTOR_MAX)

    converted = model.cast_row(cells)

    assert converted == wanted
Esempio n. 17
0
def test_cast_row_too_long():
    """``cast_row`` raises CastError for a row with a surplus sixth cell."""
    cells = ['string', '10.0', '1', 'string', 'string', 'string']
    model = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.CastError):
        model.cast_row(cells)
Esempio n. 18
0
def test_cast_row_wrong_type_no_fail_fast_true():
    """With ``no_fail_fast=True`` an uncastable row raises MultipleInvalid."""
    bad_row = ['string', 'notdecimal', '10.6', 'string', 'string']
    descriptor_schema = Schema(DESCRIPTOR_MAX)

    with pytest.raises(exceptions.MultipleInvalid):
        descriptor_schema.cast_row(bad_row, no_fail_fast=True)