def caster(self):
    '''Build the caster used to convert row values to this object's column types.

    A ``CasterTransformBuilder`` is created and one entry is appended to it
    for every column in ``self.columns``, in iteration order: the column's
    ``name`` paired with its ``python_type``.

    :return: the populated ``CasterTransformBuilder``. It is intended to be
        applied to a row to produce a new row with the values cast to the
        schema types.
    '''
    # Imported locally, as in the original implementation.
    from ambry.transform import CasterTransformBuilder

    builder = CasterTransformBuilder()

    for column in self.columns:
        # Read the name first, then the type, before registering the pair.
        name, cast_type = column.name, column.python_type
        builder.append(name, cast_type)

    return builder
def test_caster(self): from ambry.transform import CasterTransformBuilder, NonNegativeInt, NaturalInt import datetime ctb = CasterTransformBuilder() ctb.append('int',int) ctb.append('float',float) ctb.append('str',str) row, errors = ctb({'int':1,'float':2,'str':'3'}) self.assertIsInstance(row['int'],int) self.assertEquals(row['int'],1) self.assertTrue(isinstance(row['float'],float)) self.assertEquals(row['float'],2.0) self.assertTrue(isinstance(row['str'],unicode)) self.assertEquals(row['str'],'3') # Should be idempotent row, errors = ctb(row) self.assertTrue(isinstance(row['int'],int)) self.assertEquals(row['int'],1) self.assertTrue(isinstance(row['float'],float)) self.assertEquals(row['float'],2.0) self.assertTrue(isinstance(row['str'],unicode)) self.assertEquals(row['str'],'3') ctb = CasterTransformBuilder() ctb.append('date',datetime.date) ctb.append('time',datetime.time) ctb.append('datetime',datetime.datetime) row, errors = ctb({'int':1,'float':2,'str':'3'}) self.assertIsNone(row['date']) self.assertIsNone(row['time']) self.assertIsNone(row['datetime']) row, errors = ctb({'date':'1990-01-01','time':'10:52','datetime':'1990-01-01T12:30'}) self.assertTrue(isinstance(row['date'],datetime.date)) self.assertTrue(isinstance(row['time'],datetime.time)) self.assertTrue(isinstance(row['datetime'],datetime.datetime)) self.assertEquals(row['date'],datetime.date(1990, 1, 1)) self.assertEquals(row['time'],datetime.time(10, 52)) self.assertEquals(row['datetime'],datetime.datetime(1990, 1, 1, 12, 30)) # Should be idempotent row, errors = ctb(row) self.assertTrue(isinstance(row['date'],datetime.date)) self.assertTrue(isinstance(row['time'],datetime.time)) self.assertTrue(isinstance(row['datetime'],datetime.datetime)) # Case insensitive row, errors = ctb({'Date':'1990-01-01','Time':'10:52','Datetime':'1990-01-01T12:30'}) self.assertEquals(row['date'],datetime.date(1990, 1, 1)) self.assertEquals(row['time'],datetime.time(10, 52)) 
self.assertEquals(row['datetime'],datetime.datetime(1990, 1, 1, 12, 30)) # # Custom caster types # class UpperCaster(str): def __new__(cls, v): return str.__new__(cls, v.upper()) ctb = CasterTransformBuilder() ctb.append('int', int) ctb.append('float', float) ctb.append('str', UpperCaster) ctb.add_type(UpperCaster) row, errors = ctb({'int': 1, 'float': 2, 'str': 'three'}) self.assertEquals(row['str'], 'THREE') # # Handling Errors # ctb = CasterTransformBuilder() ctb.append('int', int) ctb.append('float', float) ctb.append('str', str) ctb.append('ni1', NaturalInt) ctb.append('ni2', NaturalInt) row, errors = ctb({'int': '.', 'float': 'a', 'str': '3', 'ni1': 0, 'ni2': 3 }, codify_cast_errors=True)