Example #1
0
 def test_padded_ints_coerce_fail(self):
     """
     Coercing a zero-padded column to int is expected to be rejected.

     The raised InvalidValueForTypeException must report index 0, the
     value '0001' and the requested type.
     """
     column = [u'0001', u'0997', u'8.7', u'']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=int)

     error = caught.exception
     self.assertEqual(error.index, 0)
     self.assertEqual(error.value, '0001')
     self.assertEqual(error.normal_type, int)
Example #2
0
 def test_ints_coerce_fail(self):
     """
     Coercing to int is expected to fail on the non-numeric 'TRUE'.

     The raised InvalidValueForTypeException must report index 4, the
     value 'TRUE' and the requested type.
     """
     column = [u'1', u'-87', u'418000000', u'', u'TRUE']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=int)

     error = caught.exception
     self.assertEqual(error.index, 4)
     self.assertEqual(error.value, 'TRUE')
     self.assertEqual(error.normal_type, int)
Example #3
0
 def test_floats_coerce_fail(self):
     """
     Coercing to float is expected to fail on free text.

     The raised InvalidValueForTypeException must report index 3, the
     value 'Hello, world!' and the requested type.
     """
     column = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=float)

     error = caught.exception
     self.assertEqual(error.index, 3)
     self.assertEqual(error.value, 'Hello, world!')
     self.assertEqual(error.normal_type, float)
Example #4
0
 def test_nulls_coerce_fail(self):
     """
     Coercing to NoneType is expected to fail on a real value.

     The raised InvalidValueForTypeException must report index 3, the
     value '1.7' and the requested type.
     """
     column = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=NoneType)

     error = caught.exception
     self.assertEqual(error.index, 3)
     self.assertEqual(error.value, '1.7')
     self.assertEqual(error.normal_type, NoneType)
Example #5
0
    def test_padded_ints_coerce_fail(self):
        """Zero-padded digits forced to int: expect a failure at index 0 ('0001')."""
        padded = [u'0001', u'0997', u'8.7', u'']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(padded, normal_type=int)

        failure = caught.exception
        self.assertEqual(failure.index, 0)
        self.assertEqual(failure.value, '0001')
        self.assertEqual(failure.normal_type, int)
Example #6
0
    def test_dates_coerce_fail(self):
        """Time-only value forced to datetime: expect a failure at index 3 ('4:45 AM')."""
        stamps = [
            u'Jan 1, 2008 at 4:40 AM',
            u'2010-01-27T03:45:00',
            u'3/1/08 16:14:45',
            u'4:45 AM',
        ]

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(stamps, normal_type=datetime.datetime)

        failure = caught.exception
        self.assertEqual(failure.index, 3)
        self.assertEqual(failure.value, '4:45 AM')
        self.assertEqual(failure.normal_type, datetime.datetime)
Example #7
0
    def test_nulls_coerce_fail(self):
        """Real value forced to NoneType: expect a failure at index 3 ('1.7')."""
        mostly_null = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(mostly_null, normal_type=NoneType)

        failure = caught.exception
        self.assertEqual(failure.index, 3)
        self.assertEqual(failure.value, '1.7')
        self.assertEqual(failure.normal_type, NoneType)
Example #8
0
 def test_booleans_coerce_fail(self):
     """
     Coercing to bool is expected to fail on the number '17'.

     The raised InvalidValueForTypeException must report index 3, the
     value '17' and the requested type.
     """
     column = [u'False', u'TRUE', u'FALSE', u'17', u'']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=bool)

     error = caught.exception
     self.assertEqual(error.index, 3)
     self.assertEqual(error.value, '17')
     self.assertEqual(error.normal_type, bool)
Example #9
0
 def test_times_coerce_fail(self):
     """
     Coercing to datetime.time is expected to fail on '1,000,000'.

     The raised InvalidValueForTypeException must report index 3, the
     value '1,000,000' and the requested type.
     """
     column = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

     # assertRaises makes the test fail if no exception is raised at all.
     with self.assertRaises(InvalidValueForTypeException) as caught:
         typeinference.normalize_column_type(column, normal_type=datetime.time)

     error = caught.exception
     self.assertEqual(error.index, 3)
     self.assertEqual(error.value, '1,000,000')
     self.assertEqual(error.normal_type, datetime.time)
Example #10
0
    def test_booleans_coerce_fail(self):
        """Number forced to bool: expect a failure at index 3 ('17')."""
        flags = [u'False', u'TRUE', u'FALSE', u'17', u'']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(flags, normal_type=bool)

        failure = caught.exception
        self.assertEqual(failure.index, 3)
        self.assertEqual(failure.value, '17')
        self.assertEqual(failure.normal_type, bool)
Example #11
0
    def test_ints_coerce_fail(self):
        """Non-numeric text forced to int: expect a failure at index 4 ('TRUE')."""
        numbers = [u'1', u'-87', u'418000000', u'', u'TRUE']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(numbers, normal_type=int)

        failure = caught.exception
        self.assertEqual(failure.index, 4)
        self.assertEqual(failure.value, 'TRUE')
        self.assertEqual(failure.normal_type, int)
Example #12
0
    def test_floats_coerce_fail(self):
        """Free text forced to float: expect a failure at index 3 ('Hello, world!')."""
        numbers = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(numbers, normal_type=float)

        failure = caught.exception
        self.assertEqual(failure.index, 3)
        self.assertEqual(failure.value, 'Hello, world!')
        self.assertEqual(failure.normal_type, float)
Example #13
0
    def test_times_coerce_fail(self):
        """Comma-grouped number forced to time: expect a failure at index 3 ('1,000,000')."""
        clock_values = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

        with self.assertRaises(InvalidValueForTypeException) as caught:
            typeinference.normalize_column_type(clock_values, normal_type=datetime.time)

        failure = caught.exception
        self.assertEqual(failure.index, 3)
        self.assertEqual(failure.value, '1,000,000')
        self.assertEqual(failure.normal_type, datetime.time)
Example #14
0
 def test_padded_ints_coerce_fail(self):
     """
     Zero-padded digits forced to int are expected to be rejected.

     The exception must carry index 0, value '0001' and normal_type int.
     """
     padded = [u'0001', u'0997', u'8.7', u'']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(padded, normal_type=int)

     self.assertEqual(ctx.exception.index, 0)
     self.assertEqual(ctx.exception.value, '0001')
     self.assertEqual(ctx.exception.normal_type, int)
Example #15
0
 def test_ints_coerce_fail(self):
     """
     A non-numeric entry in a column forced to int is expected to be rejected.

     The exception must carry index 4, value 'TRUE' and normal_type int.
     """
     numbers = [u'1', u'-87', u'418000000', u'', u'TRUE']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(numbers, normal_type=int)

     self.assertEqual(ctx.exception.index, 4)
     self.assertEqual(ctx.exception.value, 'TRUE')
     self.assertEqual(ctx.exception.normal_type, int)
Example #16
0
 def test_booleans_coerce_fail(self):
     """
     A number in a column forced to bool is expected to be rejected.

     The exception must carry index 3, value '17' and normal_type bool.
     """
     flags = [u'False', u'TRUE', u'FALSE', u'17', u'']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(flags, normal_type=bool)

     self.assertEqual(ctx.exception.index, 3)
     self.assertEqual(ctx.exception.value, '17')
     self.assertEqual(ctx.exception.normal_type, bool)
Example #17
0
    def test_ints_coerce_fail(self):
        """Column forced to int: 'TRUE' at index 4 is expected to raise."""
        raw_column = [u'1', u'-87', u'418000000', u'', u'TRUE']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(raw_column, normal_type=int)

        raised = ctx.exception
        self.assertEqual(raised.index, 4)
        self.assertEqual(raised.value, 'TRUE')
        self.assertEqual(raised.normal_type, int)
Example #18
0
    def test_booleans_coerce_fail(self):
        """Column forced to bool: '17' at index 3 is expected to raise."""
        raw_column = [u'False', u'TRUE', u'FALSE', u'17', u'']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(raw_column, normal_type=bool)

        raised = ctx.exception
        self.assertEqual(raised.index, 3)
        self.assertEqual(raised.value, '17')
        self.assertEqual(raised.normal_type, bool)
Example #19
0
    def test_padded_ints_coerce_fail(self):
        """Column forced to int: '0001' at index 0 is expected to raise."""
        raw_column = [u'0001', u'0997', u'8.7', u'']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(raw_column, normal_type=int)

        raised = ctx.exception
        self.assertEqual(raised.index, 0)
        self.assertEqual(raised.value, '0001')
        self.assertEqual(raised.normal_type, int)
Example #20
0
 def test_nulls_coerce_fail(self):
     """
     A real value in a column forced to NoneType is expected to be rejected.

     The exception must carry index 3, value '1.7' and normal_type NoneType.
     """
     mostly_null = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(
             mostly_null, normal_type=NoneType)

     self.assertEqual(ctx.exception.index, 3)
     self.assertEqual(ctx.exception.value, '1.7')
     self.assertEqual(ctx.exception.normal_type, NoneType)
Example #21
0
    def test_times_coerce_fail(self):
        """Column forced to datetime.time: '1,000,000' at index 3 is expected to raise."""
        raw_column = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(
                raw_column, normal_type=datetime.time)

        raised = ctx.exception
        self.assertEqual(raised.index, 3)
        self.assertEqual(raised.value, '1,000,000')
        self.assertEqual(raised.normal_type, datetime.time)
Example #22
0
    def test_nulls_coerce_fail(self):
        """Column forced to NoneType: '1.7' at index 3 is expected to raise."""
        raw_column = [u'n/a', u'NA', u'.', u'1.7', u'none', u'']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(
                raw_column, normal_type=NoneType)

        raised = ctx.exception
        self.assertEqual(raised.index, 3)
        self.assertEqual(raised.value, '1.7')
        self.assertEqual(raised.normal_type, NoneType)
Example #23
0
 def test_floats_coerce_fail(self):
     """
     Free text in a column forced to float is expected to be rejected.

     The exception must carry index 3, value 'Hello, world!' and
     normal_type float.
     """
     numbers = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(numbers, normal_type=float)

     self.assertEqual(ctx.exception.index, 3)
     self.assertEqual(ctx.exception.value, 'Hello, world!')
     self.assertEqual(ctx.exception.normal_type, float)
Example #24
0
    def test_floats_coerce_fail(self):
        """Column forced to float: 'Hello, world!' at index 3 is expected to raise."""
        raw_column = [u'1', u'-87.413', u'418000000.0', u'Hello, world!']

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(
                raw_column, normal_type=float)

        raised = ctx.exception
        self.assertEqual(raised.index, 3)
        self.assertEqual(raised.value, 'Hello, world!')
        self.assertEqual(raised.normal_type, float)
Example #25
0
 def test_dates_coerce_fail(self):
     """
     Column forced to datetime.datetime: the time-only '4:45 AM' at index 3
     is expected to raise InvalidValueForTypeException.
     """
     stamps = [
         u'Jan 1, 2008 at 4:40 AM',
         u'2010-01-27T03:45:00',
         u'3/1/08 16:14:45',
         u'4:45 AM',
     ]
     error = None

     try:
         typeinference.normalize_column_type(stamps, normal_type=datetime.datetime)
     except InvalidValueForTypeException as exc:
         # Keep a reference: the `as` name is not reliable after the handler.
         error = exc

     # Reaching this point without a captured error means nothing was raised.
     if error is None:
         raise AssertionError('Expected InvalidValueForTypeException')

     self.assertEqual(error.index, 3)
     self.assertEqual(error.value, '4:45 AM')
     self.assertEqual(error.normal_type, datetime.datetime)
Example #26
0
 def test_times_coerce_fail(self):
     """
     A comma-grouped number in a column forced to datetime.time is expected
     to be rejected.

     The exception must carry index 3, value '1,000,000' and normal_type
     datetime.time.
     """
     clock_values = [u'4:40 AM', u'03:45:00', u'16:14:45', u'1,000,000']

     # Using assertRaises so that a missing exception is reported as a
     # failure instead of letting the test pass without checking anything.
     with self.assertRaises(InvalidValueForTypeException) as ctx:
         typeinference.normalize_column_type(
             clock_values, normal_type=datetime.time)

     self.assertEqual(ctx.exception.index, 3)
     self.assertEqual(ctx.exception.value, '1,000,000')
     self.assertEqual(ctx.exception.normal_type, datetime.time)
 def test_dates_coerce_fail(self):
     """
     Forcing datetime.datetime on a column whose last entry is only a time
     ('4:45 AM', index 3) is expected to raise InvalidValueForTypeException.
     """
     raw_column = [
         u'Jan 1, 2008 at 4:40 AM',
         u'2010-01-27T03:45:00',
         u'3/1/08 16:14:45',
         u'4:45 AM',
     ]
     captured = None

     try:
         typeinference.normalize_column_type(raw_column, normal_type=datetime.datetime)
     except InvalidValueForTypeException as exc:
         captured = exc

     # No captured exception means the call returned normally.
     if captured is None:
         raise AssertionError('Expected InvalidValueForTypeException')

     self.assertEqual(captured.index, 3)
     self.assertEqual(captured.value, '4:45 AM')
     self.assertEqual(captured.normal_type, datetime.datetime)
Example #28
0
    def test_dates_coerce_fail(self):
        """Column forced to datetime.datetime: '4:45 AM' at index 3 is expected to raise."""
        raw_column = [
            u'Jan 1, 2008 at 4:40 AM',
            u'2010-01-27T03:45:00',
            u'3/1/08 16:14:45',
            u'4:45 AM',
        ]

        with self.assertRaises(InvalidValueForTypeException) as ctx:
            typeinference.normalize_column_type(
                raw_column, normal_type=datetime.datetime)

        raised = ctx.exception
        self.assertEqual(raised.index, 3)
        self.assertEqual(raised.value, '4:45 AM')
        self.assertEqual(raised.normal_type, datetime.datetime)
 def test_jeremy_singer_vine_datetimes(self):
     """
     Obscure case named after Jeremy Singer-Vine, who discovered it.

     The single letters 'P' and 'H' are expected to come back unchanged
     with the text type.
     """
     letters = [u'P', u'H', u'H']
     expected = (six.text_type, [u'P', u'H', u'H'])

     self.assertEqual(expected, typeinference.normalize_column_type(letters))
Example #30
0
    def __init__(self, order, name, l, normal_type=InvalidType,
                 blanks_as_nulls=True, infer_types=True):
        """
        Build a column out of a sequence of values.

        Precedence of the options:

        * an explicit normal_type (anything other than InvalidType) wins: the
          values are stored untouched and assumed to be normalized already;
        * otherwise, infer_types=False stores the values untouched and marks
          the column with the text type;
        * otherwise the type and the normalized values come from
          typeinference.normalize_column_type, which receives blanks_as_nulls.
        """
        has_explicit_type = normal_type != InvalidType

        if has_explicit_type or not infer_types:
            # No inference: keep the caller's values exactly as given.
            column_type = normal_type if has_explicit_type else six.text_type
            values = l
        else:
            column_type, values = typeinference.normalize_column_type(
                l, blanks_as_nulls=blanks_as_nulls)

        list.__init__(self, values)
        self.order = order
        # An empty column name makes no sense, so fall back to a placeholder.
        self.name = name or '_unnamed'
        self.type = column_type
Example #31
0
 def test_datetimes_and_times(self):
     """Datetime-like and time-only strings mixed: expected to stay unicode, blank becoming None."""
     raw_column = [
         'Jan 1, 2008 at 4:40 AM', '2010-01-27T03:45:00', '16:14:45', ''
     ]
     expected = (unicode, [
         'Jan 1, 2008 at 4:40 AM', '2010-01-27T03:45:00', '16:14:45', None
     ])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
 def test_times(self):
     """Time strings are expected to be inferred as datetime.time, blank becoming None."""
     raw_column = [u'4:40 AM', u'03:45:00', u'16:14:45', u'']
     expected_values = [
         datetime.time(4, 40, 0),
         datetime.time(3, 45, 0),
         datetime.time(16, 14, 45),
         None,
     ]

     actual = typeinference.normalize_column_type(raw_column)

     self.assertEqual((datetime.time, expected_values), actual)
 def test_strings(self):
     """Plain text is expected to keep the text type, blank becoming None."""
     raw_column = [
         u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', u''
     ]
     expected = (six.text_type, [
         u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', None
     ])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
 def __init__(self, row_number, column_name, value, normal_type):
     """
     Record where a value failed to convert and what type it looks like instead.

     Stores row_number, column_name, value and normal_type on the instance,
     runs normalize_column_type on the single value and keeps the first
     element of its result as new_type, then passes a formatted message to
     the parent constructor.
     """
     self.row_number = row_number
     self.column_name = column_name
     self.value = value
     self.normal_type = normal_type

     # Only the first element of the result (the type) is kept.
     inferred = normalize_column_type([value])
     self.new_type = inferred[0]

     details = (
         row_number,
         column_name,
         value,
         normal_type.__name__,
         self.new_type.__name__,
     )
     msg = 'Row %i, column "%s": Unable to convert "%s" to %s. New type is %s.' % details
     super(InferredNormalFalsifiedException, self).__init__(msg)
 def test_dates(self):
     """Date strings in several formats are expected to be inferred as datetime.date, blank becoming None."""
     raw_column = [u'Jan 1, 2008', u'2010-01-27', u'3/1/08', u'']
     expected_values = [
         datetime.date(2008, 1, 1),
         datetime.date(2010, 1, 27),
         datetime.date(2008, 3, 1),
         None,
     ]

     actual = typeinference.normalize_column_type(raw_column)

     self.assertEqual((datetime.date, expected_values), actual)
Example #36
0
 def test_strings_coerce(self):
     """Text coerced to unicode is expected to come back unchanged, blank becoming None."""
     raw_column = [
         u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', u''
     ]
     expected = (unicode, [
         u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', None
     ])

     actual = typeinference.normalize_column_type(raw_column, normal_type=unicode)

     self.assertEqual(expected, actual)
 def test_datetimes_and_times(self):
     """Datetime-like and time-only strings mixed: expected to stay text, blank becoming None."""
     raw_column = [
         u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'16:14:45', u''
     ]
     expected = (six.text_type, [
         u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'16:14:45', None
     ])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
 def test_datetimes_and_dates(self):
     """Datetimes mixed with a bare date are expected to be inferred as datetime.datetime (date at midnight)."""
     raw_column = [
         u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'3/1/08', u''
     ]
     expected_values = [
         datetime.datetime(2008, 1, 1, 4, 40, 0),
         datetime.datetime(2010, 1, 27, 3, 45, 0),
         datetime.datetime(2008, 3, 1, 0, 0, 0),
         None,
     ]

     actual = typeinference.normalize_column_type(raw_column)

     self.assertEqual((datetime.datetime, expected_values), actual)
 def test_datetimes_coerce(self):
     """Datetime strings coerced to datetime.datetime are expected to parse, blank becoming None."""
     raw_column = [
         u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00',
         u'3/1/08 16:14:45', u''
     ]
     expected_values = [
         datetime.datetime(2008, 1, 1, 4, 40, 0),
         datetime.datetime(2010, 1, 27, 3, 45, 0),
         datetime.datetime(2008, 3, 1, 16, 14, 45),
         None,
     ]

     actual = typeinference.normalize_column_type(
         raw_column, normal_type=datetime.datetime)

     self.assertEqual((datetime.datetime, expected_values), actual)
Example #40
0
 def __init__(self, order, name, l, normal_type=InvalidType, blanks_as_nulls=True, type_inference=True):
     """
     Build a column out of a sequence of values.

     Inference is skipped when type_inference is false or when normal_type
     is anything other than InvalidType; in that case the values are stored
     untouched and normal_type becomes the column type. Otherwise the type
     and values come from typeinference.normalize_column_type, which
     receives blanks_as_nulls.
     """
     # NOTE(review): with type_inference=False and the default normal_type,
     # the column type ends up being InvalidType -- confirm this is intended.
     skip_inference = (not type_inference) or normal_type != InvalidType

     if skip_inference:
         column_type, values = normal_type, l
     else:
         column_type, values = typeinference.normalize_column_type(l, blanks_as_nulls=blanks_as_nulls)

     list.__init__(self, values)
     self.order = order
     # An empty column name makes no sense, so fall back to a placeholder.
     self.name = name or '_unnamed'
     self.type = column_type
Example #41
0
    def __init__(self, order, name, l, normal_type=InvalidType):
        """
        Build a column out of a sequence of values.

        When normal_type is anything other than InvalidType the values are
        stored untouched and assumed to be normalized already; otherwise the
        type and values come from typeinference.normalize_column_type.
        """
        needs_inference = not (normal_type != InvalidType)

        if needs_inference:
            column_type, values = typeinference.normalize_column_type(l)
        else:
            column_type, values = normal_type, l

        list.__init__(self, values)
        self.order = order
        # An empty column name makes no sense, so fall back to a placeholder.
        self.name = name or '_unnamed'
        self.type = column_type
Example #42
0
 def __init__(self, order, name, l, normal_type=InvalidType):
     """
     Build a column out of a sequence of values.

     When normal_type is anything other than InvalidType the values are
     stored untouched and assumed to be normalized already; otherwise the
     type and values come from typeinference.normalize_column_type.
     Once the list and attributes are set, _compute_nullable() and
     _compute_max_length() are called, in that order.
     """
     needs_inference = not (normal_type != InvalidType)

     if needs_inference:
         column_type, values = typeinference.normalize_column_type(l)
     else:
         column_type, values = normal_type, l

     list.__init__(self, values)
     self.order = order
     # An empty column name makes no sense, so fall back to a placeholder.
     self.name = name or '_unnamed'
     self.type = column_type

     self._compute_nullable()
     self._compute_max_length()
Example #43
0
 def test_ints_floats(self):
     """Ints mixed with one float-like string are expected to be inferred as float, blank becoming None."""
     raw_column = [u'1.01', u'-87', u'418000000', u'']
     expected = (float, [1.01, -87, 418000000, None])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
Example #44
0
 def test_strings_coerce(self):
     """Text coerced to unicode is expected to come back unchanged, blank becoming None."""
     raw_column = [u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', u'']
     expected = (unicode, [u'Chicago Tribune', u'435 N Michigan ave', u'Chicago, IL', None])

     actual = typeinference.normalize_column_type(raw_column, normal_type=unicode)

     self.assertEqual(expected, actual)
Example #45
0
 def test_comma_floats(self):
     """A float with thousands separators is expected to be inferred as float, blank becoming None."""
     raw_column = [u'1.01', u'-87.413', u'418,000,000.0', u'']
     expected = (float, [1.01, -87.413, 418000000.0, None])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
Example #46
0
 def test_floats_coerce(self):
     """Float strings coerced to float are expected to parse, blank becoming None."""
     raw_column = [u'1.01', u'-87.413', u'418000000.0', u'']
     expected = (float, [1.01, -87.413, 418000000.0, None])

     actual = typeinference.normalize_column_type(raw_column, normal_type=float)

     self.assertEqual(expected, actual)
 def test_nulls_coerce(self):
     """Null-like markers coerced to NoneType are all expected to become None."""
     raw_column = [u'n/a', u'NA', u'.', u'null', u'none', u'']
     expected = (NoneType, [None, None, None, None, None, None])

     actual = typeinference.normalize_column_type(
         raw_column, normal_type=NoneType)

     self.assertEqual(expected, actual)
 def test_ints_coerce(self):
     """Integer strings coerced to int are expected to parse, blank becoming None."""
     raw_column = [u'1', u'-87', u'418000000', u'']
     expected = (int, [1, -87, 418000000, None])

     actual = typeinference.normalize_column_type(raw_column, normal_type=int)

     self.assertEqual(expected, actual)
Example #49
0
 def test_padded_ints_coerce(self):
     """Zero-padded digits are expected to come back unchanged as unicode, blank becoming None."""
     raw_column = [u'0001', u'0997', u'8.7', u'']
     expected = (unicode, [u'0001', u'0997', u'8.7', None])

     # NOTE(review): normal_type is the string 'unicode', not the unicode
     # type itself -- confirm this is intentional.
     actual = typeinference.normalize_column_type(raw_column, normal_type='unicode')

     self.assertEqual(expected, actual)
 def test_comma_ints(self):
     """An integer with thousands separators is expected to be inferred as int, blank becoming None."""
     raw_column = [u'1', u'-87', u'418,000,000', u'']
     expected = (int, [1, -87, 418000000, None])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
Example #51
0
 def test_ints_coerce(self):
     """Integer strings coerced to int are expected to parse, blank becoming None."""
     digits = [u'1', u'-87', u'418000000', u'']
     wanted = (int, [1, -87, 418000000, None])

     got = typeinference.normalize_column_type(digits, normal_type=int)

     self.assertEqual(wanted, got)
Example #52
0
 def test_datetimes_and_times(self):
     """Datetime-like and time-only strings mixed: expected to stay unicode, blank becoming None."""
     raw_column = [u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'16:14:45', u'']
     expected = (unicode, [u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'16:14:45', None])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
Example #53
0
 def test_datetimes_and_dates_coerce(self):
     """Datetimes and a bare date coerced to datetime.datetime: the date is expected at midnight."""
     raw_column = [u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'3/1/08', u'']
     expected_values = [
         datetime.datetime(2008, 1, 1, 4, 40, 0),
         datetime.datetime(2010, 1, 27, 3, 45, 0),
         datetime.datetime(2008, 3, 1, 0, 0, 0),
         None,
     ]

     actual = typeinference.normalize_column_type(raw_column, normal_type=datetime.datetime)

     self.assertEqual((datetime.datetime, expected_values), actual)
Example #54
0
 def test_nulls_coerce(self):
     """Null-like markers coerced to NoneType are all expected to become None."""
     markers = [u'n/a', u'NA', u'.', u'null', u'none', u'']
     wanted = (NoneType, [None, None, None, None, None, None])

     got = typeinference.normalize_column_type(markers, normal_type=NoneType)

     self.assertEqual(wanted, got)
Example #55
0
 def test_mixed(self):
     """Text mixed with numeric strings is expected to stay unicode, blank becoming None."""
     raw_column = [u'Chicago Tribune', u'-87.413', u'418000000', u'']
     expected = (unicode, [u'Chicago Tribune', u'-87.413', u'418000000', None])

     self.assertEqual(expected, typeinference.normalize_column_type(raw_column))
 def test_padded_ints_coerce(self):
     """Zero-padded digits are expected to come back unchanged as text, blank becoming None."""
     raw_column = [u'0001', u'0997', u'8.7', u'']
     expected = (six.text_type, [u'0001', u'0997', u'8.7', None])

     # NOTE(review): normal_type is the string 'six.text_type', not the
     # six.text_type type itself -- confirm this is intentional.
     actual = typeinference.normalize_column_type(
         raw_column, normal_type='six.text_type')

     self.assertEqual(expected, actual)
Example #57
0
 def test_booleans_coerce(self):
     """Boolean-like strings (including 'yes') coerced to bool are expected to parse, blank becoming None."""
     raw_column = [u'False', u'TRUE', u'FALSE', u'yes', u'']
     expected = (bool, [False, True, False, True, None])

     actual = typeinference.normalize_column_type(raw_column, normal_type=bool)

     self.assertEqual(expected, actual)
Example #58
0
 def test_comma_ints(self):
     """An integer with thousands separators is expected to be inferred as int, blank becoming None."""
     digits = [u'1', u'-87', u'418,000,000', u'']
     wanted = (int, [1, -87, 418000000, None])

     got = typeinference.normalize_column_type(digits)

     self.assertEqual(wanted, got)
Example #59
0
 def test_datetimes(self):
     """Datetime strings in several formats are expected to be inferred as datetime.datetime, blank becoming None."""
     raw_column = [u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'3/1/08 16:14:45', u'']
     expected_values = [
         datetime.datetime(2008, 1, 1, 4, 40, 0),
         datetime.datetime(2010, 1, 27, 3, 45, 0),
         datetime.datetime(2008, 3, 1, 16, 14, 45),
         None,
     ]

     actual = typeinference.normalize_column_type(raw_column)

     self.assertEqual((datetime.datetime, expected_values), actual)
 def test_floats_coerce(self):
     """Float strings coerced to float are expected to parse, blank becoming None."""
     decimals = [u'1.01', u'-87.413', u'418000000.0', u'']
     wanted = (float, [1.01, -87.413, 418000000.0, None])

     got = typeinference.normalize_column_type(decimals, normal_type=float)

     self.assertEqual(wanted, got)