Ejemplo n.º 1
0
    def test_array_types(self):
        # This test need to make sure that the Scala type selected is at least
        # as large as the python's types. This is necessary because python's
        # array types depend on C implementation on the machine. Therefore there
        # is no machine independent correspondence between python's array types
        # and Scala types.
        # See: https://docs.python.org/2/library/array.html

        def assert_collect_success(typecode, value, element_type):
            self.assertEqual(element_type,
                             str(_infer_type(array.array(typecode, [value])).element_type))

        # supported string types
        #
        # String types in python's array are "u" for Py_UNICODE and "c" for char.
        # "u" will be removed in python 4, and "c" is not supported in python 3.
        supported_string_types = []
        if sys.version_info[0] < 4:
            supported_string_types += ['u']
            # test unicode
            assert_collect_success('u', u'a', 'CHAR')
        if sys.version_info[0] < 3:
            supported_string_types += ['c']
            # test string
            assert_collect_success('c', 'a', 'CHAR')

        # supported float and double
        #
        # Test max, min, and precision for float and double, assuming IEEE 754
        # floating-point format.
        supported_fractional_types = ['f', 'd']
        assert_collect_success('f', ctypes.c_float(1e+38).value, 'FLOAT')
        assert_collect_success('f', ctypes.c_float(1e-38).value, 'FLOAT')
        assert_collect_success('f', ctypes.c_float(1.123456).value, 'FLOAT')
        assert_collect_success('d', sys.float_info.max, 'DOUBLE')
        assert_collect_success('d', sys.float_info.min, 'DOUBLE')
        assert_collect_success('d', sys.float_info.epsilon, 'DOUBLE')

        def get_int_data_type(size):
            if size <= 8:
                return "TINYINT"
            if size <= 16:
                return "SMALLINT"
            if size <= 32:
                return "INT"
            if size <= 64:
                return "BIGINT"

        # supported signed int types
        #
        # The size of C types changes with implementation, we need to make sure
        # that there is no overflow error on the platform running this test.
        supported_signed_int_types = list(
            set(_array_signed_int_typecode_ctype_mappings.keys()).intersection(
                set(_array_type_mappings.keys())))
        for t in supported_signed_int_types:
            ctype = _array_signed_int_typecode_ctype_mappings[t]
            max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
            assert_collect_success(t, max_val - 1, get_int_data_type(ctypes.sizeof(ctype) * 8))
            assert_collect_success(t, -max_val, get_int_data_type(ctypes.sizeof(ctype) * 8))

        # supported unsigned int types
        #
        # JVM does not have unsigned types. We need to be very careful to make
        # sure that there is no overflow error.
        supported_unsigned_int_types = list(
            set(_array_unsigned_int_typecode_ctype_mappings.keys()).intersection(
                set(_array_type_mappings.keys())))
        for t in supported_unsigned_int_types:
            ctype = _array_unsigned_int_typecode_ctype_mappings[t]
            max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
            assert_collect_success(t, max_val, get_int_data_type(ctypes.sizeof(ctype) * 8 + 1))

        # all supported types
        #
        # Make sure the types tested above:
        # 1. are all supported types
        # 2. cover all supported types
        supported_types = (supported_string_types +
                           supported_fractional_types +
                           supported_signed_int_types +
                           supported_unsigned_int_types)
        self.assertEqual(set(supported_types), set(_array_type_mappings.keys()))

        # all unsupported types
        #
        # Keys in _array_type_mappings is a complete list of all supported types,
        # and types not in _array_type_mappings are considered unsupported.
        # `array.typecodes` are not supported in python 2.
        if sys.version_info[0] < 3:
            all_types = {'c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'}
        else:
            all_types = set(array.typecodes)
        unsupported_types = all_types - set(supported_types)
        # test unsupported types
        for t in unsupported_types:
            with self.assertRaises(TypeError):
                _infer_schema_from_data([Row(myarray=array.array(t))])
Ejemplo n.º 2
0
    def test_array_types(self):
        # This test need to make sure that the Scala type selected is at least
        # as large as the python's types. This is necessary because python's
        # array types depend on C implementation on the machine. Therefore there
        # is no machine independent correspondence between python's array types
        # and Scala types.
        # See: https://docs.python.org/2/library/array.html

        def assert_collect_success(typecode, value, element_type):
            self.assertEqual(
                element_type,
                str(_infer_type(array.array(typecode, [value])).element_type))

        # supported string types
        #
        # String types in python's array are "u" for Py_UNICODE and "c" for char.
        # "u" will be removed in python 4, and "c" is not supported in python 3.
        supported_string_types = []
        if sys.version_info[0] < 4:
            supported_string_types += ['u']
            # test unicode
            assert_collect_success('u', u'a', 'CHAR')

        # supported float and double
        #
        # Test max, min, and precision for float and double, assuming IEEE 754
        # floating-point format.
        supported_fractional_types = ['f', 'd']
        assert_collect_success('f', ctypes.c_float(1e+38).value, 'FLOAT')
        assert_collect_success('f', ctypes.c_float(1e-38).value, 'FLOAT')
        assert_collect_success('f', ctypes.c_float(1.123456).value, 'FLOAT')
        assert_collect_success('d', sys.float_info.max, 'DOUBLE')
        assert_collect_success('d', sys.float_info.min, 'DOUBLE')
        assert_collect_success('d', sys.float_info.epsilon, 'DOUBLE')

        def get_int_data_type(size):
            if size <= 8:
                return "TINYINT"
            if size <= 16:
                return "SMALLINT"
            if size <= 32:
                return "INT"
            if size <= 64:
                return "BIGINT"

        # supported signed int types
        #
        # The size of C types changes with implementation, we need to make sure
        # that there is no overflow error on the platform running this test.
        supported_signed_int_types = list(
            set(_array_signed_int_typecode_ctype_mappings.keys()).intersection(
                set(_array_type_mappings.keys())))
        for t in supported_signed_int_types:
            ctype = _array_signed_int_typecode_ctype_mappings[t]
            max_val = 2**(ctypes.sizeof(ctype) * 8 - 1)
            assert_collect_success(t, max_val - 1,
                                   get_int_data_type(ctypes.sizeof(ctype) * 8))
            assert_collect_success(t, -max_val,
                                   get_int_data_type(ctypes.sizeof(ctype) * 8))

        # supported unsigned int types
        #
        # JVM does not have unsigned types. We need to be very careful to make
        # sure that there is no overflow error.
        supported_unsigned_int_types = list(
            set(_array_unsigned_int_typecode_ctype_mappings.keys()).
            intersection(set(_array_type_mappings.keys())))
        for t in supported_unsigned_int_types:
            ctype = _array_unsigned_int_typecode_ctype_mappings[t]
            max_val = 2**(ctypes.sizeof(ctype) * 8 - 1)
            assert_collect_success(
                t, max_val, get_int_data_type(ctypes.sizeof(ctype) * 8 + 1))

        # all supported types
        #
        # Make sure the types tested above:
        # 1. are all supported types
        # 2. cover all supported types
        supported_types = (supported_string_types +
                           supported_fractional_types +
                           supported_signed_int_types +
                           supported_unsigned_int_types)
        self.assertEqual(set(supported_types),
                         set(_array_type_mappings.keys()))

        # all unsupported types
        #
        # Keys in _array_type_mappings is a complete list of all supported types,
        # and types not in _array_type_mappings are considered unsupported.
        all_types = set(array.typecodes)
        unsupported_types = all_types - set(supported_types)
        # test unsupported types
        for t in unsupported_types:
            with self.assertRaises(TypeError):
                _infer_schema_from_data([Row(myarray=array.array(t))])