def test_array_types(self): # This test need to make sure that the Scala type selected is at least # as large as the python's types. This is necessary because python's # array types depend on C implementation on the machine. Therefore there # is no machine independent correspondence between python's array types # and Scala types. # See: https://docs.python.org/2/library/array.html def assert_collect_success(typecode, value, element_type): self.assertEqual(element_type, str(_infer_type(array.array(typecode, [value])).element_type)) # supported string types # # String types in python's array are "u" for Py_UNICODE and "c" for char. # "u" will be removed in python 4, and "c" is not supported in python 3. supported_string_types = [] if sys.version_info[0] < 4: supported_string_types += ['u'] # test unicode assert_collect_success('u', u'a', 'CHAR') if sys.version_info[0] < 3: supported_string_types += ['c'] # test string assert_collect_success('c', 'a', 'CHAR') # supported float and double # # Test max, min, and precision for float and double, assuming IEEE 754 # floating-point format. supported_fractional_types = ['f', 'd'] assert_collect_success('f', ctypes.c_float(1e+38).value, 'FLOAT') assert_collect_success('f', ctypes.c_float(1e-38).value, 'FLOAT') assert_collect_success('f', ctypes.c_float(1.123456).value, 'FLOAT') assert_collect_success('d', sys.float_info.max, 'DOUBLE') assert_collect_success('d', sys.float_info.min, 'DOUBLE') assert_collect_success('d', sys.float_info.epsilon, 'DOUBLE') def get_int_data_type(size): if size <= 8: return "TINYINT" if size <= 16: return "SMALLINT" if size <= 32: return "INT" if size <= 64: return "BIGINT" # supported signed int types # # The size of C types changes with implementation, we need to make sure # that there is no overflow error on the platform running this test. supported_signed_int_types = list( set(_array_signed_int_typecode_ctype_mappings.keys()).intersection( set(_array_type_mappings.keys()))) for t in supported_signed_int_types: ctype = _array_signed_int_typecode_ctype_mappings[t] max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1) assert_collect_success(t, max_val - 1, get_int_data_type(ctypes.sizeof(ctype) * 8)) assert_collect_success(t, -max_val, get_int_data_type(ctypes.sizeof(ctype) * 8)) # supported unsigned int types # # JVM does not have unsigned types. We need to be very careful to make # sure that there is no overflow error. supported_unsigned_int_types = list( set(_array_unsigned_int_typecode_ctype_mappings.keys()).intersection( set(_array_type_mappings.keys()))) for t in supported_unsigned_int_types: ctype = _array_unsigned_int_typecode_ctype_mappings[t] max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1) assert_collect_success(t, max_val, get_int_data_type(ctypes.sizeof(ctype) * 8 + 1)) # all supported types # # Make sure the types tested above: # 1. are all supported types # 2. cover all supported types supported_types = (supported_string_types + supported_fractional_types + supported_signed_int_types + supported_unsigned_int_types) self.assertEqual(set(supported_types), set(_array_type_mappings.keys())) # all unsupported types # # Keys in _array_type_mappings is a complete list of all supported types, # and types not in _array_type_mappings are considered unsupported. # `array.typecodes` are not supported in python 2. if sys.version_info[0] < 3: all_types = {'c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'} else: all_types = set(array.typecodes) unsupported_types = all_types - set(supported_types) # test unsupported types for t in unsupported_types: with self.assertRaises(TypeError): _infer_schema_from_data([Row(myarray=array.array(t))])
def test_array_types(self): # This test need to make sure that the Scala type selected is at least # as large as the python's types. This is necessary because python's # array types depend on C implementation on the machine. Therefore there # is no machine independent correspondence between python's array types # and Scala types. # See: https://docs.python.org/2/library/array.html def assert_collect_success(typecode, value, element_type): self.assertEqual( element_type, str(_infer_type(array.array(typecode, [value])).element_type)) # supported string types # # String types in python's array are "u" for Py_UNICODE and "c" for char. # "u" will be removed in python 4, and "c" is not supported in python 3. supported_string_types = [] if sys.version_info[0] < 4: supported_string_types += ['u'] # test unicode assert_collect_success('u', u'a', 'CHAR') # supported float and double # # Test max, min, and precision for float and double, assuming IEEE 754 # floating-point format. supported_fractional_types = ['f', 'd'] assert_collect_success('f', ctypes.c_float(1e+38).value, 'FLOAT') assert_collect_success('f', ctypes.c_float(1e-38).value, 'FLOAT') assert_collect_success('f', ctypes.c_float(1.123456).value, 'FLOAT') assert_collect_success('d', sys.float_info.max, 'DOUBLE') assert_collect_success('d', sys.float_info.min, 'DOUBLE') assert_collect_success('d', sys.float_info.epsilon, 'DOUBLE') def get_int_data_type(size): if size <= 8: return "TINYINT" if size <= 16: return "SMALLINT" if size <= 32: return "INT" if size <= 64: return "BIGINT" # supported signed int types # # The size of C types changes with implementation, we need to make sure # that there is no overflow error on the platform running this test. supported_signed_int_types = list( set(_array_signed_int_typecode_ctype_mappings.keys()).intersection( set(_array_type_mappings.keys()))) for t in supported_signed_int_types: ctype = _array_signed_int_typecode_ctype_mappings[t] max_val = 2**(ctypes.sizeof(ctype) * 8 - 1) assert_collect_success(t, max_val - 1, get_int_data_type(ctypes.sizeof(ctype) * 8)) assert_collect_success(t, -max_val, get_int_data_type(ctypes.sizeof(ctype) * 8)) # supported unsigned int types # # JVM does not have unsigned types. We need to be very careful to make # sure that there is no overflow error. supported_unsigned_int_types = list( set(_array_unsigned_int_typecode_ctype_mappings.keys()). intersection(set(_array_type_mappings.keys()))) for t in supported_unsigned_int_types: ctype = _array_unsigned_int_typecode_ctype_mappings[t] max_val = 2**(ctypes.sizeof(ctype) * 8 - 1) assert_collect_success( t, max_val, get_int_data_type(ctypes.sizeof(ctype) * 8 + 1)) # all supported types # # Make sure the types tested above: # 1. are all supported types # 2. cover all supported types supported_types = (supported_string_types + supported_fractional_types + supported_signed_int_types + supported_unsigned_int_types) self.assertEqual(set(supported_types), set(_array_type_mappings.keys())) # all unsupported types # # Keys in _array_type_mappings is a complete list of all supported types, # and types not in _array_type_mappings are considered unsupported. all_types = set(array.typecodes) unsupported_types = all_types - set(supported_types) # test unsupported types for t in unsupported_types: with self.assertRaises(TypeError): _infer_schema_from_data([Row(myarray=array.array(t))])