예제 #1
0
    def test_array_types(self):
        # This test need to make sure that the Scala type selected is at least
        # as large as the python's types. This is necessary because python's
        # array types depend on C implementation on the machine. Therefore there
        # is no machine independent correspondence between python's array types
        # and Scala types.
        # See: https://docs.python.org/2/library/array.html

        def assertCollectSuccess(typecode, value):
            row = Row(myarray=array.array(typecode, [value]))
            df = self.spark.createDataFrame([row])
            self.assertEqual(df.first()["myarray"][0], value)

        # supported string types
        #
        # String types in python's array are "u" for Py_UNICODE and "c" for char.
        # "u" will be removed in python 4, and "c" is not supported in python 3.
        supported_string_types = []
        if sys.version_info[0] < 4:
            supported_string_types += ['u']
            # test unicode
            assertCollectSuccess('u', u'a')
        if sys.version_info[0] < 3:
            supported_string_types += ['c']
            # test string
            assertCollectSuccess('c', 'a')

        # supported float and double
        #
        # Test max, min, and precision for float and double, assuming IEEE 754
        # floating-point format.
        supported_fractional_types = ['f', 'd']
        assertCollectSuccess('f', ctypes.c_float(1e+38).value)
        assertCollectSuccess('f', ctypes.c_float(1e-38).value)
        assertCollectSuccess('f', ctypes.c_float(1.123456).value)
        assertCollectSuccess('d', sys.float_info.max)
        assertCollectSuccess('d', sys.float_info.min)
        assertCollectSuccess('d', sys.float_info.epsilon)

        # supported signed int types
        #
        # The size of C types changes with implementation, we need to make sure
        # that there is no overflow error on the platform running this test.
        supported_signed_int_types = list(
            set(_array_signed_int_typecode_ctype_mappings.keys())
            .intersection(set(_array_type_mappings.keys())))
        for t in supported_signed_int_types:
            ctype = _array_signed_int_typecode_ctype_mappings[t]
            max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
            assertCollectSuccess(t, max_val - 1)
            assertCollectSuccess(t, -max_val)

        # supported unsigned int types
        #
        # JVM does not have unsigned types. We need to be very careful to make
        # sure that there is no overflow error.
        supported_unsigned_int_types = list(
            set(_array_unsigned_int_typecode_ctype_mappings.keys())
            .intersection(set(_array_type_mappings.keys())))
        for t in supported_unsigned_int_types:
            ctype = _array_unsigned_int_typecode_ctype_mappings[t]
            assertCollectSuccess(t, 2 ** (ctypes.sizeof(ctype) * 8) - 1)

        # all supported types
        #
        # Make sure the types tested above:
        # 1. are all supported types
        # 2. cover all supported types
        supported_types = (supported_string_types +
                           supported_fractional_types +
                           supported_signed_int_types +
                           supported_unsigned_int_types)
        self.assertEqual(set(supported_types), set(_array_type_mappings.keys()))

        # all unsupported types
        #
        # Keys in _array_type_mappings is a complete list of all supported types,
        # and types not in _array_type_mappings are considered unsupported.
        # `array.typecodes` are not supported in python 2.
        if sys.version_info[0] < 3:
            all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
        else:
            # PyPy seems not having array.typecodes.
            all_types = set(['b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'f', 'd'])
        unsupported_types = all_types - set(supported_types)
        # test unsupported types
        for t in unsupported_types:
            with self.assertRaises(TypeError):
                a = array.array(t)
                self.spark.createDataFrame([Row(myarray=a)]).collect()
예제 #2
0
    def test_array_types(self):
        # This test need to make sure that the Scala type selected is at least
        # as large as the python's types. This is necessary because python's
        # array types depend on C implementation on the machine. Therefore there
        # is no machine independent correspondence between python's array types
        # and Scala types.
        # See: https://docs.python.org/2/library/array.html

        def assertCollectSuccess(typecode, value):
            row = Row(myarray=array.array(typecode, [value]))
            df = self.spark.createDataFrame([row])
            self.assertEqual(df.first()["myarray"][0], value)

        # supported string types
        #
        # String types in python's array are "u" for Py_UNICODE and "c" for char.
        # "u" will be removed in python 4, and "c" is not supported in python 3.
        supported_string_types = []
        if sys.version_info[0] < 4:
            supported_string_types += ['u']
            # test unicode
            assertCollectSuccess('u', u'a')
        if sys.version_info[0] < 3:
            supported_string_types += ['c']
            # test string
            assertCollectSuccess('c', 'a')

        # supported float and double
        #
        # Test max, min, and precision for float and double, assuming IEEE 754
        # floating-point format.
        supported_fractional_types = ['f', 'd']
        assertCollectSuccess('f', ctypes.c_float(1e+38).value)
        assertCollectSuccess('f', ctypes.c_float(1e-38).value)
        assertCollectSuccess('f', ctypes.c_float(1.123456).value)
        assertCollectSuccess('d', sys.float_info.max)
        assertCollectSuccess('d', sys.float_info.min)
        assertCollectSuccess('d', sys.float_info.epsilon)

        # supported signed int types
        #
        # The size of C types changes with implementation, we need to make sure
        # that there is no overflow error on the platform running this test.
        supported_signed_int_types = list(
            set(_array_signed_int_typecode_ctype_mappings.keys())
            .intersection(set(_array_type_mappings.keys())))
        for t in supported_signed_int_types:
            ctype = _array_signed_int_typecode_ctype_mappings[t]
            max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
            assertCollectSuccess(t, max_val - 1)
            assertCollectSuccess(t, -max_val)

        # supported unsigned int types
        #
        # JVM does not have unsigned types. We need to be very careful to make
        # sure that there is no overflow error.
        supported_unsigned_int_types = list(
            set(_array_unsigned_int_typecode_ctype_mappings.keys())
            .intersection(set(_array_type_mappings.keys())))
        for t in supported_unsigned_int_types:
            ctype = _array_unsigned_int_typecode_ctype_mappings[t]
            assertCollectSuccess(t, 2 ** (ctypes.sizeof(ctype) * 8) - 1)

        # all supported types
        #
        # Make sure the types tested above:
        # 1. are all supported types
        # 2. cover all supported types
        supported_types = (supported_string_types +
                           supported_fractional_types +
                           supported_signed_int_types +
                           supported_unsigned_int_types)
        self.assertEqual(set(supported_types), set(_array_type_mappings.keys()))

        # all unsupported types
        #
        # Keys in _array_type_mappings is a complete list of all supported types,
        # and types not in _array_type_mappings are considered unsupported.
        # `array.typecodes` are not supported in python 2.
        if sys.version_info[0] < 3:
            all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
        else:
            all_types = set(array.typecodes)
        unsupported_types = all_types - set(supported_types)
        # test unsupported types
        for t in unsupported_types:
            with self.assertRaises(TypeError):
                a = array.array(t)
                self.spark.createDataFrame([Row(myarray=a)]).collect()