Exemple #1
0
    def test_max_len_string_array(self):
        """Check ``max_len_string_array`` on boxed inputs and its dtype guard."""
        base = np.array(["foo", "b", np.nan], dtype="object")

        # The raw object array, then the same data round-tripped through a
        # unicode dtype and through a bytes dtype (both re-boxed as object).
        boxed = base
        for code in (None, "U", "S"):
            if code is not None:
                boxed = base.astype(code).astype(object)
            assert libwriters.max_len_string_array(boxed) == 3

        # ``boxed`` is now the bytes variant; converting it to a native
        # unicode (non-object) array is expected to be rejected.
        with pytest.raises(TypeError):
            libwriters.max_len_string_array(boxed.astype("U"))
Exemple #2
0
    def test_max_len_string_array(self):
        """Exercise ``max_len_string_array`` with object, unicode and bytes data."""
        source = np.array(['foo', 'b', np.nan], dtype='object')

        # Measure each representation right after it is built: the raw object
        # array, then the unicode and the bytes round-trips (re-boxed as object).
        lengths = [libwriters.max_len_string_array(source)]

        via_unicode = source.astype('U').astype(object)
        lengths.append(libwriters.max_len_string_array(via_unicode))

        via_bytes = source.astype('S').astype(object)
        lengths.append(libwriters.max_len_string_array(via_bytes))

        assert lengths == [3, 3, 3]

        # A native unicode array (not object dtype) must raise.
        with pytest.raises(TypeError):
            libwriters.max_len_string_array(via_bytes.astype('U'))
Exemple #3
0
    def test_max_len_string_array(self):
        """Check ``max_len_string_array`` on object arrays and its dtype guard.

        The same three values are measured as a raw object array, after a
        round-trip through a unicode dtype, and after a round-trip through a
        bytes dtype; each time the expected result is 3. Finally a native
        unicode (non-object) array is passed and a ``TypeError`` is expected.
        """
        arr = a = np.array(['foo', 'b', np.nan], dtype='object')
        assert libwriters.max_len_string_array(arr) == 3

        # unicode
        arr = a.astype('U').astype(object)
        assert libwriters.max_len_string_array(arr) == 3

        # bytes for python3
        arr = a.astype('S').astype(object)
        assert libwriters.max_len_string_array(arr) == 3

        # raises -- use the context-manager form of pytest.raises rather than
        # the legacy ``raises(exc, callable)`` form, so the failing call is
        # visible as a plain statement.
        with pytest.raises(TypeError):
            libwriters.max_len_string_array(arr.astype('U'))
Exemple #4
0
    def _convert_types(self, a):
        """
        Converts object arrays of strings to numpy string arrays
        """
        # No conversion for scalar type
        if a.dtype != 'object':
            return a, None

        # We can't infer the type of an empty array, so just
        # assume strings
        if len(a) == 0:
            return a.astype('U1'), None

        # Compute a mask of missing values. Replace NaNs and Nones with
        # empty strings so that type inference has a chance.
        mask = pd.isnull(a)
        if mask.sum() > 0:
            a = a.copy()
            np.putmask(a, mask, '')
        else:
            mask = None

        if infer_dtype(a, skipna=False) == 'mixed':
            # assume its a string, otherwise raise an error
            try:
                a = np.array([s.encode('ascii') for s in a])
                a = a.astype('O')
            except:
                raise ValueError(
                    "Column of type 'mixed' cannot be converted to string")

        type_ = infer_dtype(a, skipna=False)
        if type_ in ['unicode', 'string']:
            max_len = max_len_string_array(a)
            return a.astype('U{:d}'.format(max_len)), mask
        else:
            raise ValueError('Cannot store arrays with {} dtype'.format(type_))
Exemple #5
0
    def _convert_types(self, a):
        """
        Converts object arrays of strings to numpy string arrays
        """
        # No conversion for scalar type
        if a.dtype != 'object':
            return a, None

        # We can't infer the type of an empty array, so just
        # assume strings
        if len(a) == 0:
            return a.astype('U1'), None

        # Compute a mask of missing values. Replace NaNs and Nones with
        # empty strings so that type inference has a chance.
        mask = pd.isnull(a)
        if mask.sum() > 0:
            a = a.copy()
            np.putmask(a, mask, '')
        else:
            mask = None

        if infer_dtype(a, skipna=False) == 'mixed':
            # assume its a string, otherwise raise an error
            try:
                a = np.array([s.encode('ascii') for s in a])
                a = a.astype('O')
            except:
                raise ValueError("Column of type 'mixed' cannot be converted to string")

        type_ = infer_dtype(a, skipna=False)
        if type_ in ['unicode', 'string']:
            max_len = max_len_string_array(a)
            return a.astype('U{:d}'.format(max_len)), mask
        else:
            raise ValueError('Cannot store arrays with {} dtype'.format(type_))