def test_max_len_string_array(self):
    """Exercise ``libwriters.max_len_string_array`` on object arrays.

    The same three values (``"foo"``, ``"b"`` and NaN) are checked as a raw
    object array and after a round trip through unicode and bytes dtypes;
    each time the reported maximum length must be 3. Finally, an array that
    is not of object dtype is expected to raise ``TypeError``.
    """
    base = np.array(["foo", "b", np.nan], dtype="object")
    candidate = base
    assert libwriters.max_len_string_array(candidate) == 3

    # "U" is the unicode round trip, "S" the bytes (python3) round trip.
    for code in ("U", "S"):
        candidate = base.astype(code).astype(object)
        assert libwriters.max_len_string_array(candidate) == 3

    # raises: the conversion stays inside the context manager on purpose,
    # exactly where the expectation is evaluated.
    with pytest.raises(TypeError):
        libwriters.max_len_string_array(candidate.astype("U"))
def test_max_len_string_array(self):
    """Verify the maximum string length reported for object arrays.

    Values ``'foo'``, ``'b'`` and NaN are measured directly, then again
    after being converted to unicode and to bytes and back to object dtype.
    Every measurement must equal 3. Passing a non-object array is expected
    to raise ``TypeError``.
    """
    measure = libwriters.max_len_string_array
    source = np.array(['foo', 'b', np.nan], dtype='object')

    current = source
    assert measure(current) == 3

    # unicode first, then bytes for python3
    for dtype_code in ['U', 'S']:
        current = source.astype(dtype_code).astype(object)
        assert measure(current) == 3

    # raises
    with pytest.raises(TypeError):
        measure(current.astype('U'))
def test_max_len_string_array(self):
    """Check ``libwriters.max_len_string_array`` on object arrays of text.

    The array holds ``'foo'``, ``'b'`` and NaN. It is measured as-is, after
    a unicode round trip and after a bytes round trip; the expected maximum
    length is 3 in every case. A non-object array must raise ``TypeError``.
    """
    arr = a = np.array(['foo', 'b', np.nan], dtype='object')
    assert libwriters.max_len_string_array(arr) == 3

    # unicode
    arr = a.astype('U').astype(object)
    assert libwriters.max_len_string_array(arr) == 3

    # bytes for python3
    arr = a.astype('S').astype(object)
    assert libwriters.max_len_string_array(arr) == 3

    # raises: use the context-manager form of pytest.raises rather than the
    # legacy callable form, so the failing call is written as plain code.
    with pytest.raises(TypeError):
        libwriters.max_len_string_array(arr.astype('U'))
def _convert_types(self, a):
    """
    Convert an object array of strings to a fixed-width numpy unicode array.

    Parameters
    ----------
    a : numpy.ndarray
        Array to convert. Arrays whose dtype is not ``object`` are returned
        untouched.

    Returns
    -------
    tuple
        ``(array, mask)``. ``array`` is the input itself (non-object dtype),
        a ``'U1'`` array (empty input) or a ``'U<max_len>'`` array. ``mask``
        is the boolean result of ``pd.isnull(a)`` when at least one value is
        missing, otherwise ``None``.

    Raises
    ------
    ValueError
        If a column inferred as ``'mixed'`` cannot be ASCII-encoded, or if
        the inferred type is neither ``'unicode'`` nor ``'string'``.
    """
    # No conversion for scalar type
    if a.dtype != 'object':
        return a, None

    # We can't infer the type of an empty array, so just assume strings
    if len(a) == 0:
        return a.astype('U1'), None

    # Compute a mask of missing values. Replace NaNs and Nones with
    # empty strings so that type inference has a chance.
    mask = pd.isnull(a)
    if mask.any():
        # Work on a copy so the caller's array keeps its missing values.
        a = a.copy()
        np.putmask(a, mask, '')
    else:
        mask = None

    if infer_dtype(a, skipna=False) == 'mixed':
        # Assume it's a string, otherwise raise an error.
        try:
            a = np.array([s.encode('ascii') for s in a])
            a = a.astype('O')
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as a
            # conversion failure.
            raise ValueError(
                "Column of type 'mixed' cannot be converted to string")

    type_ = infer_dtype(a, skipna=False)
    if type_ in ['unicode', 'string']:
        max_len = max_len_string_array(a)
        return a.astype('U{:d}'.format(max_len)), mask
    else:
        raise ValueError('Cannot store arrays with {} dtype'.format(type_))
def _convert_types(self, a):
    """
    Converts object arrays of strings to numpy string arrays.

    Parameters
    ----------
    a : numpy.ndarray
        Input array. Only ``object`` dtype arrays are converted; any other
        dtype is handed back unchanged.

    Returns
    -------
    tuple
        A pair ``(array, mask)``. ``array`` is either the input itself, a
        ``'U1'`` array for empty input, or a ``'U<max_len>'`` array sized by
        ``max_len_string_array``. ``mask`` is the ``pd.isnull`` boolean mask
        when any value is missing and ``None`` otherwise.

    Raises
    ------
    ValueError
        When a ``'mixed'`` column cannot be encoded as ASCII, or when the
        inferred type is not ``'unicode'`` or ``'string'``.
    """
    # No conversion for scalar type
    if a.dtype != 'object':
        return a, None

    # We can't infer the type of an empty array, so just assume strings
    if len(a) == 0:
        return a.astype('U1'), None

    # Compute a mask of missing values. Replace NaNs and Nones with
    # empty strings so that type inference has a chance.
    mask = pd.isnull(a)
    if mask.any():
        # Copy first: the placeholder strings must not leak into the
        # array owned by the caller.
        a = a.copy()
        np.putmask(a, mask, '')
    else:
        mask = None

    if infer_dtype(a, skipna=False) == 'mixed':
        # Assume it's a string, otherwise raise an error.
        try:
            a = np.array([s.encode('ascii') for s in a])
            a = a.astype('O')
        except Exception:
            # ``except Exception`` instead of a bare ``except`` so that
            # interpreter-level signals (KeyboardInterrupt, SystemExit)
            # are not disguised as a conversion error.
            raise ValueError("Column of type 'mixed' cannot be converted to string")

    type_ = infer_dtype(a, skipna=False)
    if type_ in ['unicode', 'string']:
        max_len = max_len_string_array(a)
        return a.astype('U{:d}'.format(max_len)), mask
    else:
        raise ValueError('Cannot store arrays with {} dtype'.format(type_))