class Block(PandasObject): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas data structure Index-ignorant; let the container take care of that """ __slots__ = ['_mgr_locs', 'values', 'ndim'] is_numeric = False is_float = False is_integer = False is_complex = False is_datetime = False is_timedelta = False is_bool = False is_object = False is_categorical = False is_sparse = False _can_hold_na = False _downcast_dtype = None _can_consolidate = True _verify_integrity = True _validate_ndim = True _ftype = 'dense' _holder = None def __init__(self, values, placement, ndim=None, fastpath=False): if ndim is None: ndim = values.ndim elif values.ndim != ndim: raise ValueError('Wrong number of dimensions') self.ndim = ndim self.mgr_locs = placement self.values = values if len(self.mgr_locs) != len(self.values): raise ValueError('Wrong number of items passed %d,' ' placement implies %d' % ( len(self.values), len(self.mgr_locs))) @property def _consolidate_key(self): return (self._can_consolidate, self.dtype.name) @property def _is_single_block(self): return self.ndim == 1 @property def is_view(self): """ return a boolean if I am possibly a view """ return self.values.base is not None @property def is_datelike(self): """ return True if I am a non-datelike """ return self.is_datetime or self.is_timedelta def is_categorical_astype(self, dtype): """ validate that we have a astypeable to categorical, returns a boolean if we are a categorical """ if com.is_categorical_dtype(dtype): if dtype == com.CategoricalDtype(): return True # this is a pd.Categorical, but is not # a valid type for astypeing raise TypeError("invalid type {0} for astype".format(dtype)) return False def to_dense(self): return self.values.view() @property def fill_value(self): return np.nan @property def mgr_locs(self): return self._mgr_locs @property def array_dtype(self): """ the dtype to return if I want to construct this block as an array """ return self.dtype def make_block_same_class(self, values, placement, copy=False, fastpath=True, **kwargs): """ Wrap given values in a block of same type as self. `kwargs` are used in SparseBlock override. """ if copy: values = values.copy() return make_block(values, placement, klass=self.__class__, fastpath=fastpath, **kwargs) @mgr_locs.setter def mgr_locs(self, new_mgr_locs): if not isinstance(new_mgr_locs, BlockPlacement): new_mgr_locs = BlockPlacement(new_mgr_locs) self._mgr_locs = new_mgr_locs def __unicode__(self): # don't want to print out all of the items here name = com.pprint_thing(self.__class__.__name__) if self._is_single_block: result = '%s: %s dtype: %s' % ( name, len(self), self.dtype) else: shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) result = '%s: %s, %s, dtype: %s' % ( name, com.pprint_thing(self.mgr_locs.indexer), shape, self.dtype) return result def __len__(self): return len(self.values) def __getstate__(self): return self.mgr_locs.indexer, self.values def __setstate__(self, state): self.mgr_locs = BlockPlacement(state[0]) self.values = state[1] self.ndim = self.values.ndim def _slice(self, slicer): """ return a slice of my values """ return self.values[slicer] def reshape_nd(self, labels, shape, ref_items): """ Parameters ---------- labels : list of new axis labels shape : new shape ref_items : new ref_items return a new block that is transformed to a nd block """ return _block2d_to_blocknd( values=self.get_values().T, placement=self.mgr_locs, shape=shape, labels=labels, ref_items=ref_items) def getitem_block(self, slicer, new_mgr_locs=None): """ Perform __getitem__-like, return result as block. As of now, only supports slices that preserve dimensionality. """ if new_mgr_locs is None: if isinstance(slicer, tuple): axis0_slicer = slicer[0] else: axis0_slicer = slicer new_mgr_locs = self.mgr_locs[axis0_slicer] new_values = self._slice(slicer) if self._validate_ndim and new_values.ndim != self.ndim: raise ValueError("Only same dim slicing is allowed") return self.make_block_same_class(new_values, new_mgr_locs) @property def shape(self): return self.values.shape @property def itemsize(self): return self.values.itemsize @property def dtype(self): return self.values.dtype @property def ftype(self): return "%s:%s" % (self.dtype, self._ftype) def merge(self, other): return _merge_blocks([self, other]) def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None, mask_info=None): """ Reindex using pre-computed indexer information """ if axis < 1: raise AssertionError('axis must be at least 1, got %d' % axis) if fill_value is None: fill_value = self.fill_value new_values = com.take_nd(self.values, indexer, axis, fill_value=fill_value, mask_info=mask_info) return make_block(new_values, ndim=self.ndim, fastpath=True, placement=self.mgr_locs) def get(self, item): loc = self.items.get_loc(item) return self.values[loc] def iget(self, i): return self.values[i] def set(self, locs, values, check=False): """ Modify Block in-place with new item value Returns ------- None """ self.values[locs] = values def delete(self, loc): """ Delete given loc(-s) from block in-place. """ self.values = np.delete(self.values, loc, 0) self.mgr_locs = self.mgr_locs.delete(loc) def apply(self, func, **kwargs): """ apply the function to my values; return a block if we are not one """ result = func(self.values, **kwargs) if not isinstance(result, Block): result = make_block(values=_block_shape(result), placement=self.mgr_locs,) return result def fillna(self, value, limit=None, inplace=False, downcast=None): if not self._can_hold_na: if inplace: return [self] else: return [self.copy()] mask = isnull(self.values) if limit is not None: if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") mask[mask.cumsum(self.ndim-1) > limit] = False value = self._try_fill(value) blocks = self.putmask(mask, value, inplace=inplace) return self._maybe_downcast(blocks, downcast) def _maybe_downcast(self, blocks, downcast=None): # no need to downcast our float # unless indicated if downcast is None and self.is_float: return blocks elif downcast is None and (self.is_timedelta or self.is_datetime): return blocks result_blocks = [] for b in blocks: result_blocks.extend(b.downcast(downcast)) return result_blocks def downcast(self, dtypes=None): """ try to downcast each item to the dict of dtypes if present """ # turn it off completely if dtypes is False: return [self] values = self.values # single block handling if self._is_single_block: # try to cast all non-floats here if dtypes is None: dtypes = 'infer' nv = _possibly_downcast_to_dtype(values, dtypes) return [make_block(nv, ndim=self.ndim, fastpath=True, placement=self.mgr_locs)] # ndim > 1 if dtypes is None: return [self] if not (dtypes == 'infer' or isinstance(dtypes, dict)): raise ValueError("downcast must have a dictionary or 'infer' as " "its argument") # item-by-item # this is expensive as it splits the blocks items-by-item blocks = [] for i, rl in enumerate(self.mgr_locs): if dtypes == 'infer': dtype = 'infer' else: raise AssertionError("dtypes as dict is not supported yet") dtype = dtypes.get(item, self._downcast_dtype) if dtype is None: nv = _block_shape(values[i], ndim=self.ndim) else: nv = _possibly_downcast_to_dtype(values[i], dtype) nv = _block_shape(nv, ndim=self.ndim) blocks.append(make_block(nv, ndim=self.ndim, fastpath=True, placement=[rl])) return blocks def astype(self, dtype, copy=False, raise_on_error=True, values=None, **kwargs): return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, values=values, **kwargs) def _astype(self, dtype, copy=False, raise_on_error=True, values=None, klass=None, **kwargs): """ Coerce to the new type (if copy=True, return a new copy) raise on an except if raise == True """ # may need to convert to categorical # this is only called for non-categoricals if self.is_categorical_astype(dtype): return make_block(Categorical(self.values, **kwargs), ndim=self.ndim, placement=self.mgr_locs) # astype processing dtype = np.dtype(dtype) if self.dtype == dtype: if copy: return self.copy() return self if klass is None: if dtype == np.object_: klass = ObjectBlock try: # force the copy here if values is None: # _astype_nansafe works fine with 1-d only values = com._astype_nansafe(self.values.ravel(), dtype, copy=True) values = values.reshape(self.values.shape) newb = make_block(values, ndim=self.ndim, placement=self.mgr_locs, fastpath=True, dtype=dtype, klass=klass) except: if raise_on_error is True: raise newb = self.copy() if copy else self if newb.is_numeric and self.is_numeric: if newb.shape != self.shape: raise TypeError("cannot set astype for copy = [%s] for dtype " "(%s [%s]) with smaller itemsize that current " "(%s [%s])" % (copy, self.dtype.name, self.itemsize, newb.dtype.name, newb.itemsize)) return newb def convert(self, copy=True, **kwargs): """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! """ return [self.copy()] if copy else [self] def _can_hold_element(self, value): raise NotImplementedError() def _try_cast(self, value): raise NotImplementedError() def _try_cast_result(self, result, dtype=None): """ try to cast the result to our original type, we may have roundtripped thru object in the mean-time """ if dtype is None: dtype = self.dtype if self.is_integer or self.is_bool or self.is_datetime: pass elif self.is_float and result.dtype == self.dtype: # protect against a bool/object showing up here if isinstance(dtype, compat.string_types) and dtype == 'infer': return result if not isinstance(dtype, type): dtype = dtype.type if issubclass(dtype, (np.bool_, np.object_)): if issubclass(dtype, np.bool_): if isnull(result).all(): return result.astype(np.bool_) else: result = result.astype(np.object_) result[result == 1] = True result[result == 0] = False return result else: return result.astype(np.object_) return result # may need to change the dtype here return _possibly_downcast_to_dtype(result, dtype) def _try_operate(self, values): """ return a version to operate on as the input """ return values def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ return values, other def _try_coerce_result(self, result): """ reverse of try_coerce_args """ return result def _try_coerce_and_cast_result(self, result, dtype=None): result = self._try_coerce_result(result) result = self._try_cast_result(result, dtype=dtype) return result def _try_fill(self, value): return value def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values if slicer is not None: values = values[:, slicer] mask = isnull(values) if not self.is_object and not quoting: values = values.astype(str) else: values = np.array(values, dtype='object') values[mask] = na_rep return values # block actions #### def copy(self, deep=True): values = self.values if deep: values = values.copy() return make_block(values, ndim=self.ndim, klass=self.__class__, fastpath=True, placement=self.mgr_locs) def replace(self, to_replace, value, inplace=False, filter=None, regex=False): """ replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility.""" mask = com.mask_missing(self.values, to_replace) if filter is not None: filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False if not mask.any(): if inplace: return [self] return [self.copy()] return self.putmask(mask, value, inplace=inplace) def setitem(self, indexer, value): """ set the value inplace; return a new block (of a possibly different dtype) indexer is a direct slice/positional indexer; value must be a compatible shape """ # coerce None values, if appropriate if value is None: if self.is_numeric: value = np.nan # coerce args values, value = self._try_coerce_args(self.values, value) arr_value = np.array(value) # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): dtype, _ = com._maybe_promote(arr_value.dtype) values = values.astype(dtype) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) values = transf(values) l = len(values) # length checking # boolean with truth values == len of the value is ok too if isinstance(indexer, (np.ndarray, list)): if is_list_like(value) and len(indexer) != len(value): if not (isinstance(indexer, np.ndarray) and indexer.dtype == np.bool_ and len(indexer[indexer]) == len(value)): raise ValueError("cannot set using a list-like indexer " "with a different length than the value") # slice elif isinstance(indexer, slice): if is_list_like(value) and l: if len(value) != length_of_indexer(indexer, values): raise ValueError("cannot set using a slice indexer with a " "different length than the value") try: def _is_scalar_indexer(indexer): # return True if we are all scalar indexers if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) return all([ np.isscalar(idx) for idx in indexer ]) return False def _is_empty_indexer(indexer): # return a boolean if we have an empty indexer if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) return False # empty indexers # 8669 (empty) if _is_empty_indexer(indexer): pass # setting a single element for each dim and with a rhs that could be say a list # GH 6043 elif _is_scalar_indexer(indexer): values[indexer] = value # if we are an exact match (ex-broadcasting), # then use the resultant dtype elif len(arr_value.shape) and arr_value.shape[0] == values.shape[0] and np.prod(arr_value.shape) == np.prod(values.shape): values[indexer] = value values = values.astype(arr_value.dtype) # set else: values[indexer] = value # coerce and try to infer the dtypes of the result if np.isscalar(value): dtype, _ = _infer_dtype_from_scalar(value) else: dtype = 'infer' values = self._try_coerce_and_cast_result(values, dtype) block = make_block(transf(values), ndim=self.ndim, placement=self.mgr_locs, fastpath=True) # may have to soft convert_objects here if block.is_object and not self.is_object: block = block.convert(numeric=False) return block except (ValueError, TypeError) as detail: raise except Exception as detail: pass return [self] def putmask(self, mask, new, align=True, inplace=False): """ putmask the data to the block; it is possible that we may create a new dtype of block return the resulting block(s) Parameters ---------- mask : the condition to respect new : a ndarray/object align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False Returns ------- a new block(s), the result of the putmask """ new_values = self.values if inplace else self.values.copy() # may need to align the new if hasattr(new, 'reindex_axis'): new = new.values.T # may need to align the mask if hasattr(mask, 'reindex_axis'): mask = mask.values.T # if we are passed a scalar None, convert it here if not is_list_like(new) and isnull(new) and not self.is_object: new = self.fill_value if self._can_hold_element(new): new = self._try_cast(new) # pseudo-broadcast if isinstance(new, np.ndarray) and new.ndim == self.ndim - 1: new = np.repeat(new, self.shape[-1]).reshape(self.shape) np.putmask(new_values, mask, new) # maybe upcast me elif mask.any(): # need to go column by column new_blocks = [] if self.ndim > 1: for i, ref_loc in enumerate(self.mgr_locs): m = mask[i] v = new_values[i] # need a new block if m.any(): n = new[i] if isinstance( new, np.ndarray) else np.array(new) # type of the new block dtype, _ = com._maybe_promote(n.dtype) # we need to exiplicty astype here to make a copy n = n.astype(dtype) nv = _putmask_smart(v, m, n) else: nv = v if inplace else v.copy() # Put back the dimension that was taken from it and make # a block out of the result. block = make_block(values=nv[np.newaxis], placement=[ref_loc], fastpath=True) new_blocks.append(block) else: nv = _putmask_smart(new_values, mask, new) new_blocks.append(make_block(values=nv, placement=self.mgr_locs, fastpath=True)) return new_blocks if inplace: return [self] return [make_block(new_values, placement=self.mgr_locs, fastpath=True)] def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, fill_value=None, coerce=False, downcast=None, **kwargs): def check_int_bool(self, inplace): # Only FloatBlocks will contain NaNs. # timedelta subclasses IntBlock if (self.is_bool or self.is_integer) and not self.is_timedelta: if inplace: return self else: return self.copy() # a fill na type method try: m = com._clean_fill_method(method) except: m = None if m is not None: r = check_int_bool(self, inplace) if r is not None: return r return self._interpolate_with_fill(method=m, axis=axis, inplace=inplace, limit=limit, fill_value=fill_value, coerce=coerce, downcast=downcast) # try an interp method try: m = com._clean_interp_method(method, **kwargs) except: m = None if m is not None: r = check_int_bool(self, inplace) if r is not None: return r return self._interpolate(method=m, index=index, values=values, axis=axis, limit=limit, fill_value=fill_value, inplace=inplace, downcast=downcast, **kwargs) raise ValueError("invalid method '{0}' to interpolate.".format(method)) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, coerce=False, downcast=None): """ fillna but using the interpolate machinery """ # if we are coercing, then don't force the conversion # if the block can't hold the type if coerce: if not self._can_hold_na: if inplace: return [self] else: return [self.copy()] fill_value = self._try_fill(fill_value) values = self.values if inplace else self.values.copy() values = self._try_operate(values) values = com.interpolate_2d(values, method=method, axis=axis, limit=limit, fill_value=fill_value, dtype=self.dtype) values = self._try_coerce_result(values) blocks = [make_block(values, ndim=self.ndim, klass=self.__class__, fastpath=True, placement=self.mgr_locs)] return self._maybe_downcast(blocks, downcast) def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, inplace=False, downcast=None, **kwargs): """ interpolate using scipy wrappers """ data = self.values if inplace else self.values.copy() # only deal with floats if not self.is_float: if not self.is_integer: return self data = data.astype(np.float64) if fill_value is None: fill_value = self.fill_value if method in ('krogh', 'piecewise_polynomial', 'pchip'): if not index.is_monotonic: raise ValueError("{0} interpolation requires that the " "index be monotonic.".format(method)) # process 1-d slices in the axis direction def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? # i.e. not an arg to com.interpolate_1d return com.interpolate_1d(index, x, method=method, limit=limit, fill_value=fill_value, bounds_error=False, **kwargs) # interp each column independently interp_values = np.apply_along_axis(func, axis, data) blocks = [make_block(interp_values, ndim=self.ndim, klass=self.__class__, fastpath=True, placement=self.mgr_locs)] return self._maybe_downcast(blocks, downcast) def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): """ Take values according to indexer and return them as a block.bb """ if fill_tuple is None: fill_value = self.fill_value new_values = com.take_nd(self.get_values(), indexer, axis=axis, allow_fill=False) else: fill_value = fill_tuple[0] new_values = com.take_nd(self.get_values(), indexer, axis=axis, allow_fill=True, fill_value=fill_value) if new_mgr_locs is None: if axis == 0: slc = lib.indexer_as_slice(indexer) if slc is not None: new_mgr_locs = self.mgr_locs[slc]
def __setstate__(self, state): self.mgr_locs = BlockPlacement(state[0]) self.values = state[1] self.ndim = self.values.ndim