def create_single_mgr(typestr, num_rows=None):
    """
    Build a SingleBlockManager holding one block of the requested type.

    Parameters
    ----------
    typestr : object
        Passed straight through to ``create_block``.
    num_rows : int, optional
        Number of rows; the module-level ``N`` is used when omitted.
    """
    row_count = N if num_rows is None else num_rows

    block = create_block(typestr, placement=slice(0, row_count), item_shape=())
    axis = np.arange(row_count)
    return SingleBlockManager(block, axis)
def test_concat_series():
    # GH17728
    data = np.arange(3, dtype="int64")
    custom = CustomBlock(data, placement=slice(0, 3))
    manager = SingleBlockManager(custom, pd.RangeIndex(3))
    series = pd.Series(manager, pd.RangeIndex(3), fastpath=True)

    combined = pd.concat([series, series])

    # the concatenated result must still be backed by the custom block type
    first_block = combined._data.blocks[0]
    assert isinstance(first_block, CustomBlock)
def __new__(cls, data=None, index=None, crs=None, **kwargs):
    """
    Build a GeoSeries, or a plain Series when the data is not geometry.

    Parameters
    ----------
    data : object, optional
        Input values. A ``SingleBlockManager``, a scalar ``BaseGeometry``,
        geometry-typed data, and anything else accepted by ``pd.Series``
        are each handled by their own branch below.
    index : object, optional
        Forwarded to the Series constructor. For a scalar geometry its
        length decides how many times the scalar is repeated.
    crs : object, optional
        Stored on the resulting GeoSeries as ``self.crs``.
    **kwargs
        Forwarded to the Series constructor. ``name`` is popped and passed
        explicitly; ``dtype`` is dropped for non-geometry input.

    Returns
    -------
    GeoSeries or Series
        A plain Series is returned when a manager's first block is not of
        ``GeometryDtype``, when the intermediate Series has a non-object
        dtype and is not empty, or when ``from_shapely`` raises TypeError.
    """
    # we need to use __new__ because we want to return Series instance
    # instead of GeoSeries instance in case of non-geometry data
    if isinstance(data, SingleBlockManager):
        if isinstance(data.blocks[0].dtype, GeometryDtype):
            if not PANDAS_GE_024 and (data.blocks[0].ndim == 2):
                # bug in pandas 0.23 where in certain indexing operations
                # (such as .loc) a 2D ExtensionBlock (still with 1D values
                # is created) which results in other failures
                from pandas.core.internals import ExtensionBlock
                values = data.blocks[0].values
                # rebuild the block around the same values
                block = ExtensionBlock(values, slice(0, len(values), 1))
                data = SingleBlockManager([block], data.axes[0], fastpath=True)
            # construct manually: allocate the instance, then run the parent
            # initialiser on it
            self = super(GeoSeries, cls).__new__(cls)
            super(GeoSeries, self).__init__(data, index=index, **kwargs)
            self.crs = crs
            return self
        # manager whose first block is not geometry-typed -> plain Series
        return Series(data, index=index, **kwargs)

    if isinstance(data, BaseGeometry):
        # fix problem for scalar geometries passed, ensure the list of
        # scalars is of correct length if index is specified
        n = len(index) if index is not None else 1
        data = [data] * n

    name = kwargs.pop('name', None)

    if not is_geometry_type(data):
        # if data is None and dtype is specified (eg from empty overlay
        # test), specifying dtype raises an error:
        # https://github.com/pandas-dev/pandas/issues/26469
        kwargs.pop('dtype', None)
        # Use Series constructor to handle input data
        s = pd.Series(data, index=index, name=name, **kwargs)
        # prevent trying to convert non-geometry objects
        if s.dtype != object:
            if s.empty:
                s = s.astype(object)
            else:
                return s
        # try to convert to GeometryArray, if fails return plain Series
        try:
            data = from_shapely(s.values)
        except TypeError:
            return s
        # reuse the index and name that the Series constructor settled on
        index = s.index
        name = s.name

    self = super(GeoSeries, cls).__new__(cls)
    super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs)
    self.crs = crs
    self._invalidate_sindex()
    return self
def _set_value(self, label, value, takeable=False):
    """
    Set one value by going through a dense copy and re-sparsifying.

    The rebuilt values and index replace ``self._data`` and
    ``self._index``; nothing is returned explicitly.
    """
    dense = self.to_dense()

    # if the label doesn't exist, the dense setter creates a new object
    # (possibly with a changed index); prefer that object when we get one
    replacement = dense._set_value(label, value, takeable=takeable)
    dense = dense if replacement is None else replacement

    result_index = dense.index
    sparse_values = SparseArray(
        dense, fill_value=self.fill_value, kind=self.kind
    )
    self._data = SingleBlockManager(sparse_values, result_index)
    self._index = result_index
def _set_values(self, key, value):
    """
    Assign ``value`` at ``key`` by rebuilding the whole sparse array.

    ``key`` addresses positions in dense space, so the values are
    densified, updated, and converted back to a ``SparseArray`` that
    replaces ``self._data``.
    """
    # this might be inefficient as we have to recreate the sparse array
    # rather than setting individual elements, but the passed
    # slice/boolean is in dense space and there is no known way to turn
    # it into a sparse indexer
    indexer = key.values if isinstance(key, Series) else key

    dense = self.values.to_dense()
    dense[indexer] = libindex.convert_scalar(dense, value)

    rebuilt = SparseArray(dense, fill_value=self.fill_value, kind=self.kind)
    self._data = SingleBlockManager(rebuilt, self.index)
def test_custom_repr():
    data = np.arange(3, dtype='int64')

    # series
    series_block = CustomBlock(data, placement=slice(0, 3))
    series_mgr = SingleBlockManager(series_block, pd.RangeIndex(3))
    series_repr = repr(pd.Series(series_mgr))
    assert series_repr == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64'

    # dataframe
    frame_block = CustomBlock(data, placement=slice(0, 1))
    frame_mgr = BlockManager([frame_block], [['col'], range(3)])
    frame_repr = repr(pd.DataFrame(frame_mgr))
    assert frame_repr == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2'
def __getitem__(self, key):
    """
    Look up ``key``, delegating to the parent class when extension arrays
    are available.

    Without extension-array support, slice / list / Series / ndarray keys
    yield a new GeoSeries carrying the same ``crs``; any other key is
    resolved through the index to a single element of
    ``self._geometry_array``.

    Raises
    ------
    KeyError
        If the key is not in the index, or the lookup raised TypeError.
    """
    if _HAS_EXTENSION_ARRAY:
        return super(GeoSeries, self).__getitem__(key)

    if isinstance(key, (slice, list, Series, np.ndarray)):
        sub_block = self._data._block._getitem(key)
        sub_index = self.index[key]
        manager = SingleBlockManager(sub_block, axis=sub_index)
        return GeoSeries(manager, crs=self.crs, index=sub_index)

    try:
        if key in self.index:
            position = self.index.get_loc(key)
            return self._geometry_array[position]
    except TypeError:
        # a TypeError during lookup is reported as a missing key below
        pass

    raise KeyError(key)
def sparse_reindex(self, new_index):
    """
    Re-express the sparse values against a different SparseIndex.

    Parameters
    ----------
    new_index : {BlockIndex, IntIndex}
        Must be an instance of ``splib.SparseIndex``.

    Returns
    -------
    reindexed : SparseSeries
        Built with ``self._constructor`` on the same ``self.index`` and
        ``fill_value``, then passed through ``__finalize__(self)``.

    Raises
    ------
    TypeError
        If ``new_index`` is not a ``splib.SparseIndex``.
    """
    if isinstance(new_index, splib.SparseIndex):
        reindexed_block = self.block.sparse_reindex(new_index)
        manager = SingleBlockManager(reindexed_block, self.index)
        result = self._constructor(
            manager,
            index=self.index,
            sparse_index=new_index,
            fill_value=self.fill_value,
        )
        return result.__finalize__(self)

    raise TypeError('new index must be a SparseIndex')
def sparse_reindex(self, new_index):
    """
    Re-express the sparse values against a different SparseIndex.

    Parameters
    ----------
    new_index : {BlockIndex, IntIndex}
        Must be an instance of ``splib.SparseIndex``.

    Returns
    -------
    reindexed : SparseSeries
        Built with ``self._constructor`` on the same ``self.index`` and
        ``fill_value``.

    Raises
    ------
    AssertionError
        If ``new_index`` is not a ``splib.SparseIndex``.
    """
    if isinstance(new_index, splib.SparseIndex):
        reindexed_block = self.block.sparse_reindex(new_index)
        # the manager's axis comes from the reindexed block itself
        manager = SingleBlockManager(reindexed_block, reindexed_block.ref_items)
        return self._constructor(
            manager,
            index=self.index,
            sparse_index=new_index,
            fill_value=self.fill_value,
        )

    raise AssertionError()
def _set_value(self, label, value, takeable=False):
    """
    Set a single value at ``label`` and rebuild the sparse storage.

    The series is densified, the value is written through the dense
    object's ``_set_value``, and the outcome is converted back into a
    ``SparseArray`` that replaces ``self._data`` and ``self._index``.

    Parameters
    ----------
    label : object
        Passed to the dense object's ``_set_value``.
    value : object
        Value to store.
    takeable : bool, default False
        Forwarded unchanged to the dense ``_set_value``.

    Notes
    -----
    Nothing is returned explicitly; the update is applied to ``self``.
    Every call densifies and re-sparsifies the full series, so this is
    not particularly efficient.
    """
    dense = self.to_dense()

    # if the label doesn't exist, the dense setter hands back a new object
    # whose index may differ from ours
    maybe_new = dense._set_value(label, value, takeable=takeable)
    if maybe_new is not None:
        dense = maybe_new

    updated_index = dense.index
    fill = self.fill_value
    kind = self.kind
    sparse_values = SparseArray(dense, fill_value=fill, kind=kind)

    self._data = SingleBlockManager(sparse_values, updated_index)
    self._index = updated_index
def _unpickle_series_compat(self, state):
    """
    Restore this object from a ``(ndarray_state, own_state)`` pair.

    ``own_state`` supplies index, fill value and sparse index in its first
    three slots, plus an optional name in the fourth.
    """
    ndarray_state, extra_state = state

    # recreate the ndarray
    raw = np.empty(ndarray_state[1], dtype=ndarray_state[2])
    np.ndarray.__setstate__(raw, ndarray_state)

    index, fill_value, sp_index = extra_state[:3]
    name = extra_state[3] if len(extra_state) > 3 else None

    # create a sparse array
    if isinstance(raw, SparseArray):
        sparse = raw
    else:
        sparse = SparseArray(
            raw, sparse_index=sp_index, fill_value=fill_value, copy=False
        )

    # recreate
    manager = SingleBlockManager(sparse, index, fastpath=True)
    generic.NDFrame.__init__(self, manager)

    self._set_axis(0, index)
    self.name = name
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):
    """
    Initialise the sparse series from one of several input kinds.

    Parameters
    ----------
    data : object, optional
        ``None`` (treated as an empty list), SparseArray, SparseSeries,
        Series, dict, tuple/list/ndarray, SingleBlockManager, or anything
        else, which is handled as a scalar to broadcast over ``index``.
    index : object, optional
        Axis labels. When omitted it is taken from ``data`` where
        possible, otherwise a default index of ``sparse_index.length``.
    sparse_index : object, optional
        Pre-computed sparse index for array-like ``data``; its ``npoints``
        must then equal ``len(data)``.
    kind : str, default 'block'
        ``'block'`` builds a BlockIndex for scalar input; any other value
        builds an IntIndex. Also forwarded to ``make_sparse``.
    fill_value : object, optional
        Defaults to ``data.fill_value`` for SparseArray input and to
        ``np.nan`` otherwise.
    name : object, optional
        Defaults to ``data.name`` when ``data`` is a Series.
    dtype : object, optional
        Applied to a SingleBlockManager via ``astype`` or passed to
        ``SparseArray``.
    copy : bool, default False
        Copy the manager, or pass ``copy`` through to ``SparseArray``.
    fastpath : bool, default False
        Internal short-circuit: ``data`` is wrapped in a
        SingleBlockManager (if it is not one already) with no inference.
    """
    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = np.nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index.view()
            elif index is not None:
                assert (len(index) == len(data))

            sparse_index = data.sp_index
            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            # Convert to a Series *before* reading ``.index``: a plain dict
            # has no ``index`` attribute, so the previous order raised
            # AttributeError for dict input without an explicit index.
            data = Series(data)
            if index is None:
                index = data.index.view()

            data, sparse_index = make_sparse(data, kind=kind,
                                             fill_value=fill_value)

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                data, sparse_index = make_sparse(data, kind=kind,
                                                 fill_value=fill_value)
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            else:
                data = data.reindex(index, copy=False)

        else:
            # anything else is treated as a scalar broadcast over ``index``
            length = len(index)

            if data == fill_value or (isnull(data) and isnull(fill_value)):
                # scalar equals the fill value: no points need storing
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # create/copy the manager
        if isinstance(data, SingleBlockManager):

            if copy:
                data = data.copy()
        else:

            # create a sparse array
            if not isinstance(data, SparseArray):
                data = SparseArray(data, sparse_index=sparse_index,
                                   fill_value=fill_value, dtype=dtype,
                                   copy=copy)

            data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name
def test_single_block_manager_fastpath_deprecated():
    # GH#33092
    series = Series(range(3))
    first_block = series._data.blocks[0]

    # passing fastpath=True must emit a FutureWarning
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        SingleBlockManager(first_block, series.index, fastpath=True)
def __new__(cls, data=None, index=None, crs=None, **kwargs):
    """
    Build a GeoSeries, or a plain Series when the data is not geometry.

    Parameters
    ----------
    data : object, optional
        Input values. A ``SingleBlockManager``, a scalar ``BaseGeometry``,
        geometry-typed data, and anything else accepted by ``pd.Series``
        are each handled by their own branch below.
    index : object, optional
        Forwarded to the Series constructor. For a scalar geometry its
        length decides how many times the scalar is repeated.
    crs : object, optional
        Coordinate reference system. Passed to ``from_shapely`` and used
        as a fallback when the constructed values carry no CRS.
    **kwargs
        Forwarded to the Series constructor. ``name`` is popped and passed
        explicitly; ``dtype`` is dropped for non-geometry input.

    Returns
    -------
    GeoSeries or Series
        A plain Series is returned, with a FutureWarning using
        ``_SERIES_WARNING_MSG``, when a manager's first block is not of
        ``GeometryDtype``, when the intermediate Series has a non-object
        dtype and is not empty, or when ``from_shapely`` raises TypeError.

    Warns
    -----
    FutureWarning
        Also emitted when ``data`` already has a truthy ``crs`` that
        differs from the ``crs`` argument.
    """
    # we need to use __new__ because we want to return Series instance
    # instead of GeoSeries instance in case of non-geometry data

    if hasattr(data, "crs") and crs:
        if not data.crs:
            # make a copy to avoid setting CRS to passed GeometryArray
            data = data.copy()
        else:
            if not data.crs == crs:
                # This is the GeoSeries constructor, so point users at
                # GeoSeries.set_crs rather than GeoDataFrame.set_crs.
                warnings.warn(
                    "CRS mismatch between CRS of the passed geometries "
                    "and 'crs'. Use 'GeoSeries.set_crs(crs, "
                    "allow_override=True)' to overwrite CRS or "
                    "'GeoSeries.to_crs(crs)' to reproject geometries. "
                    "CRS mismatch will raise an error in the future versions "
                    "of GeoPandas.",
                    FutureWarning,
                    stacklevel=2,
                )
                # TODO: raise error in 0.9 or 0.10.

    if isinstance(data, SingleBlockManager):
        if isinstance(data.blocks[0].dtype, GeometryDtype):
            if data.blocks[0].ndim == 2:
                # bug in pandas 0.23 where in certain indexing operations
                # (such as .loc) a 2D ExtensionBlock (still with 1D values
                # is created) which results in other failures
                # bug in pandas <= 0.25.0 when len(values) == 1
                #   (https://github.com/pandas-dev/pandas/issues/27785)
                from pandas.core.internals import ExtensionBlock

                values = data.blocks[0].values
                block = ExtensionBlock(values, slice(0, len(values), 1), ndim=1)
                data = SingleBlockManager([block], data.axes[0], fastpath=True)
            # construct manually: allocate the instance, then run the parent
            # initialiser on it
            self = super(GeoSeries, cls).__new__(cls)
            super(GeoSeries, self).__init__(data, index=index, **kwargs)
            # prefer the CRS carried by the values, else the argument
            self.crs = getattr(self.values, "crs", crs)
            return self
        warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2)
        return Series(data, index=index, **kwargs)

    if isinstance(data, BaseGeometry):
        # fix problem for scalar geometries passed, ensure the list of
        # scalars is of correct length if index is specified
        n = len(index) if index is not None else 1
        data = [data] * n

    name = kwargs.pop("name", None)

    if not is_geometry_type(data):
        # if data is None and dtype is specified (eg from empty overlay
        # test), specifying dtype raises an error:
        # https://github.com/pandas-dev/pandas/issues/26469
        kwargs.pop("dtype", None)
        # Use Series constructor to handle input data
        s = pd.Series(data, index=index, name=name, **kwargs)
        # prevent trying to convert non-geometry objects
        if s.dtype != object:
            if s.empty:
                s = s.astype(object)
            else:
                warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2)
                return s
        # try to convert to GeometryArray, if fails return plain Series
        try:
            data = from_shapely(s.values, crs)
        except TypeError:
            warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2)
            return s
        # reuse the index and name that the Series constructor settled on
        index = s.index
        name = s.name

    self = super(GeoSeries, cls).__new__(cls)
    super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs)

    # only fall back to the argument when the instance has no truthy CRS
    if not self.crs:
        self.crs = crs
    self._invalidate_sindex()
    return self
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):
    """
    Initialise the sparse series from one of several input kinds.

    Parameters
    ----------
    data : object, optional
        ``None`` (treated as an empty list), SparseArray, SparseSeries,
        Series, dict, tuple/list/ndarray, SingleBlockManager, or anything
        else, which is handled as a scalar to broadcast over ``index``.
    index : object, optional
        Axis labels. When omitted it is taken from ``data`` where
        possible, otherwise a default index of ``sparse_index.length``.
    sparse_index : object, optional
        Pre-computed sparse index for array-like ``data``; its ``npoints``
        must then equal ``len(data)``.
    kind : str, default 'block'
        ``'block'`` builds a BlockIndex for scalar input; any other value
        builds an IntIndex. Also forwarded to ``make_sparse``.
    fill_value : object, optional
        Taken from ``data.fill_value`` for SparseArray / SparseSeries
        input when not given; for Series, dict and array-like input the
        value returned by ``make_sparse`` is used.
    name : object, optional
        Defaults to ``data.name`` when ``data`` is a Series.
    dtype : object, optional
        Applied to a SingleBlockManager via ``astype`` or passed to
        ``SparseArray``.
    copy : bool, default False
        Copy the manager, or pass ``copy`` through to ``SparseArray``.
    fastpath : bool, default False
        Internal short-circuit: ``data`` is wrapped in a
        SingleBlockManager (if it is not one already) with no inference.

    Raises
    ------
    AssertionError
        If a SingleBlockManager is passed together with an ``index`` that
        does not equal its own, or together with ``copy=True`` and an
        ``index``.
    """
    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        if isinstance(data, SparseArray):
            if index is not None:
                assert (len(index) == len(data))
            sparse_index = data.sp_index
            if fill_value is None:
                fill_value = data.fill_value

            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()
            if fill_value is None:
                fill_value = data.fill_value
            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            # build the Series with the requested index first, then read
            # the index back from it
            data = Series(data, index=index)
            index = data.index.view()

            res = make_sparse(data, kind=kind, fill_value=fill_value)
            data, sparse_index, fill_value = res

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            elif not data.index.equals(index) or copy:  # pragma: no cover
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError('Cannot pass both SingleBlockManager '
                                     '`data` argument and a different '
                                     '`index` argument. `copy` must '
                                     'be False.')

        else:
            # anything else is treated as a scalar broadcast over ``index``
            length = len(index)

            if data == fill_value or (isna(data) and isna(fill_value)):
                # scalar equals the fill value: no points need storing
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # create/copy the manager
        if isinstance(data, SingleBlockManager):

            if copy:
                data = data.copy()
        else:

            # create a sparse array
            if not isinstance(data, SparseArray):
                data = SparseArray(data, sparse_index=sparse_index,
                                   fill_value=fill_value, dtype=dtype,
                                   copy=copy)

            data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name
def __init__(self, data=None, index=None, crs=None, **kwargs):
    """
    Initialise a GeoSeries from geometry data.

    Parameters
    ----------
    data : object, optional
        Input values. A ``SingleBlockManager``, a scalar ``BaseGeometry``,
        geometry-typed data, and anything else accepted by ``pd.Series``
        are each handled by their own branch below.
    index : object, optional
        Forwarded to the Series constructor. For a scalar geometry its
        length decides how many times the scalar is repeated.
    crs : object, optional
        Coordinate reference system. Passed to ``from_shapely`` and
        assigned to ``self.crs`` when the instance has no truthy CRS after
        construction.
    **kwargs
        Forwarded to the Series constructor. ``name`` is popped and passed
        explicitly; ``dtype`` is dropped for non-geometry input.

    Raises
    ------
    ValueError
        If ``data`` already has a truthy ``crs`` that differs from ``crs``.
    TypeError
        If the data is not geometry: a manager whose first block is not
        ``GeometryDtype``, a non-object Series that does not qualify as
        empty input, or values that ``from_shapely`` rejects.
    """
    if hasattr(data, "crs") and crs:
        if not data.crs:
            # make a copy to avoid setting CRS to passed GeometryArray
            data = data.copy()
        else:
            if not data.crs == crs:
                raise ValueError(
                    "CRS mismatch between CRS of the passed geometries "
                    "and 'crs'. Use 'GeoSeries.set_crs(crs, "
                    "allow_override=True)' to overwrite CRS or "
                    "'GeoSeries.to_crs(crs)' to reproject geometries. ")

    if isinstance(data, SingleBlockManager):
        if isinstance(data.blocks[0].dtype, GeometryDtype):
            if data.blocks[0].ndim == 2:
                # bug in pandas 0.23 where in certain indexing operations
                # (such as .loc) a 2D ExtensionBlock (still with 1D values
                # is created) which results in other failures
                # bug in pandas <= 0.25.0 when len(values) == 1
                #   (https://github.com/pandas-dev/pandas/issues/27785)
                from pandas.core.internals import ExtensionBlock

                values = data.blocks[0].values
                block = ExtensionBlock(values, slice(0, len(values), 1), ndim=1)
                data = SingleBlockManager([block], data.axes[0], fastpath=True)
        else:
            # a manager whose first block is not geometry-typed is rejected
            raise TypeError(
                "Non geometry data passed to GeoSeries constructor, "
                f"received data of dtype '{data.blocks[0].dtype}'")

    if isinstance(data, BaseGeometry):
        # fix problem for scalar geometries passed, ensure the list of
        # scalars is of correct length if index is specified
        n = len(index) if index is not None else 1
        data = [data] * n

    name = kwargs.pop("name", None)

    if not is_geometry_type(data):
        # if data is None and dtype is specified (eg from empty overlay
        # test), specifying dtype raises an error:
        # https://github.com/pandas-dev/pandas/issues/26469
        kwargs.pop("dtype", None)
        # Use Series constructor to handle input data
        with compat.ignore_shapely2_warnings():
            # suppress additional warning from pandas for empty data
            # (will always give object dtype instead of float dtype in the future,
            # making the `if s.empty: s = s.astype(object)` below unnecessary)
            empty_msg = "The default dtype for empty Series"
            warnings.filterwarnings("ignore", empty_msg, DeprecationWarning)
            warnings.filterwarnings("ignore", empty_msg, FutureWarning)
            s = pd.Series(data, index=index, name=name, **kwargs)
        # prevent trying to convert non-geometry objects
        if s.dtype != object:
            if (s.empty and s.dtype == "float64") or data is None:
                # pd.Series with empty data gives float64 for older pandas versions
                s = s.astype(object)
            else:
                raise TypeError(
                    "Non geometry data passed to GeoSeries constructor, "
                    f"received data of dtype '{s.dtype}'")
        # try to convert to GeometryArray; a TypeError from the conversion
        # is re-raised with a message naming the offending dtype
        try:
            data = from_shapely(s.values, crs)
        except TypeError:
            raise TypeError(
                "Non geometry data passed to GeoSeries constructor, "
                f"received data of dtype '{s.dtype}'")
        # reuse the index and name that the Series constructor settled on
        index = s.index
        name = s.name

    super().__init__(data, index=index, name=name, **kwargs)

    # only fall back to the argument when the instance has no truthy CRS
    if not self.crs:
        self.crs = crs