def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # if calling index is category, don't check dtype of others try: codes = np.concatenate( [self._is_dtype_compat(c).codes for c in to_concat]) except TypeError: # not all to_concat elements are among our categories (or NA) from pandas.core.dtypes.concat import concat_compat res = concat_compat(to_concat) return Index(res, name=name) else: cat = self._data._from_backing_data(codes) return type(self)._simple_new(cat, name=name)
def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT: # Caller is responsible for ensuring self and other are non-empty # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other elif sort is False: # TDIs are not in the "correct" order and we don't want # to sort but want to remove overlaps left, right = self, other left_start = left[0] loc = right.searchsorted(left_start, side="left") right_chunk = right._values[:loc] dates = concat_compat((left._values, right_chunk)) # With sort being False, we can't infer that result.freq == self.freq # TODO: no tests rely on the _with_freq("infer"); needed? result = type(self)._simple_new(dates, name=self.name) result = result._with_freq("infer") return result else: left, right = other, self left_end = left[-1] right_end = right[-1] # concatenate if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right._values[loc:] dates = concat_compat([left._values, right_chunk]) # The can_fast_union check ensures that the result.freq # should match self.freq dates = type(self._data)(dates, freq=self.freq) result = type(self)._simple_new(dates) return result else: return left
def _fast_union(self, other, sort=None): if len(other) == 0: return self.view(type(self)) if len(self) == 0: return other.view(type(self)) # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other elif sort is False: # TDIs are not in the "correct" order and we don't want # to sort but want to remove overlaps left, right = self, other left_start = left[0] loc = right.searchsorted(left_start, side="left") right_chunk = right._values[:loc] dates = concat_compat((left._values, right_chunk)) # TODO: can we infer that it has self.freq? result = self._shallow_copy(dates)._with_freq("infer") return result else: left, right = other, self left_end = left[-1] right_end = right[-1] # concatenate if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right._values[loc:] dates = concat_compat([left._values, right_chunk]) # TODO: can we infer that it has self.freq? result = self._shallow_copy(dates)._with_freq("infer") return result else: return left
def concatenate_array_managers(mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool) -> Manager: """ Concatenate array managers into one. Parameters ---------- mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples axes : list of Index concat_axis : int copy : bool Returns ------- ArrayManager """ # reindex all arrays mgrs = [] for mgr, indexers in mgrs_indexers: for ax, indexer in indexers.items(): mgr = mgr.reindex_indexer(axes[ax], indexer, axis=ax, allow_dups=True) mgrs.append(mgr) if concat_axis == 1: # concatting along the rows -> concat the reindexed arrays # TODO(ArrayManager) doesn't yet preserve the correct dtype arrays = [ concat_compat([mgrs[i].arrays[j] for i in range(len(mgrs))]) for j in range(len(mgrs[0].arrays)) ] return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False) else: # concatting along the columns -> combine reindexed arrays in a single manager assert concat_axis == 0 arrays = list( itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) return ArrayManager(arrays, [axes[1], axes[0]], do_integrity_check=False)
def test_concat_periodarray_2d(): pi = pd.period_range("2016-01-01", periods=36, freq="D") arr = pi._data.reshape(6, 6) result = _concat.concat_compat([arr[:2], arr[2:]], axis=0) tm.assert_period_array_equal(result, arr) result = _concat.concat_compat([arr[:, :2], arr[:, 2:]], axis=1) tm.assert_period_array_equal(result, arr) msg = "all the input array dimensions for the concatenation axis must match exactly" with pytest.raises(ValueError, match=msg): _concat.concat_compat([arr[:, :2], arr[:, 2:]], axis=0) with pytest.raises(ValueError, match=msg): _concat.concat_compat([arr[:2], arr[2:]], axis=1)
def _fast_union(self, other): if len(other) == 0: return self.view(type(self)) if len(self) == 0: return other.view(type(self)) # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self left_end = left[-1] right_end = right[-1] # concatenate if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] dates = concat_compat((left.values, right_chunk)) return self._shallow_copy(dates) else: return left
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: """ Reshape long-format data to wide. Generalized inverse of DataFrame.pivot Parameters ---------- data : DataFrame groups : dict {new_name : list_of_columns} dropna : boolean, default True Examples -------- >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], ... 'team': ['Red Sox', 'Yankees'], ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) team year hr 0 Red Sox 2007 514 1 Yankees 2007 573 2 Red Sox 2008 545 3 Yankees 2008 526 Returns ------- reshaped : DataFrame """ if isinstance(groups, dict): keys = list(groups.keys()) values = list(groups.values()) else: keys, values = zip(*groups) all_cols = list(set.union(*[set(x) for x in values])) id_cols = list(data.columns.difference(all_cols)) K = len(values[0]) for seq in values: if len(seq) != K: raise ValueError("All column lists must be same length") mdata = {} pivot_cols = [] for target, names in zip(keys, values): to_concat = [data[col].values for col in names] mdata[target] = concat_compat(to_concat) pivot_cols.append(target) for col in id_cols: mdata[col] = np.tile(data[col].values, K) if dropna: mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) for c in pivot_cols: mask &= notna(mdata[c]) if not mask.all(): mdata = {k: v[mask] for k, v in mdata.items()} return data._constructor(mdata, columns=id_cols + pivot_cols)
def concatenate_managers( mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool ) -> Manager: """ Concatenate block managers into one. Parameters ---------- mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples axes : list of Index concat_axis : int copy : bool Returns ------- BlockManager """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) concat_plans = [ _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers ] concat_plan = _combine_concat_plans(concat_plans, concat_axis) blocks = [] for placement, join_units in concat_plan: unit = join_units[0] blk = unit.block if len(join_units) == 1 and not join_units[0].indexers: values = blk.values if copy: values = values.copy() else: values = values.view() fastpath = True elif _is_uniform_join_units(join_units): vals = [ju.block.values for ju in join_units] if not blk.is_extension: # _is_uniform_join_units ensures a single dtype, so # we can use np.concatenate, which is more performant # than concat_compat values = np.concatenate(vals, axis=blk.ndim - 1) else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals, axis=1) values = ensure_block_shape(values, blk.ndim) values = ensure_wrapped_if_datetimelike(values) fastpath = blk.values.dtype == values.dtype else: values = _concatenate_join_units(join_units, concat_axis, copy=copy) fastpath = False if fastpath: b = blk.make_block_same_class(values, placement=placement) else: b = new_block(values, placement=placement, ndim=len(axes)) blocks.append(b) return BlockManager(tuple(blocks), axes)
def get_result(self): cons: Type[FrameOrSeriesUnion] sample: FrameOrSeriesUnion # series only if self._is_series: sample = cast("Series", self.objs[0]) # stack blocks if self.bm_axis == 0: name = com.consensus_name_attr(self.objs) cons = sample._constructor arrs = [ser._values for ser in self.objs] res = concat_compat(arrs, axis=0) result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype) return result.__finalize__(self, method="concat") # combine as columns in a frame else: data = dict(zip(range(len(self.objs)), self.objs)) # GH28330 Preserves subclassed objects through concat cons = sample._constructor_expanddim index, columns = self.new_axes df = cons(data, index=index) df.columns = columns return df.__finalize__(self, method="concat") # combine block managers else: sample = cast("DataFrame", self.objs[0]) mgrs_indexers = [] for obj in self.objs: indexers = {} for ax, new_labels in enumerate(self.new_axes): # ::-1 to convert BlockManager ax to DataFrame ax if ax == self.bm_axis: # Suppress reindexing on concat axis continue # 1-ax to convert BlockManager axis to DataFrame axis obj_labels = obj.axes[1 - ax] if not new_labels.equals(obj_labels): indexers[ax] = obj_labels.get_indexer(new_labels) mgrs_indexers.append((obj._mgr, indexers)) new_data = concatenate_block_managers(mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy) if not self.copy: new_data._consolidate_inplace() cons = sample._constructor return cons(new_data).__finalize__(self, method="concat")
def lreshape(data: "DataFrame", groups, dropna: bool = True, label=None) -> "DataFrame": """ Reshape wide-format data to long. Generalized inverse of DataFrame.pivot. Accepts a dictionary, ``groups``, in which each key is a new column name and each value is a list of old column names that will be "melted" under the new column name as part of the reshape. Parameters ---------- data : DataFrame The wide-format DataFrame. groups : dict {new_name : list_of_columns}. dropna : bool, default True Do not include columns whose entries are all NaN. label : None Not used. .. deprecated:: 1.0.0 Returns ------- DataFrame Reshaped DataFrame. See Also -------- melt : Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. pivot : Create a spreadsheet-style pivot table as a DataFrame. DataFrame.pivot : Pivot without aggregation that can handle non-numeric data. DataFrame.pivot_table : Generalization of pivot that can handle duplicate values for one index/column pair. DataFrame.unstack : Pivot based on the index values instead of a column. wide_to_long : Wide panel to long format. Less flexible but more user-friendly than melt. Examples -------- >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], ... 'team': ['Red Sox', 'Yankees'], ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) team year hr 0 Red Sox 2007 514 1 Yankees 2007 573 2 Red Sox 2008 545 3 Yankees 2008 526 """ if isinstance(groups, dict): keys = list(groups.keys()) values = list(groups.values()) else: keys, values = zip(*groups) all_cols = list(set.union(*[set(x) for x in values])) id_cols = list(data.columns.difference(all_cols)) K = len(values[0]) for seq in values: if len(seq) != K: raise ValueError("All column lists must be same length") mdata = {} pivot_cols = [] for target, names in zip(keys, values): to_concat = [data[col]._values for col in names] mdata[target] = concat_compat(to_concat) pivot_cols.append(target) for col in id_cols: mdata[col] = np.tile(data[col]._values, K) if dropna: mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) for c in pivot_cols: mask &= notna(mdata[c]) if not mask.all(): mdata = {k: v[mask] for k, v in mdata.items()} return data._constructor(mdata, columns=id_cols + pivot_cols)
def concatenate_block_managers(mgrs_indexers, axes: List["Index"], concat_axis: int, copy: bool) -> BlockManager: """ Concatenate block managers into one. Parameters ---------- mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples axes : list of Index concat_axis : int copy : bool Returns ------- BlockManager """ concat_plans = [ _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers ] concat_plan = _combine_concat_plans(concat_plans, concat_axis) blocks = [] for placement, join_units in concat_plan: if len(join_units) == 1 and not join_units[0].indexers: b = join_units[0].block values = b.values if copy: values = values.copy() else: values = values.view() b = b.make_block_same_class(values, placement=placement) elif _is_uniform_join_units(join_units): blk = join_units[0].block vals = [ju.block.values for ju in join_units] if not blk.is_extension: # _is_uniform_join_units ensures a single dtype, so # we can use np.concatenate, which is more performant # than concat_compat values = np.concatenate(vals, axis=blk.ndim - 1) else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals) if not isinstance(values, ExtensionArray): values = values.reshape(1, len(values)) if blk.values.dtype == values.dtype: # Fast-path b = blk.make_block_same_class(values, placement=placement) else: b = make_block(values, placement=placement, ndim=blk.ndim) else: b = make_block( _concatenate_join_units(join_units, concat_axis, copy=copy), placement=placement, ndim=len(axes), ) blocks.append(b) return BlockManager(blocks, axes)
def concatenate_managers(mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool) -> Manager: """ Concatenate block managers into one. Parameters ---------- mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples axes : list of Index concat_axis : int copy : bool Returns ------- BlockManager """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) # Assertions disabled for performance # for tup in mgrs_indexers: # # caller is responsible for ensuring this # indexers = tup[1] # assert concat_axis not in indexers if concat_axis == 0: return _concat_managers_axis0(mgrs_indexers, axes, copy) mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) # Assertion disabled for performance # assert all(not x[1] for x in mgrs_indexers) concat_plans = [ _get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers ] concat_plan = _combine_concat_plans(concat_plans) blocks = [] for placement, join_units in concat_plan: unit = join_units[0] blk = unit.block # Assertion disabled for performance # assert len(join_units) == len(mgrs_indexers) if len(join_units) == 1: values = blk.values if copy: values = values.copy() else: values = values.view() fastpath = True elif _is_uniform_join_units(join_units): vals = [ju.block.values for ju in join_units] if not blk.is_extension: # _is_uniform_join_units ensures a single dtype, so # we can use np.concatenate, which is more performant # than concat_compat values = np.concatenate(vals, axis=1) else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals, axis=1) values = ensure_block_shape(values, ndim=2) values = ensure_wrapped_if_datetimelike(values) fastpath = blk.values.dtype == values.dtype else: values = _concatenate_join_units(join_units, copy=copy) fastpath = False if fastpath: b = blk.make_block_same_class(values, placement=placement) else: b = new_block_2d(values, placement=placement) blocks.append(b) return BlockManager(tuple(blocks), axes)
def concatenate_block_managers(mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool) -> Manager: """ Concatenate block managers into one. Parameters ---------- mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples axes : list of Index concat_axis : int copy : bool Returns ------- BlockManager """ if isinstance(mgrs_indexers[0][0], ArrayManager): if concat_axis == 1: # TODO for now only fastpath without indexers mgrs = [t[0] for t in mgrs_indexers] arrays = [ concat_compat([mgrs[i].arrays[j] for i in range(len(mgrs))], axis=0) for j in range(len(mgrs[0].arrays)) ] return ArrayManager(arrays, [axes[1], axes[0]]) elif concat_axis == 0: mgrs = [t[0] for t in mgrs_indexers] arrays = list( itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) return ArrayManager(arrays, [axes[1], axes[0]]) concat_plans = [ _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers ] concat_plan = _combine_concat_plans(concat_plans, concat_axis) blocks = [] for placement, join_units in concat_plan: if len(join_units) == 1 and not join_units[0].indexers: b = join_units[0].block values = b.values if copy: values = values.copy() else: values = values.view() b = b.make_block_same_class(values, placement=placement) elif _is_uniform_join_units(join_units): blk = join_units[0].block vals = [ju.block.values for ju in join_units] if not blk.is_extension: # _is_uniform_join_units ensures a single dtype, so # we can use np.concatenate, which is more performant # than concat_compat values = np.concatenate(vals, axis=blk.ndim - 1) else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals) if not isinstance(values, ExtensionArray): values = values.reshape(1, len(values)) if blk.values.dtype == values.dtype: # Fast-path b = blk.make_block_same_class(values, placement=placement) else: b = make_block(values, placement=placement, ndim=blk.ndim) else: b = make_block( _concatenate_join_units(join_units, concat_axis, copy=copy), placement=placement, ndim=len(axes), ) blocks.append(b) return BlockManager(blocks, axes)