def test_order_stability_compat(): # GH#35922. sort_values is stable both for normal and datetime-like Index pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) tm.assert_numpy_array_equal(indexer1, indexer2)
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique index1 = Index(["A", "B", "A", "C"]) expected = Index(expected_arr, dtype="object") result = index1.intersection(index2, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected)
def test_order_stability_compat(): # GH 35584. The new implementation of sort_values for Index.sort_values # is stable when sorting in descending order. Datetime-like sort_values # currently aren't stable. xfail should be removed after # the implementations' behavior is synchronized (xref GH 35922) pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) tm.assert_numpy_array_equal(indexer1, indexer2)
def test_intersection_monotonic(self, index2, keeps_name, sort): index1 = Index([5, 3, 2, 4, 1], name="index") expected = Index([5, 3, 4]) if keeps_name: expected.name = "index" result = index1.intersection(index2, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected)
def _build_result(self, cfg: pd.DataFrame, common_keys: pd.Index, csvs: [pd.DataFrame]) -> pd.DataFrame: common_keys = common_keys.sort_values(ascending=False) result = pd.DataFrame(common_keys) for index, row in cfg.iterrows(): id_column = row[1] id_column_name = self._get_column_name(csvs[index], id_column) data_column = row[2] data = csvs[index].query(f"`{id_column_name}` in @common_keys") data = data.sort_values(by=id_column_name, ascending=False) data = pd.Series(data.iloc[:, data_column].values) result = pd.concat([result, data], axis=1) result = result.dropna() return result
def _generate_candidates(S_a: pd.Index, n_event_tot: int, max_length: int = 100): S_a = S_a.sort_values() dS = S_a[-1] - S_a[0] cycles, covered = compute_cycles_dyn(S_a, n_event_tot, max_length) covered = Bitmap(covered) if len(S_a) - len(covered) > 3: # add triples if necessary _all = Bitmap(range(len(S_a))) _S_a = S_a[_all - covered] triples = extract_triples(_S_a, dS) merged = merge_triples(triples) cycles.extend(merged) return list(sorted(cycles, key=lambda _: _.shape[1], reverse=True))
def test_symmetric_difference(self, sort): # smoke index1 = Index([5, 2, 3, 4], name="index1") index2 = Index([2, 3, 4, 1]) result = index1.symmetric_difference(index2, sort=sort) expected = Index([5, 1]) assert tm.equalContents(result, expected) assert result.name is None if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) # __xor__ syntax with tm.assert_produces_warning(FutureWarning): expected = index1 ^ index2 assert tm.equalContents(result, expected) assert result.name is None
def union_categoricals(to_union, sort_categories=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. .. versionadded:: 0.19.0 Parameters ---------- to_union : list-like of Categorical, CategoricalIndex, or Series with dtype='category' sort_categories : boolean, default False If true, resulting categories will be lexsorted, otherwise they will be ordered as they appear in the data. Returns ------- result : Categorical Raises ------ TypeError - all inputs do not have the same dtype - all inputs do not have the same ordered property - all inputs are ordered and their categories are not identical - sort_categories=True and Categoricals are ordered ValueError Emmpty list of categoricals passed """ from pandas import Index, Categorical, CategoricalIndex, Series if len(to_union) == 0: raise ValueError('No Categoricals to union') def _maybe_unwrap(x): if isinstance(x, (CategoricalIndex, Series)): return x.values elif isinstance(x, Categorical): return x else: raise TypeError("all components to combine must be Categorical") to_union = [_maybe_unwrap(x) for x in to_union] first = to_union[0] if not all( is_dtype_equal(other.categories.dtype, first.categories.dtype) for other in to_union[1:]): raise TypeError("dtype of categories must be the same") ordered = False if all(first.is_dtype_equal(other) for other in to_union[1:]): # identical categories - fastpath categories = first.categories ordered = first.ordered new_codes = np.concatenate([c.codes for c in to_union]) if sort_categories and ordered: raise TypeError("Cannot use sort_categories=True with " "ordered Categoricals") if sort_categories and not categories.is_monotonic_increasing: categories = categories.sort_values() indexer = categories.get_indexer(first.categories) new_codes = take_1d(indexer, new_codes, fill_value=-1) elif all(not c.ordered for c in to_union): # different categories - union and recode cats = first.categories.append([c.categories for c in to_union[1:]]) categories = Index(cats.unique()) if sort_categories: categories = categories.sort_values() new_codes = [] for c in to_union: if len(c.categories) > 0: indexer = categories.get_indexer(c.categories) new_codes.append(take_1d(indexer, c.codes, fill_value=-1)) else: # must be all NaN new_codes.append(c.codes) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message if all(c.ordered for c in to_union): msg = ("to union ordered Categoricals, " "all categories must be the same") raise TypeError(msg) else: raise TypeError('Categorical.ordered must be the same') return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
def union_categoricals(to_union, sort_categories=False, ignore_order=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. .. versionadded:: 0.19.0 Parameters ---------- to_union : list-like of Categorical, CategoricalIndex, or Series with dtype='category' sort_categories : boolean, default False If true, resulting categories will be lexsorted, otherwise they will be ordered as they appear in the data. ignore_order: boolean, default False If true, the ordered attribute of the Categoricals will be ignored. Results in an unordered categorical. .. versionadded:: 0.20.0 Returns ------- result : Categorical Raises ------ TypeError - all inputs do not have the same dtype - all inputs do not have the same ordered property - all inputs are ordered and their categories are not identical - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed Notes ----- To learn more about categories, see `link <http://pandas.pydata.org/pandas-docs/stable/categorical.html#unioning>`__ Examples -------- >>> from pandas.api.types import union_categoricals If you want to combine categoricals that do not necessarily have the same categories, `union_categoricals` will combine a list-like of categoricals. The new categories will be the union of the categories being combined. >>> a = pd.Categorical(["b", "c"]) >>> b = pd.Categorical(["a", "b"]) >>> union_categoricals([a, b]) [b, c, a, b] Categories (3, object): [b, c, a] By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be lexsorted, use `sort_categories=True` argument. >>> union_categoricals([a, b], sort_categories=True) [b, c, a, b] Categories (3, object): [a, b, c] `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. what you could also `append` for). >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> union_categoricals([a, b]) [a, b, a, b, a] Categories (2, object): [a < b] Raises `TypeError` because the categories are ordered and not identical. >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "c"], ordered=True) >>> union_categoricals([a, b]) TypeError: to union ordered Categoricals, all categories must be the same New in version 0.20.0 Ordered categoricals with different categories or orderings can be combined by using the `ignore_ordered=True` argument. >>> a = pd.Categorical(["a", "b", "c"], ordered=True) >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) [a, b, c, c, b, a] Categories (3, object): [a, b, c] `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will always be a plain `Categorical` >>> a = pd.Series(["b", "c"], dtype='category') >>> b = pd.Series(["a", "b"], dtype='category') >>> union_categoricals([a, b]) [b, c, a, b] Categories (3, object): [b, c, a] """ from pandas import Index, Categorical, CategoricalIndex, Series from pandas.core.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError('No Categoricals to union') def _maybe_unwrap(x): if isinstance(x, (CategoricalIndex, Series)): return x.values elif isinstance(x, Categorical): return x else: raise TypeError("all components to combine must be Categorical") to_union = [_maybe_unwrap(x) for x in to_union] first = to_union[0] if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype) for other in to_union[1:]): raise TypeError("dtype of categories must be the same") ordered = False if all(first.is_dtype_equal(other) for other in to_union[1:]): # identical categories - fastpath categories = first.categories ordered = first.ordered new_codes = np.concatenate([c.codes for c in to_union]) if sort_categories and not ignore_order and ordered: raise TypeError("Cannot use sort_categories=True with " "ordered Categoricals") if sort_categories and not categories.is_monotonic_increasing: categories = categories.sort_values() indexer = categories.get_indexer(first.categories) from pandas.core.algorithms import take_1d new_codes = take_1d(indexer, new_codes, fill_value=-1) elif ignore_order or all(not c.ordered for c in to_union): # different categories - union and recode cats = first.categories.append([c.categories for c in to_union[1:]]) categories = Index(cats.unique()) if sort_categories: categories = categories.sort_values() new_codes = [] for c in to_union: new_codes.append(_recode_for_categories(c.codes, c.categories, categories)) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message if all(c.ordered for c in to_union): msg = ("to union ordered Categoricals, " "all categories must be the same") raise TypeError(msg) else: raise TypeError('Categorical.ordered must be the same') if ignore_order: ordered = False return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
def union_categoricals(to_union, sort_categories=False, ignore_order=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. .. versionadded:: 0.19.0 Parameters ---------- to_union : list-like of Categorical, CategoricalIndex, or Series with dtype='category' sort_categories : boolean, default False If true, resulting categories will be lexsorted, otherwise they will be ordered as they appear in the data. ignore_order: boolean, default False If true, the ordered attribute of the Categoricals will be ignored. Results in an unordered categorical. .. versionadded:: 0.20.0 Returns ------- result : Categorical Raises ------ TypeError - all inputs do not have the same dtype - all inputs do not have the same ordered property - all inputs are ordered and their categories are not identical - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed """ from pandas import Index, Categorical, CategoricalIndex, Series if len(to_union) == 0: raise ValueError('No Categoricals to union') def _maybe_unwrap(x): if isinstance(x, (CategoricalIndex, Series)): return x.values elif isinstance(x, Categorical): return x else: raise TypeError("all components to combine must be Categorical") to_union = [_maybe_unwrap(x) for x in to_union] first = to_union[0] if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype) for other in to_union[1:]): raise TypeError("dtype of categories must be the same") ordered = False if all(first.is_dtype_equal(other) for other in to_union[1:]): # identical categories - fastpath categories = first.categories ordered = first.ordered new_codes = np.concatenate([c.codes for c in to_union]) if sort_categories and not ignore_order and ordered: raise TypeError("Cannot use sort_categories=True with " "ordered Categoricals") if sort_categories and not categories.is_monotonic_increasing: categories = categories.sort_values() indexer = categories.get_indexer(first.categories) new_codes = take_1d(indexer, new_codes, fill_value=-1) elif ignore_order or all(not c.ordered for c in to_union): # different categories - union and recode cats = first.categories.append([c.categories for c in to_union[1:]]) categories = Index(cats.unique()) if sort_categories: categories = categories.sort_values() new_codes = [] for c in to_union: if len(c.categories) > 0: indexer = categories.get_indexer(c.categories) new_codes.append(take_1d(indexer, c.codes, fill_value=-1)) else: # must be all NaN new_codes.append(c.codes) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message if all(c.ordered for c in to_union): msg = ("to union ordered Categoricals, " "all categories must be the same") raise TypeError(msg) else: raise TypeError('Categorical.ordered must be the same') if ignore_order: ordered = False return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
def _have_same_elements(idx1: pd.Index, idx2: pd.Index) -> bool: return idx1.sort_values().equals(idx2.sort_values())
def assert_index_equal( left: Index, right: Index, exact: Union[bool, str] = "equiv", check_names: bool = True, check_less_precise: Union[bool, int] = no_default, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, rtol: float = 1.0e-5, atol: float = 1.0e-8, obj: str = "Index", ) -> None: """ Check that left and right Index are equal. Parameters ---------- left : Index right : Index exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Int64Index as well. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the digits to compare. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default True Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. check_order : bool, default True Whether to compare the order of index entries as well as their values. If True, both indexes must contain the same elements, in the same order. If False, both indexes must contain the same elements, but in any order. .. versionadded:: 1.2.0 rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 obj : str, default 'Index' Specify object name being compared, internally used to show appropriate assertion message. Examples -------- >>> from pandas.testing import assert_index_equal >>> a = pd.Index([1, 2, 3]) >>> b = pd.Index([1, 2, 3]) >>> assert_index_equal(a, b) """ __tracebackhide__ = True def _check_types(left, right, obj="Index"): if exact: assert_class_equal(left, right, exact=exact, obj=obj) # Skip exact dtype checking when `check_categorical` is False if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) # allow string-like to have different inferred_types if left.inferred_type in ("string"): assert right.inferred_type in ("string") else: assert_attr_equal("inferred_type", left, right, obj=obj) def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] level_codes = index.codes[level] filled = take_1d(unique._values, level_codes, fill_value=unique._na_value) return unique._shallow_copy(filled, name=index.names[level]) if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=2, ) rtol = atol = _get_tol_from_less_precise(check_less_precise) # instance validation _check_isinstance(left, right, Index) # class / dtype comparison _check_types(left, right, obj=obj) # level comparison if left.nlevels != right.nlevels: msg1 = f"{obj} levels are different" msg2 = f"{left.nlevels}, {left}" msg3 = f"{right.nlevels}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): msg1 = f"{obj} length are different" msg2 = f"{len(left)}, {left}" msg3 = f"{len(right)}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # If order doesn't matter then sort the index entries if not check_order: left = left.sort_values() right = right.sort_values() # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: left = cast(MultiIndex, left) right = cast(MultiIndex, right) for level in range(left.nlevels): # cannot use get_level_values here because it can change dtype llevel = _get_ilevel_values(left, level) rlevel = _get_ilevel_values(right, level) lobj = f"MultiIndex level [{level}]" assert_index_equal( llevel, rlevel, exact=exact, check_names=check_names, check_exact=check_exact, rtol=rtol, atol=atol, obj=lobj, ) # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) # skip exact index checking when `check_categorical` is False if check_exact and check_categorical: if not left.equals(right): diff = (np.sum((left._values != right._values).astype(int)) * 100.0 / len(left)) msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) else: _testing.assert_almost_equal( left.values, right.values, rtol=rtol, atol=atol, check_dtype=exact, obj=obj, lobj=left, robj=right, ) # metadata comparison if check_names: assert_attr_equal("names", left, right, obj=obj) if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex): assert_attr_equal("freq", left, right, obj=obj) if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex): assert_interval_array_equal(left._values, right._values) if check_categorical: if is_categorical_dtype(left.dtype) or is_categorical_dtype( right.dtype): assert_categorical_equal(left._values, right._values, obj=f"{obj} category")