def drop_duplicate_levels(index: tp.Index, keep: tp.Optional[str] = None) -> tp.Index:
    """Drop levels in `index` with the same name and values.

    Set `keep` to 'last' to keep last levels, otherwise 'first' (case-insensitive).
    Set `keep` to None to use the default from `settings['broadcasting']['keep']`.

    An index that is not a `pd.MultiIndex` is returned unchanged."""
    from vectorbt._settings import settings
    broadcasting_cfg = settings['broadcasting']

    if keep is None:
        keep = broadcasting_cfg['keep']
    if not isinstance(index, pd.MultiIndex):
        return index
    # Normalize once so that the validation and the direction check below see
    # the same value (e.g. 'First' must mean 'first', not fall through to 'last')
    keep = keep.lower()
    checks.assert_in(keep, ['first', 'last'])

    if keep == 'first':
        positions = range(index.nlevels)
    else:
        positions = range(index.nlevels - 1, -1, -1)  # loop backwards

    seen = []
    levels_to_drop = []
    for i in positions:
        # A level is identified by its name together with its full sequence of values
        level = (index.levels[i].name, tuple(index.get_level_values(i).to_numpy().tolist()))
        if level not in seen:
            seen.append(level)
        else:
            levels_to_drop.append(i)
    return index.droplevel(levels_to_drop)
def align_index_to(index1: tp.Index, index2: tp.Index) -> pd.IndexSlice:
    """Align `index1` to have the same shape as `index2` if they have any levels in common.

    Levels are paired by name. A pair is accepted only if every value of the level
    in `index2` also occurs in the level of `index1`.

    Returns index slice for the aligning: `pd.IndexSlice[:]` if both indexes are equal,
    tiled positions of `index1` if `index2` is a plain repetition of `index1`,
    otherwise whatever `_align_index_to_nb` produces for the factorized rows.

    Raises `ValueError` if a level of `index1` has multiple candidates, if a named
    common level of `index2` holds values missing in `index1`, if there are no
    common levels, or if the common levels of `index1` contain duplicated rows."""
    # Wrap plain indexes so that both sides can be handled level by level
    if not isinstance(index1, pd.MultiIndex):
        index1 = pd.MultiIndex.from_arrays([index1])
    if not isinstance(index2, pd.MultiIndex):
        index2 = pd.MultiIndex.from_arrays([index2])
    if pd.Index.equals(index1, index2):
        return pd.IndexSlice[:]

    # Map level position in the first index -> level position in the second index
    level_map = {}
    for pos1, name1 in enumerate(index1.names):
        for pos2, name2 in enumerate(index2.names):
            if name1 != name2:
                continue
            values1 = set(index1.levels[pos1])
            values2 = set(index2.levels[pos2])
            if values2.issubset(values1):
                if pos1 in level_map:
                    raise ValueError(
                        f"There are multiple candidate levels with name {name1} in second index"
                    )
                level_map[pos1] = pos2
            elif name1 is not None:
                # Unnamed levels that do not match are simply not paired
                raise ValueError(
                    f"Level {name1} in second index contains values not in first index"
                )
    if not level_map:
        raise ValueError("Can't find common levels to align both indexes")

    # Factorize each paired level over both indexes at once, so that equal
    # values get equal integer codes (integers are accepted by Numba)
    codes_per_level = [
        pd.factorize(
            pd.concat((
                index1.get_level_values(pos1).to_series(),
                index2.get_level_values(pos2).to_series(),
            ))
        )[0]
        for pos1, pos2 in level_map.items()
    ]
    stacked = np.stack(codes_per_level).T  # one row per element, one column per level
    n_first = len(index1)
    codes1 = stacked[:n_first]
    codes2 = stacked[n_first:]
    if len(np.unique(codes1, axis=0)) != len(codes1):
        raise ValueError("Duplicated values in first index are not allowed")

    # Try to tile
    n_second = len(index2)
    if n_second % n_first == 0:
        repeats = n_second // n_first
        if np.array_equal(np.tile(codes1, (repeats, 1)), codes2):
            return pd.IndexSlice[np.tile(np.arange(n_first), repeats)]

    # Do element-wise comparison: collapse each row of codes into a single id
    row_ids = np.unique(stacked, axis=0, return_inverse=True)[1]
    return pd.IndexSlice[_align_index_to_nb(row_ids[:n_first], row_ids[n_first:])]
def select_levels(index: tp.Index, level_names: tp.MaybeLevelSequence) -> tp.Index:
    """Build a new index by selecting one or multiple `level_names` from `index`.

    `index` must be a `pd.MultiIndex`. A single level (integer or string) yields the
    values of that level; a sequence of levels yields a new `pd.MultiIndex` built
    from the values of each requested level, in the given order."""
    checks.assert_instance_of(index, pd.MultiIndex)

    if not isinstance(level_names, (int, str)):
        selected = list(map(index.get_level_values, level_names))
        return pd.MultiIndex.from_arrays(selected)
    return index.get_level_values(level_names)
def rename_levels(index: tp.Index, name_dict: tp.Dict[str, tp.Any]) -> tp.Index:
    """Rename levels in `index` by `name_dict`.

    Keys of `name_dict` are current level names and values are the new names.
    Keys that do not match any level are ignored.

    The passed `index` is never modified; a renamed copy is returned instead.
    If nothing matches, `index` itself is returned."""
    for old_name, new_name in name_dict.items():
        if isinstance(index, pd.MultiIndex):
            if old_name in index.names:
                index = index.rename(new_name, level=old_name)
        elif index.name == old_name:
            # Use rename() rather than assigning to `.name`, which would
            # change the caller's index object in place
            index = index.rename(new_name)
    return index
def drop_redundant_levels(index: tp.Index) -> tp.Index:
    """Drop levels in `index` that either have a single unnamed value or a range from 0 to n.

    An index that is not a `pd.MultiIndex` is returned unchanged, and so is a
    `pd.MultiIndex` whose levels are all redundant."""
    if not isinstance(index, pd.MultiIndex):
        return index

    def _is_redundant(pos):
        # A constant, unnamed level carries no information once there is more than one row
        level = index.levels[pos]
        if len(index) > 1 and len(level) == 1 and level.name is None:
            return True
        return checks.is_default_index(index.get_level_values(pos))

    redundant = [pos for pos in range(index.nlevels) if _is_redundant(pos)]
    # Remove redundant levels only if there are some non-redundant levels left
    if len(redundant) < index.nlevels:
        return index.droplevel(redundant)
    return index
def rename_levels(index: tp.Index, name_dict: tp.Dict[str, tp.Any], strict: bool = True) -> tp.Index:
    """Rename levels in `index` by `name_dict`.

    Keys of `name_dict` are current level names and values are the new names.
    If `strict` is True, a key that does not match any level raises `KeyError`;
    otherwise such keys are ignored.

    The passed `index` is never modified; a renamed copy is returned instead.
    If nothing is renamed, `index` itself is returned."""
    for old_name, new_name in name_dict.items():
        if isinstance(index, pd.MultiIndex):
            if old_name in index.names:
                index = index.rename(new_name, level=old_name)
            elif strict:
                raise KeyError(f"Level '{old_name}' not found")
        elif index.name == old_name:
            # Use rename() rather than assigning to `.name`, which would
            # change the caller's index object in place
            index = index.rename(new_name)
        elif strict:
            raise KeyError(f"Level '{old_name}' not found")
    return index
def find_first_occurrence(index_value: tp.Any, index: tp.Index) -> int:
    """Return index of the first occurrence in `index`.

    Looks up `index_value` with `index.get_loc` and reduces the result to a
    single position: the start of a slice, the first item of a list, the first
    non-zero position of an array, or the result itself otherwise."""
    position = index.get_loc(index_value)
    if isinstance(position, np.ndarray):
        # Array result: take the first position that is set
        return np.flatnonzero(position)[0]
    if isinstance(position, list):
        return position[0]
    if isinstance(position, slice):
        return position.start
    return position
def drop_levels(index: tp.Index, levels: tp.MaybeLevelSequence, strict: bool = True) -> tp.Index:
    """Drop `levels` in `index` by their name/position.

    An index that is not a `pd.MultiIndex` is returned unchanged.

    If `strict` is True, `levels` is passed directly to `pd.MultiIndex.droplevel`.
    Otherwise levels are dropped softly: entries that are neither an existing level
    name nor a valid position (negative positions count from the end) are skipped,
    entries referring to the same level are counted once, and nothing is dropped
    if that would remove every level."""
    if not isinstance(index, pd.MultiIndex):
        return index
    if strict:
        return index.droplevel(levels)

    if isinstance(levels, (int, str)):
        levels = (levels,)
    names = list(index.names)
    nlevels = index.nlevels

    levels_to_drop = []
    seen_positions = set()
    for level in levels:
        if level in names:
            # A level name wins over a position when an integer could be both
            position = names.index(level)
        elif pd.api.types.is_integer(level) and -nlevels <= level < nlevels:
            position = int(level) % nlevels
        else:
            continue  # unknown level: skip it
        # Deduplicate by resolved position so that a name and a position pointing
        # to the same level are neither counted nor dropped twice
        if position not in seen_positions:
            seen_positions.add(position)
            levels_to_drop.append(level)

    if len(levels_to_drop) < nlevels:
        # Drop only if there will be some indexes left
        return index.droplevel(levels_to_drop)
    return index
def drop_levels(index: tp.Index, levels: tp.MaybeLevelSequence) -> tp.Index:
    """Softly drop `levels` in `index` by their name/position.

    An index that is not a `pd.MultiIndex` is returned unchanged.

    Entries of `levels` that are neither an existing level name nor a valid position
    (negative positions count from the end) are skipped, entries referring to the
    same level are counted once, and nothing is dropped if that would remove
    every level."""
    if not isinstance(index, pd.MultiIndex):
        return index

    if isinstance(levels, (int, str)):
        levels = (levels,)
    names = list(index.names)
    nlevels = index.nlevels

    levels_to_drop = []
    seen_positions = set()
    for level in levels:
        if level in names:
            # A level name wins over a position when an integer could be both
            position = names.index(level)
        elif pd.api.types.is_integer(level) and -nlevels <= level < nlevels:
            position = int(level) % nlevels
        else:
            continue  # unknown level: skip it
        # Deduplicate by resolved position so that a name and a position pointing
        # to the same level are neither counted nor dropped twice
        if position not in seen_positions:
            seen_positions.add(position)
            levels_to_drop.append(level)

    if len(levels_to_drop) < nlevels:
        # Drop only if there will be some indexes left
        return index.droplevel(levels_to_drop)
    return index