Ejemplo n.º 1
0
def drop_duplicate_levels(index: tp.Index,
                          keep: tp.Optional[str] = None) -> tp.Index:
    """Drop levels in `index` with the same name and values.

    Two levels are duplicates when both their name and their full sequence of
    level values match. Set `keep` to 'last' to keep the last of the duplicated
    levels, or to 'first' to keep the first one. The comparison is
    case-insensitive.

    Set `keep` to None to use the default taken from
    `settings['broadcasting']['keep']`.

    An `index` that is not a `pd.MultiIndex` is returned unchanged, without
    validating `keep`."""
    from vectorbt._settings import settings
    broadcasting_cfg = settings['broadcasting']

    if keep is None:
        keep = broadcasting_cfg['keep']
    if not isinstance(index, pd.MultiIndex):
        return index
    # Normalize once so that validation and the branch below agree on the value;
    # otherwise e.g. 'First' would pass validation but be handled as 'last'
    keep = keep.lower()
    checks.assert_in(keep, ['first', 'last'])

    if keep == 'first':
        positions = range(index.nlevels)
    else:
        positions = range(index.nlevels - 1, -1, -1)  # loop backwards

    seen = []
    levels_to_drop = []
    for i in positions:
        # A level is identified by its name together with all of its values
        signature = (index.levels[i].name,
                     tuple(index.get_level_values(i).to_numpy().tolist()))
        if signature in seen:
            levels_to_drop.append(i)
        else:
            seen.append(signature)
    return index.droplevel(levels_to_drop)
Ejemplo n.º 2
0
def align_index_to(index1: tp.Index, index2: tp.Index) -> pd.IndexSlice:
    """Align `index1` to have the same shape as `index2` if they have any levels in common.

    Levels are matched by name. For each matched pair, the values of the level
    in `index2` must be a subset of the values of the level in `index1`.

    Returns index slice for the aligning: a full slice if both indexes are
    equal, otherwise an array of positions in `index1`.

    Raises `ValueError` if no common level is found, if a level of `index1`
    has multiple candidates in `index2`, if a named common level of `index2`
    has values missing from `index1`, or if the common levels of `index1`
    contain duplicated rows."""
    # Treat regular indexes as multi-indexes with a single level
    if not isinstance(index1, pd.MultiIndex):
        index1 = pd.MultiIndex.from_arrays([index1])
    if not isinstance(index2, pd.MultiIndex):
        index2 = pd.MultiIndex.from_arrays([index2])
    if pd.Index.equals(index1, index2):
        return pd.IndexSlice[:]

    # Map level positions in the first index to level positions in the second
    level_map = {}
    for pos1, name1 in enumerate(index1.names):
        for pos2, name2 in enumerate(index2.names):
            if name1 != name2:
                continue
            if set(index2.levels[pos2]) <= set(index1.levels[pos1]):
                if pos1 in level_map:
                    raise ValueError(
                        f"There are multiple candidate levels with name {name1} in second index"
                    )
                level_map[pos1] = pos2
            elif name1 is not None:
                # Unnamed levels that do not match are skipped silently
                raise ValueError(
                    f"Level {name1} in second index contains values not in first index"
                )
    if not level_map:
        raise ValueError("Can't find common levels to align both indexes")

    # Encode the values of each common level as integer codes shared by both
    # indexes (first index on top of the second), to be accepted by Numba
    n1 = len(index1)
    codes_per_level = [
        pd.factorize(
            pd.concat((index1.get_level_values(pos1).to_series(),
                       index2.get_level_values(pos2).to_series())))[0]
        for pos1, pos2 in level_map.items()
    ]
    combined = np.stack(codes_per_level).T
    codes1 = combined[:n1]
    codes2 = combined[n1:]
    if len(np.unique(codes1, axis=0)) != len(codes1):
        raise ValueError("Duplicated values in first index are not allowed")

    # Shortcut: the second index may just be the first one repeated
    if len(index2) % len(index1) == 0:
        repeats = len(index2) // len(index1)
        if np.array_equal(np.tile(codes1, (repeats, 1)), codes2):
            return pd.IndexSlice[np.tile(np.arange(n1), repeats)]

    # Otherwise reduce each row to a single id and compare element-wise
    row_ids = np.unique(combined, axis=0, return_inverse=True)[1]
    return pd.IndexSlice[_align_index_to_nb(row_ids[:n1], row_ids[n1:])]
Ejemplo n.º 3
0
def select_levels(index: tp.Index,
                  level_names: tp.MaybeLevelSequence) -> tp.Index:
    """Build a new index by selecting one or multiple `level_names` from `index`.

    A single integer or string yields the values of that level as a regular
    index. Any other value is iterated over and yields a `pd.MultiIndex`
    built from the values of each listed level, in the given order.

    `index` is checked to be a `pd.MultiIndex` via `checks.assert_instance_of`."""
    checks.assert_instance_of(index, pd.MultiIndex)

    if not isinstance(level_names, (int, str)):
        selected = [index.get_level_values(name) for name in level_names]
        return pd.MultiIndex.from_arrays(selected)
    return index.get_level_values(level_names)
Ejemplo n.º 4
0
def rename_levels(index: tp.Index, name_dict: tp.Dict[str,
                                                      tp.Any]) -> tp.Index:
    """Rename levels in `index` by `name_dict`.

    `name_dict` maps existing level names to new names. Keys that do not match
    any level name are ignored. Renames are applied in the iteration order of
    `name_dict`, each one on the result of the previous one.

    Returns a renamed index. The passed `index` is never modified in place."""
    for old_name, new_name in name_dict.items():
        if isinstance(index, pd.MultiIndex):
            if old_name in index.names:
                index = index.rename(new_name, level=old_name)
        elif index.name == old_name:
            # Use rename() rather than assigning to .name so that the
            # caller's index object is left untouched
            index = index.rename(new_name)
    return index
Ejemplo n.º 5
0
def drop_redundant_levels(index: tp.Index) -> tp.Index:
    """Drop levels in `index` that carry no information.

    A level is considered redundant if the index has more than one row and the
    level is unnamed with a single unique value, or if
    `checks.is_default_index` accepts the values of the level.

    An `index` that is not a `pd.MultiIndex` is returned unchanged, and so is
    an index where every level is redundant."""
    if not isinstance(index, pd.MultiIndex):
        return index

    has_many_rows = len(index) > 1
    redundant = []
    for pos in range(index.nlevels):
        lvl = index.levels[pos]
        is_constant_unnamed = has_many_rows and len(lvl) == 1 and lvl.name is None
        if is_constant_unnamed or checks.is_default_index(
                index.get_level_values(pos)):
            redundant.append(pos)
    # Keep the index intact if dropping would leave no levels at all
    if len(redundant) >= index.nlevels:
        return index
    return index.droplevel(redundant)
Ejemplo n.º 6
0
def rename_levels(index: tp.Index,
                  name_dict: tp.Dict[str, tp.Any],
                  strict: bool = True) -> tp.Index:
    """Rename levels in `index` by `name_dict`.

    `name_dict` maps existing level names to new names. Renames are applied in
    the iteration order of `name_dict`, each one on the result of the previous
    one.

    If `strict` is True, raises `KeyError` for a key that matches no level
    name; otherwise such keys are ignored.

    Returns a renamed index. The passed `index` is never modified in place."""
    for old_name, new_name in name_dict.items():
        if isinstance(index, pd.MultiIndex):
            found = old_name in index.names
            if found:
                index = index.rename(new_name, level=old_name)
        else:
            found = index.name == old_name
            if found:
                # Use rename() rather than assigning to .name so that the
                # caller's index object is left untouched
                index = index.rename(new_name)
        if not found and strict:
            raise KeyError(f"Level '{old_name}' not found")
    return index
Ejemplo n.º 7
0
def find_first_occurrence(index_value: tp.Any, index: tp.Index) -> int:
    """Return index of the first occurrence in `index`.

    Looks up `index_value` with `index.get_loc` and reduces the result to a
    single position: the start of a slice, the first element of a list, the
    first non-zero entry of an array, or the result itself otherwise."""
    position = index.get_loc(index_value)
    if isinstance(position, np.ndarray):
        # Position of the first truthy entry of the mask
        return np.flatnonzero(position)[0]
    if isinstance(position, list):
        return position[0]
    if isinstance(position, slice):
        return position.start
    return position
Ejemplo n.º 8
0
def drop_levels(index: tp.Index,
                levels: tp.MaybeLevelSequence,
                strict: bool = True) -> tp.Index:
    """Drop `levels` in `index` by their name/position.

    An `index` that is not a `pd.MultiIndex` is returned unchanged.

    If `strict` is True, `levels` is passed to `index.droplevel` as is.

    Otherwise levels are dropped softly: a label is used only if it is an
    existing level name, an integer position between 0 and `index.nlevels - 1`,
    or -1; any other label is ignored. Labels that refer to the same level
    (such as a name and its position) count as one level. If the labels cover
    every level, `index` is returned unchanged so that at least one level
    remains."""
    if not isinstance(index, pd.MultiIndex):
        return index
    if strict:
        return index.droplevel(levels)

    if isinstance(levels, (int, str)):
        levels = (levels, )
    names = list(index.names)
    # Map each targeted level position to the first label that referred to it.
    # Counting positions rather than labels keeps a name and a position of the
    # same level from being counted (and dropped) twice.
    resolved = {}
    for level in levels:
        if level in names:
            position = names.index(level)
        elif isinstance(level, int) and (0 <= level < index.nlevels
                                         or level == -1):
            position = level % index.nlevels
        else:
            continue
        resolved.setdefault(position, level)
    if len(resolved) < index.nlevels:
        # Drop only if there will be some indexes left. The original labels
        # are passed on so that pandas resolves (and validates) them itself.
        return index.droplevel(list(resolved.values()))
    return index
Ejemplo n.º 9
0
def drop_levels(index: tp.Index, levels: tp.MaybeLevelSequence) -> tp.Index:
    """Softly drop `levels` in `index` by their name/position.

    An `index` that is not a `pd.MultiIndex` is returned unchanged.

    A label is used only if it is an existing level name, an integer position
    between 0 and `index.nlevels - 1`, or -1; any other label is ignored.
    Labels that refer to the same level (such as a name and its position)
    count as one level. If the labels cover every level, `index` is returned
    unchanged so that at least one level remains."""
    if not isinstance(index, pd.MultiIndex):
        return index

    if isinstance(levels, (int, str)):
        levels = (levels, )
    names = list(index.names)
    # Map each targeted level position to the first label that referred to it.
    # Counting positions rather than labels keeps a name and a position of the
    # same level from being counted (and dropped) twice.
    resolved = {}
    for level in levels:
        if level in names:
            position = names.index(level)
        elif isinstance(level, int) and (0 <= level < index.nlevels
                                         or level == -1):
            position = level % index.nlevels
        else:
            continue
        resolved.setdefault(position, level)
    if len(resolved) < index.nlevels:
        # Drop only if there will be some indexes left. The original labels
        # are passed on so that pandas resolves (and validates) them itself.
        return index.droplevel(list(resolved.values()))
    return index