コード例 #1
0
ファイル: segment.py プロジェクト: audeering/audinterface
def merge_index(index: pd.MultiIndex, ) -> pd.MultiIndex:
    r"""Fuse overlapping segments into single segments.

    The index is expected to be sorted by its 'start' level already;
    no sorting is done here.
    An empty index is returned unchanged.

    """
    if index.empty:
        return index

    seg_starts = index.get_level_values('start')
    seg_ends = index.get_level_values('end')

    # (start, end) pairs of the segments that are already complete
    merged = []
    # segment that is currently being grown
    cur_start, cur_end = seg_starts[0], seg_ends[0]
    for nxt_start, nxt_end in zip(seg_starts[1:], seg_ends[1:]):
        if nxt_start > cur_end:
            # gap found: close the running segment and open a new one
            merged.append((cur_start, cur_end))
            cur_start, cur_end = nxt_start, nxt_end
        elif nxt_end > cur_end:
            # overlap that reaches further than the running segment
            cur_end = nxt_end
    merged.append((cur_start, cur_end))

    return utils.signal_index(
        [seg_start for seg_start, _ in merged],
        [seg_end for _, seg_end in merged],
    )
コード例 #2
0
    def test_unicode_repr_issues(self):
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)

        repr(index.levels)
        repr(index.get_level_values(1))
コード例 #3
0
    def test_reset_index(self):
        """Exercise ``Series.reset_index`` with names, in place and by level."""
        stacked = tm.makeDataFrame()[:5].stack()
        stacked.index.names = ["hash", "category"]
        stacked.name = "value"

        # the series name becomes the name of the value column
        assert "value" in stacked.reset_index()

        # an explicit ``name`` is used for the value column
        assert "value2" in stacked.reset_index(name="value2")

        # check inplace: must match the result of the non-inplace call
        expected = stacked.reset_index(drop=True)
        stacked.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(expected, stacked)

        # level
        mi = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        values = Series(np.random.randn(6), index=mi)

        one_level_moved = values.reset_index(level=1)
        assert len(one_level_moved.columns) == 2

        two_levels_dropped = values.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(
            two_levels_dropped.index, Index(mi.get_level_values(1))
        )
        assert isinstance(two_levels_dropped, Series)
コード例 #4
0
    def test_reset_index(self):
        """Exercise ``Series.reset_index`` with names, in place and by level."""
        stacked = tm.makeDataFrame()[:5].stack()
        stacked.index.names = ['hash', 'category']
        stacked.name = 'value'

        # the series name becomes the name of the value column
        assert 'value' in stacked.reset_index()

        # an explicit ``name`` is used for the value column
        assert 'value2' in stacked.reset_index(name='value2')

        # check inplace: must match the result of the non-inplace call
        expected = stacked.reset_index(drop=True)
        stacked.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(expected, stacked)

        # level
        level_values = [['bar'], ['one', 'two', 'three'], [0, 1]]
        level_codes = [[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                       [0, 1, 0, 1, 0, 1]]
        mi = MultiIndex(levels=level_values, codes=level_codes)
        values = Series(np.random.randn(6), index=mi)

        one_level_moved = values.reset_index(level=1)
        assert len(one_level_moved.columns) == 2

        two_levels_dropped = values.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(two_levels_dropped.index,
                              Index(mi.get_level_values(1)))
        assert isinstance(two_levels_dropped, Series)
コード例 #5
0
ファイル: test_alter_axes.py プロジェクト: tianran10/pandas
    def test_reset_index(self):
        """Check ``Series.reset_index``: column naming, inplace, level choice."""
        long_form = tm.makeDataFrame()[:5].stack()
        long_form.index.names = ['hash', 'category']
        long_form.name = 'value'

        # default call keeps the series name as a column
        flat = long_form.reset_index()
        assert 'value' in flat

        # ``name`` overrides the column name
        flat = long_form.reset_index(name='value2')
        assert 'value2' in flat

        # check inplace
        not_inplace = long_form.reset_index(drop=True)
        long_form.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(not_inplace, long_form)

        # level
        three_level = MultiIndex(
            levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0],
                   [0, 1, 2, 0, 1, 2],
                   [0, 1, 0, 1, 0, 1]],
        )
        series = Series(np.random.randn(6), index=three_level)

        # moving one level out gives a frame with two columns
        assert len(series.reset_index(level=1).columns) == 2

        # dropping the outer and inner level leaves the middle one as index
        remaining = series.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(remaining.index,
                              Index(three_level.get_level_values(1)))
        assert isinstance(remaining, Series)
コード例 #6
0
def remove_constant_levels(index: pd.MultiIndex) -> pd.MultiIndex:
    """Drop index levels that hold a single unique value.

    Levels named in ``WHITELISTED_LEVELS`` are always kept. One level is
    always left in place: pandas raises ``ValueError`` when asked to drop
    the last remaining level, which previously happened whenever every
    level was constant and none was whitelisted.

    :param index: index to simplify; the passed object is not modified.
    :return: copy of ``index`` without its constant, non-whitelisted
        levels. When only one level is left, ``droplevel`` returns a flat
        ``Index`` instead of a ``MultiIndex``.
    """
    index = index.copy()
    for level in index.names:
        if index.nlevels <= 1:
            # the last level cannot be dropped, so there is nothing left to do
            break
        is_constant = len(index.get_level_values(level).unique()) == 1
        if is_constant and level not in WHITELISTED_LEVELS:
            index = index.droplevel(level=level)
    return index
コード例 #7
0
def indexFillNAs(indexdata: pd.MultiIndex, replacementValues: dict):
    """
    Replace the NAs of index levels with values configurable per level.
    :param indexdata: index to process
    :param replacementValues: dictionary of "level": "replacement value";
        levels whose name is not a key are copied unchanged
    :return: MultiIndex built from the processed levels, named like indexdata
    """
    filled_levels = []
    for level_name in indexdata.names:
        if level_name in replacementValues:
            fill_value = replacementValues[level_name]
            level_values = indexdata.get_level_values(level_name).fillna(
                fill_value
            )
        else:
            level_values = indexdata.get_level_values(level_name)
        filled_levels.append(level_values)

    return pd.MultiIndex.from_arrays(filled_levels, names=indexdata.names)
コード例 #8
0
    def test_get_level_values_box_datetime64(self):

        dates = date_range("1/1/2000", periods=4)
        levels = [dates, [0, 1]]
        codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, codes=codes)

        assert isinstance(index.get_level_values(0)[0], Timestamp)
コード例 #9
0
ファイル: test_timeseries.py プロジェクト: ivannz/pandas
    def test_get_level_values_box(self):
        from pandas import MultiIndex

        dates = date_range('1/1/2000', periods=4)
        levels = [dates, [0, 1]]
        labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, labels=labels)

        self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
コード例 #10
0
    def test_get_level_values_box(self):
        from pandas import MultiIndex

        dates = date_range('1/1/2000', periods=4)
        levels = [dates, [0, 1]]
        labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, labels=labels)

        assert isinstance(index.get_level_values(0)[0], Timestamp)
コード例 #11
0
def test_get_level_values(idx):
    result = idx.get_level_values(0)
    expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
    tm.assert_index_equal(result, expected)
    assert result.name == "first"

    result = idx.get_level_values("first")
    expected = idx.get_level_values(0)
    tm.assert_index_equal(result, expected)

    # GH 10460
    index = MultiIndex(
        levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
        codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
    )

    exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
    tm.assert_index_equal(index.get_level_values(0), exp)
    exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
    tm.assert_index_equal(index.get_level_values(1), exp)
コード例 #12
0
ファイル: test_get_set.py プロジェクト: ziggi0703/pandas
def test_get_level_values(idx):
    result = idx.get_level_values(0)
    expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], name='first')
    tm.assert_index_equal(result, expected)
    assert result.name == 'first'

    result = idx.get_level_values('first')
    expected = idx.get_level_values(0)
    tm.assert_index_equal(result, expected)

    # GH 10460
    index = MultiIndex(
        levels=[CategoricalIndex(['A', 'B']),
                CategoricalIndex([1, 2, 3])],
        labels=[np.array([0, 0, 0, 1, 1, 1]),
                np.array([0, 1, 2, 0, 1, 2])])

    exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
    tm.assert_index_equal(index.get_level_values(0), exp)
    exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
    tm.assert_index_equal(index.get_level_values(1), exp)
コード例 #13
0
 def product_combine_frames(data: List[pd.DataFrame], index: pd.MultiIndex,
                            cols: pd.MultiIndex) -> pd.DataFrame:
     """Fill one dataframe over ``index`` and ``cols`` from several frames.

     The frame at list position ``i`` supplies the columns named by level
     ``i`` of ``cols``. Frames are written in list order, so for rows that
     occur in more than one frame the later frame overwrites the earlier
     one ('last one wins').
     """
     combined = pd.DataFrame([], index=index, columns=cols)
     for level_no, frame in enumerate(data):
         # select this frame's columns following level ``level_no`` of cols,
         # then relabel them with the full column index before writing
         selected = frame.loc[:, cols.get_level_values(level_no)]
         selected.columns = cols
         combined.loc[selected.index, :] = selected
     return combined
コード例 #14
0
ファイル: test_get_set.py プロジェクト: bwignall/pandas
def test_get_level_values(idx):
    result = idx.get_level_values(0)
    expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
                     name='first')
    tm.assert_index_equal(result, expected)
    assert result.name == 'first'

    result = idx.get_level_values('first')
    expected = idx.get_level_values(0)
    tm.assert_index_equal(result, expected)

    # GH 10460
    index = MultiIndex(
        levels=[CategoricalIndex(['A', 'B']),
                CategoricalIndex([1, 2, 3])],
        codes=[np.array([0, 0, 0, 1, 1, 1]),
               np.array([0, 1, 2, 0, 1, 2])])

    exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
    tm.assert_index_equal(index.get_level_values(0), exp)
    exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
    tm.assert_index_equal(index.get_level_values(1), exp)
コード例 #15
0
ファイル: segment.py プロジェクト: audeering/audinterface
def invert_index(
    index: pd.MultiIndex,
    dur: pd.Timedelta,
) -> pd.MultiIndex:
    r"""Return the spans that the segments in index leave open.

    The end of every segment is paired with the start of the next one.
    A leading span from 0 is added if the first segment does not start
    at 0, and a trailing span up to ``dur`` is added if the last segment
    does not end at ``dur``.
    For an empty index a single span from 0 to ``dur`` is returned.

    Assumes that index is sorted by 'start' level.

    """
    if index.empty:
        return utils.signal_index(0, dur)

    seg_starts = index.get_level_values('start')
    seg_ends = index.get_level_values('end')

    # spans between neighbouring segments
    gap_starts = seg_ends[:-1]
    gap_ends = seg_starts[1:]

    first_start = seg_starts[0]
    if first_start != pd.to_timedelta(0):
        # span before the first segment
        gap_starts = gap_starts.insert(0, pd.to_timedelta(0))
        gap_ends = gap_ends.insert(0, first_start)

    last_end = seg_ends[-1]
    if last_end != dur:
        # span after the last segment
        gap_starts = gap_starts.insert(len(gap_starts), last_end)
        gap_ends = gap_ends.insert(len(gap_ends), dur)

    return utils.signal_index(gap_starts, gap_ends)
コード例 #16
0
def test_nan_stays_float():
    """Check an all-missing level after an outer join and a frame subtraction."""

    # GH 7031
    all_missing = MultiIndex(
        levels=[["A", "B"], []],
        codes=[[1, 0], [-1, -1]],
        names=[0, 1],
    )
    other = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = all_missing.join(other, how="outer")
    assert pd.isna(all_missing.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    left = pd.DataFrame([[1, 2]], index=all_missing)
    right = pd.DataFrame([[3, 4]], index=other)
    difference = left - right
    assert pd.isna(left.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
コード例 #17
0
    def coerce_dtype(self, obj: pd.MultiIndex) -> pd.MultiIndex:
        """Coerce the levels of a multi-index with the index schemas.

        ``SchemaError`` exceptions raised while coercing are collected
        over all levels and raised together afterwards.

        :param obj: multi-index to coerce.
        :returns: ``MultiIndex`` with coerced data type
        :raises SchemaErrors: if coercing at least one level failed.
        """
        error_handler = SchemaErrorHandler(lazy=True)

        # level position -> (possibly coerced) values of that level
        coerced_levels = {}
        for position, index_schema in enumerate(self.indexes):
            if all(name is None for name in self.names):
                # no names to match on: pair schema and level by position
                matching_levels = [position]
            else:
                matching_levels = [
                    level_no
                    for level_no, level_name in enumerate(obj.names)
                    if level_name == index_schema.name
                ]
            for level_no in matching_levels:
                level_values = obj.get_level_values(level_no)
                if index_schema.coerce or self._coerce:
                    try:
                        level_values = index_schema.coerce_dtype(level_values)
                    except errors.SchemaError as err:
                        error_handler.collect_error(
                            "dtype_coercion_error", err
                        )
                coerced_levels[level_no] = level_values

        if error_handler.collected_errors:
            raise errors.SchemaErrors(error_handler.collected_errors, obj)

        # NOTE: this is a hack to support koalas
        if type(obj).__module__.startswith("databricks.koalas"):
            # pylint: disable=import-outside-toplevel
            import databricks.koalas as ks

            multiindex_cls = ks.MultiIndex
        else:
            multiindex_cls = pd.MultiIndex

        # rebuild the index with the levels in their positional order
        ordered_arrays = [
            coerced_levels[level_no].to_numpy()
            for level_no in sorted(coerced_levels)
        ]
        return multiindex_cls.from_arrays(ordered_arrays, names=obj.names)
コード例 #18
0
    def coerce_dtype(self, multi_index: pd.MultiIndex) -> pd.MultiIndex:
        """Coerce every level of a multi-index with its index schema.

        Level ``i`` is handled by ``self.indexes[i]`` and is only coerced
        when that schema or this schema has coercion switched on.

        :param multi_index: multi-index to coerce.
        :returns: ``MultiIndex`` with coerced data type
        :raises SchemaError: if the number of levels differs from the
            number of index schemas.
        """
        coerced_levels = []
        if multi_index.nlevels != len(self.indexes):
            message = (
                "multi_index does not have equal number of levels as "
                "MultiIndex schema %d != %d."
            )
            raise errors.SchemaError(
                message % (multi_index.nlevels, len(self.indexes))
            )

        for position, index_schema in enumerate(self.indexes):
            level_values = multi_index.get_level_values(position)
            if index_schema.coerce or self.coerce:
                level_values = index_schema.coerce_dtype(level_values)
            coerced_levels.append(level_values)

        return pd.MultiIndex.from_arrays(
            coerced_levels, names=multi_index.names
        )
コード例 #19
0
ファイル: schema_components.py プロジェクト: lkadin/pandera
    def coerce_dtype(self, obj: pd.MultiIndex) -> pd.MultiIndex:
        """Coerce the levels of a multi-index with the index schemas.

        ``SchemaError`` exceptions raised while coercing are collected
        over all levels and raised together afterwards.

        :param obj: multi-index to coerce.
        :returns: ``MultiIndex`` with coerced data type
        :raises SchemaErrors: if coercing at least one level failed.
        """
        error_handler = SchemaErrorHandler(lazy=True)

        # level position -> (possibly coerced) values of that level
        coerced_levels = {}
        for position, index_schema in enumerate(self.indexes):
            if all(name is None for name in self.names):
                # no names to match on: pair schema and level by position
                matching_levels = [position]
            else:
                matching_levels = [
                    level_no
                    for level_no, level_name in enumerate(obj.names)
                    if level_name == index_schema.name
                ]
            for level_no in matching_levels:
                level_values = obj.get_level_values(level_no)
                if index_schema.coerce or self._coerce:
                    try:
                        level_values = index_schema.coerce_dtype(level_values)
                    except errors.SchemaError as err:
                        error_handler.collect_error(
                            "dtype_coercion_error", err
                        )
                coerced_levels[level_no] = level_values

        if error_handler.collected_errors:
            raise errors.SchemaErrors(error_handler.collected_errors, obj)

        # rebuild the index with the levels in their positional order
        ordered_levels = [
            coerced_levels[level_no] for level_no in sorted(coerced_levels)
        ]
        return pd.MultiIndex.from_arrays(ordered_levels, names=obj.names)
コード例 #20
0
def merge_tuples(
    sq: Tuple[Union[str, slice], Union[str, slice], Union[str, slice],
              Union[str, slice]],
    res: pd.MultiIndex,
) -> Tuple[str, str, str, str]:
    """Fill entries of the query tuple from the levels of an index.

    For every level of ``res``, the level name is looked up in
    ``INDEX_COLS`` and the entry of ``sq`` at that position is replaced by
    the first value of the level.

    Behaviour change in pandas 1.4, in previous versions the full index was returned.
    Post 1.4, pandas returns only the missing levels.

    :param sq: query tuple
    :type sq: Tuple[ Union[str, slice], Union[str, slice], Union[str, slice], Union[str, slice] ]
    :param res: index part
    :type res: pd.MultiIndex
    :return: Full lookup value
    :rtype: Tuple[str, str, str, str]
    """
    filled = list(sq)
    for level_name in res.names:
        position = INDEX_COLS.index(level_name)
        first_value = res.get_level_values(level_name)[0]
        filled[position] = first_value
    return tuple(filled)