Example #1
0
    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range("2012-01-01", periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [dv[0], pd.NaT, dv[3]],
                "b": [dv[1], dv[2], pd.NaT]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame(
            {
                "a": [dv[0], dv[0], dv[3]],
                "b": [dv[1], dv[2], dv[0]]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)
Example #2
0
    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [td[0], pd.NaT, td[3]],
                "b": [td[1], td[2], pd.NaT]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame(
            {
                "a": [td[0], td[1], td[3]],
                "b": [td[1], td[2], td[1]]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)
Example #3
0
    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [
            Period('2012-01'),
            Period('2012-02'),
            Period('2012-03'),
            Period('2012-04')
        ]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [periods[0], None, periods[3]],
                'b': [periods[1], periods[2], None]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {
                'a': [periods[0], periods[1], periods[3]],
                'b': [periods[1], periods[2], periods[1]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #4
0
    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [
            Period("2012-01"),
            Period("2012-02"),
            Period("2012-03"),
            Period("2012-04"),
        ]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [periods[0], None, periods[3]],
                "b": [periods[1], periods[2], None]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {
                "a": [periods[0], periods[1], periods[3]],
                "b": [periods[1], periods[2], periods[1]],
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)
Example #5
0
    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [dv[0], pd.NaT, dv[3]],
                'b': [dv[1], dv[2], pd.NaT]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame(
            {
                'a': [dv[0], dv[0], dv[3]],
                'b': [dv[1], dv[2], dv[0]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #6
0
    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [td[0], pd.NaT, td[3]],
                'b': [td[1], td[2], pd.NaT]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame(
            {
                'a': [td[0], td[1], td[3]],
                'b': [td[1], td[2], td[1]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #7
0
    def test_unstack(self):
        from numpy import nan
        from pandas.util.testing import assert_frame_equal

        index = MultiIndex(levels=[['bar', 'foo'], ['one', 'three', 'two']],
                           labels=[[1, 1, 0, 0], [0, 1, 0, 2]])

        s = Series(np.arange(4.), index=index)
        unstacked = s.unstack()

        expected = DataFrame([[2., nan, 3.], [0., 1., nan]],
                             index=['bar', 'foo'],
                             columns=['one', 'three', 'two'])

        assert_frame_equal(unstacked, expected)

        unstacked = s.unstack(level=0)
        assert_frame_equal(unstacked, expected.T)

        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0],
                                   [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        exp_index = MultiIndex(levels=[['one', 'two', 'three'], [0, 1]],
                               labels=[[0, 1, 2, 0, 1, 2],
                                       [0, 1, 0, 1, 0, 1]])
        expected = DataFrame({'bar' : s.values}, index=exp_index).sortlevel(0)
        unstacked = s.unstack(0)
        assert_frame_equal(unstacked, expected)
Example #8
0
def practice_five():
    """Walk through hierarchical-index operations on a Series and a DataFrame.

    Every expression below is evaluated only for demonstration; the results
    are discarded and the function returns None.
    """
    # Two-level index: letters on the outer level, integers on the inner one.
    data = Series(np.random.randn(10),
                  index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                         [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
    data.index
    data['b']
    data['b':'c']
    # .loc replaces the removed .ix indexer for label-based selection.
    data.loc[['b', 'd']]
    data[:, 2]
    data.unstack()
    data.unstack().stack()

    # Reorder and sort the hierarchical levels
    frame = DataFrame(np.arange(12).reshape((4, 3)),
                      index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                      columns=[['O', 'O', 'C'], ['G', 'R', 'G']])
    frame.index.names = ['key1', 'key2']
    frame.columns.names = ['state', 'color']
    frame.swaplevel('key1', 'key2')
    # sort_index(level=...) replaces the removed sortlevel().
    frame.sort_index(level=1)
    frame.swaplevel(0, 1).sort_index(level=0)

    # Summary statistics by level; groupby(level=...) replaces the removed
    # sum(level=...) keyword.
    frame.groupby(level='key2').sum()
    # Column-wise aggregation: transpose, group the rows, transpose back.
    frame.T.groupby(level='color').sum().T
Example #9
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            'a': [1, -1, 5],
            'b': [2, 4, -1]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            'a': [1, 0.5, 5],
            'b': [2, 4, 0.5]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.float)
        assert_frame_equal(result, expected)
Example #10
0
    def test_unstack_fill_frame_categorical(self):
        """Unstack a categorical Series: default gaps, invalid fill, valid fill."""
        keys = [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
        ser = Series(["a", "b", "c", "a"], dtype="category")
        ser.index = pd.MultiIndex.from_tuples(keys)
        cats = list("abc")
        rows = list("xyz")

        # Missing cells are NaN by default. In the expected data "x" marks
        # those cells, since it is not one of the declared categories.
        wanted = DataFrame(
            {
                "a": pd.Categorical(list("axa"), categories=cats),
                "b": pd.Categorical(list("bcx"), categories=cats),
            },
            index=rows,
        )
        tm.assert_frame_equal(ser.unstack(), wanted)

        # A fill_value outside the categories is expected to raise ValueError.
        with pytest.raises(ValueError, match=r"'fill_value=d' is not present in"):
            ser.unstack(fill_value="d")

        # A fill_value that is a category replaces the missing cells.
        wanted = DataFrame(
            {
                "a": pd.Categorical(list("aca"), categories=cats),
                "b": pd.Categorical(list("bcc"), categories=cats),
            },
            index=rows,
        )
        tm.assert_frame_equal(ser.unstack(fill_value="c"), wanted)
Example #11
0
    def test_unstack(self):

        index = MultiIndex(
            levels=[["bar", "foo"], ["one", "three", "two"]],
            codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
        )

        s = Series(np.arange(4.0), index=index)
        unstacked = s.unstack()

        expected = DataFrame(
            [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
            index=["bar", "foo"],
            columns=["one", "three", "two"],
        )

        tm.assert_frame_equal(unstacked, expected)

        unstacked = s.unstack(level=0)
        tm.assert_frame_equal(unstacked, expected.T)

        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.randn(6), index=index)
        exp_index = MultiIndex(
            levels=[["one", "two", "three"], [0, 1]],
            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
        unstacked = s.unstack(0).sort_index()
        tm.assert_frame_equal(unstacked, expected)

        # GH5873
        idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
        ts = pd.Series([1, 2], index=idx)
        left = ts.unstack()
        right = DataFrame(
            [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
        )
        tm.assert_frame_equal(left, right)

        idx = pd.MultiIndex.from_arrays(
            [
                ["cat", "cat", "cat", "dog", "dog"],
                ["a", "a", "b", "a", "b"],
                [1, 2, 1, 1, np.nan],
            ]
        )
        ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
        right = DataFrame(
            [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
            columns=["cat", "dog"],
        )
        tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
        right.index = pd.MultiIndex.from_tuples(tpls)
        tm.assert_frame_equal(ts.unstack(level=0), right)
def test_unstack_preserves_object():
    """Unstacking an object-dtype Series must yield only object columns."""
    levels = [["bar", "foo"], ["one", "two"]]
    ser = Series(
        np.arange(4.0), index=MultiIndex.from_product(levels), dtype=object
    )

    # Check the default call first, then an explicit level=0.
    for kwargs in ({}, {"level": 0}):
        wide = ser.unstack(**kwargs)
        assert (wide.dtypes == object).all()
Example #13
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            'a': [1, -1, 5],
            'b': [2, 4, -1]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            'a': [1, 0.5, 5],
            'b': [2, 4, 0.5]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.float)
        assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({
            'x': ['a', 'a', 'b'],
            'y': ['j', 'k', 'j'],
            'z': [0, 1, 2],
            'w': [0, 1, 2]
        }).set_index(['x', 'y', 'z'])
        unstacked = df.unstack(['x', 'y'], fill_value=0)
        key = ('w', 'b', 'j')
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        assert_series_equal(result, expected)

        stacked = unstacked.stack(['x', 'y'])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        assert_frame_equal(result, df)

        # From a series
        s = df['w']
        result = s.unstack(['x', 'y'], fill_value=0)
        expected = unstacked['w']
        assert_frame_equal(result, expected)
Example #14
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            "a": [1, -1, 5],
            "b": [2, 4, -1]
        },
                             index=["x", "y", "z"],
                             dtype=np.int16)
        tm.assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            "a": [1, 0.5, 5],
            "b": [2, 4, 0.5]
        },
                             index=["x", "y", "z"],
                             dtype=np.float)
        tm.assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({
            "x": ["a", "a", "b"],
            "y": ["j", "k", "j"],
            "z": [0, 1, 2],
            "w": [0, 1, 2]
        }).set_index(["x", "y", "z"])
        unstacked = df.unstack(["x", "y"], fill_value=0)
        key = ("w", "b", "j")
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        tm.assert_series_equal(result, expected)

        stacked = unstacked.stack(["x", "y"])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        tm.assert_frame_equal(result, df)

        # From a series
        s = df["w"]
        result = s.unstack(["x", "y"], fill_value=0)
        expected = unstacked["w"]
        tm.assert_frame_equal(result, expected)
Example #15
0
    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame({"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)
Example #16
0
    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range("2012-01-01", periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame({"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)
Example #17
0
def normalize_dissimilarity(s: pd.Series) -> pd.Series:
    """Scale dissimilarities by the distance from the Zero reward.

    The Zero-reward distance serves as an upper bound on the distance, so it
    is used as the divisor.
    """
    source_levels = ["source_reward_type", "source_reward_path"]
    wide = s.unstack(level=source_levels)
    # Take the Zero-reward column out of the frame; it becomes the divisor.
    divisor = wide.pop((serialize.ZERO_REWARD, "dummy"))

    def _scale(column):
        return column / divisor

    scaled = wide.apply(_scale)
    # Unstacking every remaining index level yields the final result.
    return scaled.unstack(level=scaled.index.names)
Example #18
0
def create_source(counts: pd.Series) -> ColumnDataSource:
    """Build a ColumnDataSource from unstacked counts plus summary columns.

    Adds a per-row "total", its running "cumulative" sum, and an "x_range"
    column produced by parse_x_range(counts).
    """
    table = counts.unstack()
    row_totals = table.sum(axis=1)
    table["total"] = row_totals
    table["cumulative"] = table["total"].cumsum()
    table["x_range"] = parse_x_range(counts)
    source = ColumnDataSource(data=table)
    return source
Example #19
0
    def _compute_correlations(ratings: Series, min_ratings: int) -> Series:
        """
        Correlate every pair of users over the items both of them have rated.

        :param ratings: A series indexing the ratings by user_ids and item_ids
                        (it is recommended that the ratings be normalized at this step).
        :param min_ratings: The minimum number of items rated by both users for their correlation
                            to be taken into account.
        :return: A series of correlations indexed by user_id and neighbor_id.
        """

        # One column per user, so corr() compares users pairwise.
        per_user = ratings.unstack(level='user_id')
        corr = per_user.corr(min_periods=min_ratings)

        def _neighbors_of(user_id: UserId):
            # Drop undefined correlations and put the strongest ones first.
            ranked = corr[user_id].dropna()
            ranked = ranked.sort_values(ascending=False)
            # Prepend the user id as an outer index level.
            ranked = pd.concat([ranked], keys=[user_id])
            ranked.index.names = ['user_id', 'neighbor_id']
            return ranked

        user_ids = ratings.index.get_level_values('user_id').unique()
        progress = tqdm(user_ids,
                        desc='Computing correlation between every user',
                        unit=' users',
                        file=sys.stdout)
        users_neighbors: List[Series] = [
            _neighbors_of(user_id) for user_id in progress
        ]

        return pd.concat(users_neighbors)
Example #20
0
    def test_reindex_datetimelike_to_object(self, dtype):
        # GH#39755 dont cast dt64/td64 to ints
        mi = MultiIndex.from_product([list("ABCDE"), range(2)])

        dti = date_range("2016-01-01", periods=10)
        fv = np.timedelta64("NaT", "ns")
        if dtype == "m8[ns]":
            dti = dti - dti[0]
            fv = np.datetime64("NaT", "ns")

        ser = Series(dti, index=mi)
        ser[::3] = pd.NaT

        df = ser.unstack()

        index = df.index.append(Index([1]))
        columns = df.columns.append(Index(["foo"]))

        res = df.reindex(index=index, columns=columns, fill_value=fv)

        expected = DataFrame(
            {
                0: df[0].tolist() + [fv],
                1: df[1].tolist() + [fv],
                "foo": np.array(["NaT"] * 6, dtype=fv.dtype),
            },
            index=index,
        )
        assert (res.dtypes[[0, 1]] == object).all()
        assert res.iloc[0, 0] is pd.NaT
        assert res.iloc[-1, 0] is fv
        assert res.iloc[-1, 1] is fv
        tm.assert_frame_equal(res, expected)
Example #21
0
    def industry_w(self, index_weight: pd.Series,
                   industry_exposure: pd.Series) -> pd.Series:
        """
        Generate industry weights.
        If an industry's weight is zero, it is discarded.

        NOTE(review): the return statement reads ``index_weight.columns``
        although the parameter is annotated as a Series - confirm the
        intended input type with the caller.
        """
        # Normalize so that each row of the unstacked weights sums to 1.
        indW = index_weight.unstack()
        indW = indW.div(indW.sum(axis=1), axis=0).stack()
        data_ = pd.concat([indW, industry_exposure], axis=1).dropna()
        # industry weight
        ind_weight = data_.groupby(
            [KN.TRADE_DATE.value, SN.INDUSTRY_FLAG.value]).sum()
        index_ = industry_exposure.index.get_level_values(
            KN.TRADE_DATE.value).drop_duplicates()
        # .ffill() replaces the deprecated fillna(method='ffill'); it
        # forward-fills the dates that were added by the reindex.
        ind_weight_new = ind_weight.unstack().reindex(index_).ffill().stack(
            dropna=False)
        ind_weight_new.name = SN.INDUSTRY_WEIGHT.value
        # fill weight and industry
        res_ = pd.merge(ind_weight_new.reset_index(),
                        industry_exposure.reset_index(),
                        on=[KN.TRADE_DATE.value, SN.INDUSTRY_FLAG.value],
                        how='right')
        res_ = res_.set_index([KN.TRADE_DATE.value,
                               KN.STOCK_ID.value]).sort_index()

        # TODO: rename
        return res_[index_weight.columns]
def index_reformat(series: pd.Series, preserve_order: bool) -> pd.DataFrame:
    """Relabel the index for readability and pivot "Target" into the columns.

    With ``preserve_order`` the rows and columns follow the order in which
    labels first appear in the relabelled series; otherwise the rows are
    sorted by index.
    """
    # Work on a copy so the caller's series is left alone.
    series = rewrite_index(series.copy())
    series.index = remove_constant_levels(series.index)
    readable_names = [LEVEL_NAMES.get(name, name) for name in series.index.names]
    series.index.names = readable_names
    series = series.rename(index=pretty_rewrite)

    df = series.unstack("Target")
    if not preserve_order:
        return df.sort_index()

    # Preserve order of inputs
    target_order = series.index.get_level_values("Target").unique()
    df = df.reindex(columns=target_order)
    for level in series.index.names:
        if level == "Target":
            continue
        # A level must be named only when the rows are still a MultiIndex.
        kwargs = dict(level=level) if isinstance(df.index, pd.MultiIndex) else {}
        row_order = series.index.get_level_values(level).unique()
        df = df.reindex(index=row_order, **kwargs)
    return df
Example #23
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=np.float)
        assert_frame_equal(result, expected)
Example #24
0
    def test_unstack_multiple_no_empty_columns(self):
        index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
                                        (1, 'baz', 1), (1, 'qux', 1)])

        s = Series(np.random.randn(4), index=index)

        unstacked = s.unstack([1, 2])
        expected = unstacked.dropna(axis=1, how='all')
        assert_frame_equal(unstacked, expected)
Example #25
0
def calculate_prob_of_features(N: Series) -> Series:
    """
    Compute the share of all checks that falls on each feature vector.
    :param N: number of checks per day and per vector of features.
    :return: series of features and the probability of each feature in the data.
    """
    # Pivot the inner index level into columns; absent combinations count as 0.
    checks_table = N.unstack().fillna(0)
    checks_per_feature = checks_table.sum()
    total_checks = sum(N)
    return checks_per_feature / total_checks
Example #26
0
    def test_unstack_multiple_no_empty_columns(self):
        index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
                                        (1, 'baz', 1), (1, 'qux', 1)])

        s = Series(np.random.randn(4), index=index)

        unstacked = s.unstack([1, 2])
        expected = unstacked.dropna(axis=1, how='all')
        assert_frame_equal(unstacked, expected)
Example #27
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({'a': [1, -1, 5], 'b': [2, 4, -1]},
                             index=['x', 'y', 'z'], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({'a': [1, 0.5, 5], 'b': [2, 4, 0.5]},
                             index=['x', 'y', 'z'], dtype=np.float)
        assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({'x': ['a', 'a', 'b'],
                        'y': ['j', 'k', 'j'],
                        'z': [0, 1, 2],
                        'w': [0, 1, 2]}).set_index(['x', 'y', 'z'])
        unstacked = df.unstack(['x', 'y'], fill_value=0)
        key = ('w', 'b', 'j')
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        assert_series_equal(result, expected)

        stacked = unstacked.stack(['x', 'y'])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        assert_frame_equal(result, df)

        # From a series
        s = df['w']
        result = s.unstack(['x', 'y'], fill_value=0)
        expected = unstacked['w']
        assert_frame_equal(result, expected)
Example #28
0
    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [dv[0], pd.NaT, dv[3]],
                              'b': [dv[1], dv[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({'a': [dv[0], dv[0], dv[3]],
                              'b': [dv[1], dv[2], dv[0]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #29
0
    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [Period("2012-01"), Period("2012-02"), Period("2012-03"), Period("2012-04")]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, index=["x", "y", "z"]
        )
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {"a": [periods[0], periods[1], periods[3]], "b": [periods[1], periods[2], periods[1]]},
            index=["x", "y", "z"],
        )
        assert_frame_equal(result, expected)
Example #30
0
    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [td[0], pd.NaT, td[3]],
                              'b': [td[1], td[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({'a': [td[0], td[1], td[3]],
                              'b': [td[1], td[2], td[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #31
0
    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
                   Period('2012-04')]
        data = Series(periods)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [periods[0], None, periods[3]],
                              'b': [periods[1], periods[2], None]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame({'a': [periods[0], periods[1], periods[3]],
                              'b': [periods[1], periods[2], periods[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)
Example #32
0
def test_unstack_mixed_type_name_in_multiindex(unstack_idx, expected_values,
                                               expected_index,
                                               expected_columns):
    # GH 19966
    idx = MultiIndex.from_product([["a", "b"], [1, 2], [3, 4]],
                                  names=[("A", "a"), "B", "C"])
    ser = Series(1, index=idx)
    result = ser.unstack(unstack_idx)

    expected = DataFrame(expected_values,
                         columns=expected_columns,
                         index=expected_index)
    tm.assert_frame_equal(result, expected)
Example #33
0
def form_basic_data(time, money, type):
    """Aggregate amounts into a month-by-type table with a totals row.

    Parameters
    ----------
    time : sequence
        Time label of each record; anything ``pd.to_datetime`` can parse.
    money : sequence
        Amount of each record.
    type : sequence
        Category of each record. The name shadows the builtin but is kept
        because it is part of the existing signature.

    Returns
    -------
    DataFrame
        One row per month and one column per type, holding the summed
        amounts, followed by a final ``'Col_sum'`` row of column totals.
    """
    index = pd.MultiIndex.from_arrays([time, type], names=['time', 'type'])
    amounts = Series(money, index=index)
    # Sum duplicate (time, type) pairs, then spread the types across columns;
    # combinations that never occur count as 0.
    df = amounts.groupby(level=['time', 'type']).sum().unstack().fillna(0)
    # Convert the row labels in place. Reindexing with the converted labels
    # would match nothing when the labels are strings, turning every row
    # into NaN and every monthly sum into 0.
    df.index = pd.to_datetime(df.index)
    df = df.resample('M').sum()
    df.loc['Col_sum'] = df.sum()
    return df
Example #34
0
def test_unstack_tuplename_in_multiindex():
    """Unstack by a level whose name is itself a tuple."""
    # GH 19966
    outer_name, inner_name = ("A", "a"), ("B", "b")
    mi = pd.MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[outer_name, inner_name]
    )
    observed = Series(1, index=mi).unstack(outer_name)

    wanted_columns = pd.MultiIndex.from_tuples(
        [("a",), ("b",), ("c",)], names=[outer_name]
    )
    wanted = DataFrame(
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        columns=wanted_columns,
        index=pd.Index([1, 2, 3], name=inner_name),
    )
    tm.assert_frame_equal(observed, wanted)
Example #35
0
def test_unstack_fill_frame_object():
    """Unstack an object-dtype Series with and without ``fill_value``."""
    # GH12815 Test unstacking with object.
    data = Series(["a", "b", "c", "a"], dtype="object")
    data.index = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"),
                                            ("z", "a")])

    # By default missing values will be NaN
    result = data.unstack()
    # dtype is pinned to object: the input is explicitly object-typed, and
    # leaving the expected frame to inference would produce a string dtype
    # on pandas versions that infer one for str data.
    expected = DataFrame(
        {
            "a": ["a", np.nan, "a"],
            "b": ["b", "c", np.nan],
        },
        index=list("xyz"),
        dtype=object,
    )
    tm.assert_frame_equal(result, expected)

    # Fill with any value replaces missing values as expected
    result = data.unstack(fill_value="d")
    expected = DataFrame(
        {
            "a": ["a", "d", "a"],
            "b": ["b", "c", "d"],
        },
        index=list("xyz"),
        dtype=object,
    )
    tm.assert_frame_equal(result, expected)
    def factStability(self, data: pd.Series):
        """
        Factor-exposure stability, measured as a Spearman correlation.

        The series is unstacked into a frame, each row is correlated with
        the row before it, and the result (sorted by index) is stored in
        ``self.Res["Stability"]``.  Nothing is returned.

        Parameters
        ----------
        data :
            Series with a MultiIndex; presumably (date, asset) so that rows
            of the unstacked frame are dates -- confirm against the caller.

        Returns
        -------
        None
        """
        wide = data.unstack()
        previous = wide.shift(1)
        # axis=1 correlates row-wise: each row against its predecessor.
        stability = wide.corrwith(previous, axis=1, drop=True,
                                  method='spearman')
        self.Res["Stability"] = stability.sort_index()
Example #37
0
def test_unstack_multi_index_categorical_values():
    """Unstacking a categorical Series keeps categorical columns."""
    # Build the (date x column) index explicitly instead of relying on the
    # private ``tm.makeTimeDataFrame`` helper, which is not available in
    # every pandas release.
    frame = DataFrame(
        0.0,
        index=pd.date_range("2000-01-01", periods=10, freq="B"),
        columns=list("ABCD"),
    )
    mi = frame.stack().index.rename(["major", "minor"])
    ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")

    result = ser.unstack()

    dti = ser.index.levels[0]
    c = pd.Categorical(["foo"] * len(dti))
    expected = DataFrame(
        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
        columns=pd.Index(list("ABCD"), name="minor"),
        index=dti.rename("major"),
    )
    tm.assert_frame_equal(result, expected)
Example #38
0
def get_y_weights(y: pd.Series, normalize=False):
    """
    For each series, compute the denominator in the MSSE loss function, i.e. the
    day-to-day variations squared, averaged by number of training observations.
    The weights can be normalized so that they add up to 1.
    This is provided to the lgb.Dataset for computing loss function and evaluation metric

    Parameters
    ----------
    y : Series (or single-block DataFrame) whose MultiIndex has a level named
        ``'date'``; the remaining level(s) identify the individual series.
    normalize : when true, divide the per-series weights by their sum.

    Returns
    -------
    Series named ``'weight'`` with one entry per row of ``y``, each carrying
    the weight of the series that row belongs to.
    """
    # Mean squared day-to-day change per series (dates spread over columns).
    scales = (y.unstack(level='date').diff(axis=1)**2).mean(axis=1)
    # A constant series has scale 0; mark it missing rather than divide by 0.
    scales = scales.replace(0, pd.NA)
    weights = 1 / scales
    if normalize:
        weights = weights.divide(weights.sum())

    # ``Series`` has no ``merge`` method, so wrap it in a frame first.  The
    # fixed column name avoids a clash with the 'weight' column added below.
    observations = y.to_frame(name='y') if isinstance(y, pd.Series) else y
    weights = observations.merge(weights.to_frame('weight'),
                                 left_index=True,
                                 right_index=True)['weight']
    return weights
Example #39
0
def get_formato_series(counts: pd.Series,
                       colnames: Dict[str, str],
                       zero_dates=True):
    """
    Turn grouped counts into a tidy table (states as columns, dates as index).

    Input:
    - counts:
        Series with a two-level index; level 0 is unstacked into columns
        (presumably the state key) and the remaining level is parsed as a
        date -- confirm against the caller.
    - colnames:
        mapping passed to ``rename(columns=...)`` to turn state keys into
        state names.
    - zero_dates:
        when true, add a zero-filled row for every calendar day between the
        first and last date that has no data.

    Output:
    - pd.DataFrame
        Integer table whose first column is the national total
        ('Nacional'), followed by the remaining columns in sorted order,
        indexed by date (index name 'Fecha').

    """
    tidy = counts.unstack(level=0)
    tidy.index = pd.to_datetime(tidy.index)
    present = tidy.columns
    present.name = None

    # Make sure keys 1..32 all exist as columns, even with no counts.
    absent = list(set(range(1, 33)).difference(present))
    if absent:
        # Order is irrelevant here: columns are sorted further down.
        tidy = tidy.reindex(columns=present.tolist() + absent)

    tidy = tidy.rename(columns=colnames)
    tidy = tidy.fillna(0)
    tidy = tidy.astype('int')

    # National aggregate; the final order is fixed before the total column
    # is added so that it ends up first.
    ordered = ['Nacional'] + sorted(tidy.columns)
    tidy.loc[:, 'Nacional'] = tidy.sum(axis=1)
    tidy = tidy[ordered]

    if zero_dates:
        # Fill in zeros for dates that carry no information.
        every_day = pd.date_range(tidy.index.min(), tidy.index.max())
        tidy = tidy.reindex(every_day, fill_value=0)

    tidy.index.name = 'Fecha'

    return tidy
Example #40
0
 def get_all_player_game_scores(self):
     """Return every player's score for every game.

     Walks ``self.log`` row by row, scores each row with
     ``self._get_player_game_score`` and collects the results keyed by
     (user, seasonId, sessionId, gameId).  Rows whose scoring raises an
     ordinary exception are skipped (best effort).

     Returns
     -------
     DataFrame with one column per user and rows indexed by
     (seasonId, sessionId, gameId).
     """
     _game_score_dict = self._get_game_score_distribution()
     _game_player_scores = {}
     for _, row in self.log.iterrows():
         info = (row.user, row.seasonId, row.sessionId, row.gameId)
         try:
             score = self._get_player_game_score(*info[1:],
                                                 _game_score_dict,
                                                 row.gameResult)
             _game_player_scores[info] = score
         except Exception:
             # Deliberately best-effort: an unscorable row is dropped.
             # ``Exception`` (not a bare except) so that SystemExit and
             # KeyboardInterrupt still propagate.
             continue
     player_scores = Series(_game_player_scores, name='game_score')
     player_scores.index.names = ['user', 'seasonId', 'sessionId', 'gameId']
     player_scores = player_scores.unstack(level='user')
     return player_scores
# Scratch script exploring reindexing, missing values and hierarchical
# indexes.  NOTE(review): ``Series`` is used before any import visible in
# this fragment -- presumably imported earlier in the original script.

# Series labelled 0, 2, 4 (gaps left on purpose for the reindex experiment
# that is commented out below).
obj6 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])

#
# print( obj6.reindex(range(6),method='bfill'))

from numpy import nan as NA

# Series containing one missing value.
data = Series([1, NA, 2])
# print(data.dropna())

# data2 = DataFrame([[1., 6.5, 3], [1., NA, NA], [NA, NA, NA]
#                   ])
# data2[4] = NA
# print(data2)
# print(data2.dropna(axis=1, how='all'))
#
# data2.fillna(0)
# print(data2.fillna(0, inplace=True))
# print(data2)

import numpy as np

# Ten random values under a two-level index (letters outside, ints inside).
data3 = Series(np.random.randn(10),
               index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                      [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])

# Round trip: pivot the inner level into columns, then fold it back.
print( data3.unstack().stack() )

# print ( data3['b':'c'])
Example #42
0
File: pd.py Project: wabu/zeroflo
 def process(self, data : pd.Series, tag):
     """Unstack ``data`` and forward it through ``tag`` and ``self.out``.

     The unstacked frame is combined with ``tag`` and then ``self.out``
     using ``>>`` (left to right), and whatever that expression yields is
     re-yielded.  The meaning of ``>>`` comes from the operand types, which
     are defined outside this block.
     """
     unstacked = data.unstack()
     tagged = unstacked >> tag
     yield from tagged >> self.out
b    0.792968
c   -0.317989
dtype: float64
'''
ser[2]
'''
a   -0.178000
b   -0.243812
c   -0.451486
dtype: float64
'''

# NOTE(review): ``ser``, ``ser1``, ``ser2``, ``df1`` and ``df2`` are defined
# in a part of the script that is not visible here.
ser[:,'a'] # every outer-level entry whose inner-level label is 'a'
ser[1,'a'] # the single value at outer label 1, inner label 'a'

df = ser.unstack() # turn the two-level Series into a DataFrame: outer level as rows, inner level as columns

# combine_first() method
Series(np.where(pd.isnull(ser1),ser2,ser1), index = ['x','y','z','q','r','s']) # manual version using numpy.where with pandas' isnull()
# The statement above states: where ser1 is NaN use the ser2 value, otherwise keep the ser1 value.
ser1.combine_first(ser2) # combine_first() does the same

df1.combine_first(df2) # the same for DataFrames

ser1.replace(1,10) # replace 1 in the series with 10
ser1.replace(1,np.nan) # replace 1 in the series with NaN
ser1.replace([1,4],[100,400]) # replace 1 and 4 with 100 and 400 respectively
ser1.replace({4: 'clown' , 2: 'owl'}) # replace 4 with 'clown' and 2 with 'owl'

###############################################################
###															###
Example #44
0
# -*- coding: utf-8 -*- 

import numpy as np
from pandas import Series, DataFrame, MultiIndex

# Demo script for hierarchical (multi-level) indexing.
# NOTE(review): written for Python 2 (print statements) and an old pandas
# (uses the ``.ix`` indexer); it does not run unchanged on Python 3.

# --- Series with a two-level index ---
print 'Series的层次索引'
data = Series(np.random.randn(10),
              index = [['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                       [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
print data
print data.index
# Selection on the outer level: attribute access, label slice, positional slice.
print data.b
print data['b':'c']
print data[:2]
# Pivot the inner level into columns, then fold it back.
print data.unstack()
print data.unstack().stack()
print

# --- DataFrame with two-level rows and two-level columns ---
print 'DataFrame的层次索引'
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']])
print frame
# Name the levels of both axes.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print frame
# Row selection by (key1, key2), then narrowing by column level(s).
print frame.ix['a', 1]
print frame.ix['a', 2]['Colorado']
print frame.ix['a', 2]['Ohio']['Red']
print
                       [1,2,3,1,2,3,1,2,2,3]])


# NOTE(review): ``data`` is built by a statement that starts above this
# fragment (a Series with a two-level index, judging by the tail visible).
print(data)
print('\n')
print(data.index)
print('\n')
# Outer-level selection: single label, then a label slice.
print(data['b'])
print('\n')
print(data['b':'c'])
print('\n')
# NOTE(review): ``.ix`` no longer exists in current pandas; ``.loc`` is the
# label-based replacement -- confirm the pandas version this targets.
print(data.ix[['b','d']])
print('\n')
# Inner-level selection: every outer label, inner label 2.
print(data[:,2])
print('\n')
# Pivot the inner level into columns, then fold it back.
print(data.unstack())
print('\n')
print(data.unstack().stack())
print('\n')

###############################################################

#page 154

# DataFrame with two-level row labels and two-level column labels.
frame = DataFrame(np.arange(12).reshape((4,3)),
                  index = [['a','a','b','b'],[1,2,1,2]],
                  columns = [['Ohio', 'Ohio', 'Colorado'],['Green','Red','Green']]
                  )

print(frame)
print('\n')
Example #46
0
# Looks like a dump of an interactive session on hierarchical indexing:
# bare expressions only display something in a REPL, and a few lines record
# mistakes that were corrected on the following line.
# NOTE(review): this first statement uses ``np`` before the import below --
# it fails when run top to bottom as a script.
data = Series(np.random.randn(10),
index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
[1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
import numpy as np
# Same statement again, now with ``np`` available.
data = Series(np.random.randn(10),
index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
[1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
data
data.index
# NOTE(review): ``b`` is an undefined name here (NameError); the quoted
# form on the next line is the intended lookup.
data[b]
data['b']
data['b': 'c']
# NOTE(review): ``.ix`` no longer exists in current pandas -- confirm the
# pandas version this targets.
data.ix[['b', 'd']]
# selection in an inner level
data[:, 2]
# Pivot the inner level into columns, then fold it back.
data.unstack()
data.unstack().stack()
from pandas import DataFrame
# DataFrame with two-level row labels and two-level column labels.
frame = DataFrame(np.arange(12).reshape((4, 3)),
index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
columns=[['Ohio', 'Ohio', 'Colorado'],
['Green', 'Red', 'Green']])
frame
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame
# Reordering and Sorting Levels
frame.swaplevel('key1', 'key2')
# NOTE(review): ``sortlevel`` and ``sum(level=...)`` are old pandas APIs --
# verify they exist in the pandas version in use.
frame.sortlevel(1)
# Summary Statistics by Level
frame.sum(level='key2')
def main():
    """
    Walk through hierarchical indexing on a Series and a DataFrame.

    Prints, in order: selections on a two-level Series and its
    unstack/stack round trip; a DataFrame with two-level rows and columns;
    level swapping and sorting; per-level sums; and moving columns into and
    out of the index with set_index / reset_index.

    NOTE(review): written for Python 2 (print statements; ``map`` is used
    where a list is expected) and an old pandas (``.ix``, ``sortlevel``,
    ``sum(level=...)``).
    """

    # Two-level index: letters outside, digits (converted to int) inside.
    data = Series(np.random.randn(10),
                  index=[list('aaabbbccdd'), map(int, list('1231231223'))])
    print data
    print data.index
    print type(data.index)

    # Outer-level selections, then an inner-level selection (label 2).
    print data['b']
    print data['b':'c']
    print data.ix[['b', 'd']]
    print data[:, 2]

    # Pivot the inner level into columns, then fold it back.
    print data.unstack()
    print data.unstack().stack()

    print '',''
    # DataFrame with two-level row labels and two-level column labels.
    frame = DataFrame(np.arange(12).reshape((4, 3)),
                      index=[['a','a','b','b'], [1,2,1,2]],
                      columns=[['Ohio', 'Ohio', 'Colorado'],
                               ['Green', 'Red', 'Green']])
    print frame
    frame.index.names = ['key1', 'key2']
    frame.columns.names = ['state', 'color']
    print frame

    print frame['Ohio']
    # The same column index built explicitly, with level names attached.
    print MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
                                 names=['state', 'color'])

    # change hierarchy and sort
    print '',''
    print frame.swaplevel('key1', 'key2')
    print '',''
    print frame.sortlevel(1) # sorted by key2
    print '',''
    print frame.swaplevel(0, 1).sortlevel(0) # swap and sorted by key 2

    # summary statistics for each hierarchy
    print '',''
    print frame.sum(level='key2')
    print '',''
    print frame.sum(level='color', axis=1)
    print '',''

    # Using column of the DataFrame for index
    print '','-------------------------'
    frame = DataFrame({
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
        'd': [0, 1, 2, 0, 1, 2, 3],
    })
    print frame
    # Columns 'c' and 'd' become the two index levels.
    frame2 = frame.set_index(['c', 'd'])
    print '',''
    print frame2
    print '',''
    # drop=False keeps 'c' and 'd' as ordinary columns as well.
    print frame.set_index(['c', 'd'], drop=False)
    print '',''
    # reset_index moves the index levels back into columns.
    print frame2.reset_index()
Example #48
0
    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({'a': [1, -1, 5], 'b': [2, 4, -1]},
                             index=['x', 'y', 'z'], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({'a': [1, 0.5, 5], 'b': [2, 4, 0.5]},
                             index=['x', 'y', 'z'], dtype=np.float)
        assert_frame_equal(result, expected)

        # From a dataframe
        rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
        df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
        df.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = df.unstack(fill_value=-1)

        rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]]
        expected = DataFrame(rows, index=list('xyz'), dtype=np.int32)
        expected.columns = MultiIndex.from_tuples(
            [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
        assert_frame_equal(result, expected)

        # From a mixed type dataframe
        df['A'] = df['A'].astype(np.int16)
        df['B'] = df['B'].astype(np.float64)

        result = df.unstack(fill_value=-1)
        expected['A'] = expected['A'].astype(np.int16)
        expected['B'] = expected['B'].astype(np.float64)
        assert_frame_equal(result, expected)

        # From a dataframe with incorrect data type for fill_value
        result = df.unstack(fill_value=0.5)

        rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]]
        expected = DataFrame(rows, index=list('xyz'), dtype=np.float)
        expected.columns = MultiIndex.from_tuples(
            [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
        assert_frame_equal(result, expected)

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [dv[0], pd.NaT, dv[3]],
                              'b': [dv[1], dv[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({'a': [dv[0], dv[0], dv[3]],
                              'b': [dv[1], dv[2], dv[0]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [td[0], pd.NaT, td[3]],
                              'b': [td[1], td[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({'a': [td[0], td[1], td[3]],
                              'b': [td[1], td[2], td[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with period
        periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
                   Period('2012-04')]
        data = Series(periods)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [periods[0], None, periods[3]],
                              'b': [periods[1], periods[2], None]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame({'a': [periods[0], periods[1], periods[3]],
                              'b': [periods[1], periods[2], periods[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with categorical
        data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
        data.index = pd.MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        # By default missing values will be NaN
        result = data.unstack()
        expected = DataFrame({'a': pd.Categorical(list('axa'),
                                                  categories=list('abc')),
                              'b': pd.Categorical(list('bcx'),
                                                  categories=list('abc'))},
                             index=list('xyz'))
        assert_frame_equal(result, expected)

        # Fill with non-category results in NaN entries similar to above
        result = data.unstack(fill_value='d')
        assert_frame_equal(result, expected)

        # Fill with category value replaces missing values as expected
        result = data.unstack(fill_value='c')
        expected = DataFrame({'a': pd.Categorical(list('aca'),
                                                  categories=list('abc')),
                              'b': pd.Categorical(list('bcc'),
                                                  categories=list('abc'))},
                             index=list('xyz'))
        assert_frame_equal(result, expected)
# NOTE(review): Python 2 script fragment (print statements); ``df`` is
# defined above the visible part.  ``sort_index(by=...)``, ``.ix`` and
# ``sum(level=...)`` are old pandas APIs -- verify against the pandas
# version in use.
print df
# sort by column 'a', then by the associated 'b' values
print df.sort_index(by=['a','b'])
print

######

# Series with a two-level index: letters outside, 'x'/'y' inside.
s = Series([8, 2, 5, 9, 4, 7, 5, 3], index=[['a','a','b','b','c','c','d','d'], ['x','y','x','y','x','y','x','y']])
print s
print s['b']
# positional slicing works as well
print s[1:2]
# select particular outer labels
print s.ix[['a','c']]

s2 = s.unstack()
print s2
# stack() puts the data back into its original form
print s2.stack()
print

#####
d = np.arange(12).reshape((4,3))
# DataFrame with two-level row labels and two-level column labels.
df = DataFrame(d, index=[['a','a','b','b'], [1, 2, 1, 2]], columns=[['unc','unc','duke'], ['x','y','x']])
print df

# sum over the outermost index level (level 0)
print df.sum(level=0)


from numpy.random import randn

# Notebook-style walkthrough: bare expressions only display a value when
# run interactively.
# Six random values under a two-level index: ints outside, letters inside.
ser = Series(randn(6), index = [[1,1,1,2,2,2],['a','b','c','a','b','c']])
ser

ser.index # shows the index levels and their labels

# outer-level indexing
ser[2]

# inner-level indexing: every outer label, inner label 'a'
ser[:,'a']


# create a DataFrame from the multi-level Series
dframe = ser.unstack()
dframe

# construct a DataFrame with multiple index levels on both axes
dframe2 = DataFrame(np.arange(16).reshape(4,4),
                    index = [['a','a','b','b'],[1,2,1,2]],
                    columns = [['NY','NY','LA','SF'],['cold','hot','hot','cold']])

dframe2

# name the row-index levels and the column levels
dframe2.index.names = ['INDEX_1', 'INDEX_2']
dframe2.columns.names = ['Cities','Temp']
dframe2

# Interchange index level orders