Python Series.unstack Examples, pandas.Series.unstack Python Examples

Example #1

0

Show file

    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range("2012-01-01", periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [dv[0], pd.NaT, dv[3]],
                "b": [dv[1], dv[2], pd.NaT]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame(
            {
                "a": [dv[0], dv[0], dv[3]],
                "b": [dv[1], dv[2], dv[0]]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

Example #2

0

Show file

    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [td[0], pd.NaT, td[3]],
                "b": [td[1], td[2], pd.NaT]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame(
            {
                "a": [td[0], td[1], td[3]],
                "b": [td[1], td[2], td[1]]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

Example #3

0

Show file

    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [
            Period('2012-01'),
            Period('2012-02'),
            Period('2012-03'),
            Period('2012-04')
        ]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [periods[0], None, periods[3]],
                'b': [periods[1], periods[2], None]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {
                'a': [periods[0], periods[1], periods[3]],
                'b': [periods[1], periods[2], periods[1]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #4

0

Show file

    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [
            Period("2012-01"),
            Period("2012-02"),
            Period("2012-03"),
            Period("2012-04"),
        ]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {
                "a": [periods[0], None, periods[3]],
                "b": [periods[1], periods[2], None]
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {
                "a": [periods[0], periods[1], periods[3]],
                "b": [periods[1], periods[2], periods[1]],
            },
            index=["x", "y", "z"],
        )
        tm.assert_frame_equal(result, expected)

Example #5

0

Show file

    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [dv[0], pd.NaT, dv[3]],
                'b': [dv[1], dv[2], pd.NaT]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame(
            {
                'a': [dv[0], dv[0], dv[3]],
                'b': [dv[1], dv[2], dv[0]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #6

0

Show file

    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame(
            {
                'a': [td[0], pd.NaT, td[3]],
                'b': [td[1], td[2], pd.NaT]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame(
            {
                'a': [td[0], td[1], td[3]],
                'b': [td[1], td[2], td[1]]
            },
            index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #7

0

Show file

File: test_series.py Project: benracine/pandas

    def test_unstack(self):
        from numpy import nan
        from pandas.util.testing import assert_frame_equal

        index = MultiIndex(levels=[['bar', 'foo'], ['one', 'three', 'two']],
                           labels=[[1, 1, 0, 0], [0, 1, 0, 2]])

        s = Series(np.arange(4.), index=index)
        unstacked = s.unstack()

        expected = DataFrame([[2., nan, 3.], [0., 1., nan]],
                             index=['bar', 'foo'],
                             columns=['one', 'three', 'two'])

        assert_frame_equal(unstacked, expected)

        unstacked = s.unstack(level=0)
        assert_frame_equal(unstacked, expected.T)

        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0],
                                   [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        exp_index = MultiIndex(levels=[['one', 'two', 'three'], [0, 1]],
                               labels=[[0, 1, 2, 0, 1, 2],
                                       [0, 1, 0, 1, 0, 1]])
        expected = DataFrame({'bar' : s.values}, index=exp_index).sortlevel(0)
        unstacked = s.unstack(0)
        assert_frame_equal(unstacked, expected)

Example #8

0

Show file

File: practice_pandas.py Project: YYL99/house

def practice_five():
    """Walk through hierarchical-index operations on a Series and a DataFrame.

    Every expression is evaluated only for demonstration; the results are
    discarded and the function returns ``None``.
    """
    # Fixed: the original called the non-existent ``np.randomrandn``.
    data = Series(np.random.randn(10),
                  index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                         [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
    data.index
    data['b']
    data['b':'c']
    # ``.loc`` replaces ``.ix``, which has been removed from pandas.
    data.loc[['b', 'd']]
    data[:, 2]
    data.unstack()
    data.unstack().stack()

    # Reorder and sort the hierarchy levels.
    frame = DataFrame(np.arange(12).reshape((4, 3)),
                      index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                      columns=[['O', 'O', 'C'], ['G', 'R', 'G']])
    frame.index.names = ['key1', 'key2']
    frame.columns.names = ['state', 'color']
    frame.swaplevel('key1', 'key2')
    # ``sort_index(level=...)`` replaces the removed ``sortlevel``.
    frame.sort_index(level=1)
    frame.swaplevel(0, 1).sort_index(level=0)

    # Summary statistics by level; ``sum(level=...)`` has been removed, so
    # group on the level instead (transposing to aggregate across columns).
    frame.groupby(level='key2').sum()
    frame.T.groupby(level='color').sum().T

Example #9

0

Show file

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            'a': [1, -1, 5],
            'b': [2, 4, -1]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            'a': [1, 0.5, 5],
            'b': [2, 4, 0.5]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.float)
        assert_frame_equal(result, expected)

Example #10

0

Show file

File: test_reshape.py Project: botplex/pandas

    def test_unstack_fill_frame_categorical(self):
        """Check ``Series.unstack`` on categorical data.

        Covers three situations: no fill value, a fill value that is not one
        of the categories (expected to raise ``ValueError``), and a fill
        value that is a category.
        """
        categories = list("abc")
        rows = list("xyz")
        data = Series(["a", "b", "c", "a"], dtype="category")
        data.index = pd.MultiIndex.from_tuples(
            [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
        )

        def frame_of(col_a, col_b):
            # Build the expected two-column categorical frame from the
            # characters of each string.
            return DataFrame(
                {
                    "a": pd.Categorical(list(col_a), categories=categories),
                    "b": pd.Categorical(list(col_b), categories=categories),
                },
                index=rows,
            )

        # By default missing values will be NaN
        tm.assert_frame_equal(data.unstack(), frame_of("axa", "bcx"))

        # Fill with non-category results in a ValueError
        with pytest.raises(ValueError, match=r"'fill_value=d' is not present in"):
            data.unstack(fill_value="d")

        # Fill with category value replaces missing values as expected
        tm.assert_frame_equal(data.unstack(fill_value="c"), frame_of("aca", "bcc"))

Example #11

0

Show file

File: test_analytics.py Project: anastasia-si/pandas

    def test_unstack(self):

        index = MultiIndex(
            levels=[["bar", "foo"], ["one", "three", "two"]],
            codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
        )

        s = Series(np.arange(4.0), index=index)
        unstacked = s.unstack()

        expected = DataFrame(
            [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
            index=["bar", "foo"],
            columns=["one", "three", "two"],
        )

        tm.assert_frame_equal(unstacked, expected)

        unstacked = s.unstack(level=0)
        tm.assert_frame_equal(unstacked, expected.T)

        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.randn(6), index=index)
        exp_index = MultiIndex(
            levels=[["one", "two", "three"], [0, 1]],
            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
        unstacked = s.unstack(0).sort_index()
        tm.assert_frame_equal(unstacked, expected)

        # GH5873
        idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
        ts = pd.Series([1, 2], index=idx)
        left = ts.unstack()
        right = DataFrame(
            [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
        )
        tm.assert_frame_equal(left, right)

        idx = pd.MultiIndex.from_arrays(
            [
                ["cat", "cat", "cat", "dog", "dog"],
                ["a", "a", "b", "a", "b"],
                [1, 2, 1, 1, np.nan],
            ]
        )
        ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
        right = DataFrame(
            [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
            columns=["cat", "dog"],
        )
        tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
        right.index = pd.MultiIndex.from_tuples(tpls)
        tm.assert_frame_equal(ts.unstack(level=0), right)

Example #12

0

Show file

File: test_unstack.py Project: MarceloDL-A/metodos_python

def test_unstack_preserves_object():
    """Unstacking an object-dtype Series must yield all-object columns.

    Checked for the default level and for ``level=0``.
    """
    index = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
    obj_ser = Series(np.arange(4.0), index=index, dtype=object)

    for kwargs in ({}, {"level": 0}):
        frame = obj_ser.unstack(**kwargs)
        assert (frame.dtypes == object).all()

Example #13

0

Show file

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                             ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            'a': [1, -1, 5],
            'b': [2, 4, -1]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            'a': [1, 0.5, 5],
            'b': [2, 4, 0.5]
        },
                             index=['x', 'y', 'z'],
                             dtype=np.float)
        assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({
            'x': ['a', 'a', 'b'],
            'y': ['j', 'k', 'j'],
            'z': [0, 1, 2],
            'w': [0, 1, 2]
        }).set_index(['x', 'y', 'z'])
        unstacked = df.unstack(['x', 'y'], fill_value=0)
        key = ('w', 'b', 'j')
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        assert_series_equal(result, expected)

        stacked = unstacked.stack(['x', 'y'])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        assert_frame_equal(result, df)

        # From a series
        s = df['w']
        result = s.unstack(['x', 'y'], fill_value=0)
        expected = unstacked['w']
        assert_frame_equal(result, expected)

Example #14

0

Show file

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"),
                                             ("y", "b"), ("z", "a")])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({
            "a": [1, -1, 5],
            "b": [2, 4, -1]
        },
                             index=["x", "y", "z"],
                             dtype=np.int16)
        tm.assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({
            "a": [1, 0.5, 5],
            "b": [2, 4, 0.5]
        },
                             index=["x", "y", "z"],
                             dtype=np.float)
        tm.assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({
            "x": ["a", "a", "b"],
            "y": ["j", "k", "j"],
            "z": [0, 1, 2],
            "w": [0, 1, 2]
        }).set_index(["x", "y", "z"])
        unstacked = df.unstack(["x", "y"], fill_value=0)
        key = ("w", "b", "j")
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        tm.assert_series_equal(result, expected)

        stacked = unstacked.stack(["x", "y"])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        tm.assert_frame_equal(result, df)

        # From a series
        s = df["w"]
        result = s.unstack(["x", "y"], fill_value=0)
        expected = unstacked["w"]
        tm.assert_frame_equal(result, expected)

Example #15

0

Show file

File: test_reshape.py Project: ChristopherShort/pandas

    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame({"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

Example #16

0

Show file

File: test_reshape.py Project: ChristopherShort/pandas

    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range("2012-01-01", periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame({"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, index=["x", "y", "z"])
        assert_frame_equal(result, expected)

Example #17

0

Show file

def normalize_dissimilarity(s: pd.Series) -> pd.Series:
    """Divide each dissimilarity by the distance from the Zero reward.

    The two source levels are moved to columns, the Zero-reward column is
    removed and used as the divisor for every remaining column, and the
    result is flattened back by unstacking all remaining index levels.
    """
    source_levels = ["source_reward_type", "source_reward_path"]
    table = s.unstack(level=source_levels)
    baseline = table.pop((serialize.ZERO_REWARD, "dummy"))

    def _scale(column):
        # Index-aligned division by the Zero-reward distances.
        return column / baseline

    scaled = table.apply(_scale)
    return scaled.unstack(level=scaled.index.names)

Example #18

0

Show file

def create_source(counts: pd.Series) -> ColumnDataSource:
    """Build a ColumnDataSource from unstacked counts.

    The unstacked table gains three columns: ``total`` (row sums),
    ``cumulative`` (running sum of the totals) and ``x_range`` (whatever
    ``parse_x_range`` returns for ``counts``).
    """
    table = counts.unstack()
    row_totals = table.sum(axis=1)
    table["total"] = row_totals
    table["cumulative"] = row_totals.cumsum()
    table["x_range"] = parse_x_range(counts)
    return ColumnDataSource(data=table)

Example #19

0

Show file

    def _compute_correlations(ratings: Series, min_ratings: int) -> Series:
        """
        Correlate every user with every other user over their shared items.

        :param ratings: Ratings indexed by user_id and item_id (normalizing
                        them beforehand is recommended).
        :param min_ratings: Minimum number of items both users must have rated
                            for their correlation to be reported.
        :return: Correlations indexed by user_id and neighbor_id; each user's
                 neighbors are ordered from highest to lowest correlation.
        """
        correlation_matrix = ratings.unstack(level='user_id').corr(
            min_periods=min_ratings)

        def _neighbors_of(user_id: UserId):
            # One user's column, without missing pairs, best matches first.
            ranked = correlation_matrix[user_id].dropna()
            ranked = ranked.sort_values(ascending=False)
            # Prefix the index with the user's own id as an outer level.
            ranked = pd.concat([ranked], keys=[user_id])
            ranked.index.names = ['user_id', 'neighbor_id']
            return ranked

        user_ids = ratings.index.get_level_values('user_id').unique()
        progress = tqdm(user_ids,
                        desc='Computing correlation between every user',
                        unit=' users',
                        file=sys.stdout)
        per_user: List[Series] = [_neighbors_of(uid) for uid in progress]

        return pd.concat(per_user)

Example #20

0

Show file

File: test_reindex.py Project: ParfaitG/pandas

    def test_reindex_datetimelike_to_object(self, dtype):
        """Reindex an unstacked datetime-like frame with a mismatched NaT fill.

        ``fv`` is always a NaT of the *other* kind from the data: a
        ``timedelta64`` NaT for the default datetime data, and a
        ``datetime64`` NaT once the data has been turned into timedeltas for
        ``dtype == "m8[ns]"``.  The assertions require that columns 0 and 1
        of the reindexed frame have object dtype, that existing NaT cells
        stay ``pd.NaT``, and that the newly added row holds ``fv`` itself.

        ``dtype`` is supplied from outside this block (presumably a pytest
        parametrization or fixture -- not visible here).
        """
        # GH#39755 don't cast dt64/td64 to ints
        mi = MultiIndex.from_product([list("ABCDE"), range(2)])

        dti = date_range("2016-01-01", periods=10)
        fv = np.timedelta64("NaT", "ns")
        if dtype == "m8[ns]":
            # Switch the data to timedeltas and the fill value to a datetime
            # NaT, so the two kinds still differ.
            dti = dti - dti[0]
            fv = np.datetime64("NaT", "ns")

        ser = Series(dti, index=mi)
        # Blank out every third entry.
        ser[::3] = pd.NaT

        df = ser.unstack()

        # Add one row label and one column label that do not exist yet.
        index = df.index.append(Index([1]))
        columns = df.columns.append(Index(["foo"]))

        res = df.reindex(index=index, columns=columns, fill_value=fv)

        expected = DataFrame(
            {
                0: df[0].tolist() + [fv],
                1: df[1].tolist() + [fv],
                "foo": np.array(["NaT"] * 6, dtype=fv.dtype),
            },
            index=index,
        )
        assert (res.dtypes[[0, 1]] == object).all()
        # Identity checks: the original NaT and the fill value must both
        # survive as the very same objects.
        assert res.iloc[0, 0] is pd.NaT
        assert res.iloc[-1, 0] is fv
        assert res.iloc[-1, 1] is fv
        tm.assert_frame_equal(res, expected)

Example #21

0

Show file

File: Labelpool.py Project: wesley1001/SecuritySelect

    def industry_w(self, index_weight: pd.Series,
                   industry_exposure: pd.Series) -> pd.Series:
        """
        Generate industry weights.

        If an industry's weight is zero, it is dropped.

        NOTE(review): both arguments are annotated as Series, yet the return
        statement reads ``index_weight.columns`` -- confirm the actual input
        types against the caller.
        """
        # Normalize each row of the unstacked weights to sum to 1, then
        # return to long form.
        indW = index_weight.unstack()
        indW = indW.div(indW.sum(axis=1), axis=0).stack()
        # Keep only the rows present in both the weights and the exposures.
        data_ = pd.concat([indW, industry_exposure], axis=1).dropna()
        # industry weight
        ind_weight = data_.groupby(
            [KN.TRADE_DATE.value, SN.INDUSTRY_FLAG.value]).sum()
        # Unique trade dates taken from the exposure index.
        index_ = industry_exposure.index.get_level_values(
            KN.TRADE_DATE.value).drop_duplicates()
        # Align to those dates and forward-fill the gaps before stacking back.
        ind_weight_new = ind_weight.unstack().reindex(index_).fillna(
            method='ffill').stack(dropna=False)
        ind_weight_new.name = SN.INDUSTRY_WEIGHT.value
        # fill weight and industry
        res_ = pd.merge(ind_weight_new.reset_index(),
                        industry_exposure.reset_index(),
                        on=[KN.TRADE_DATE.value, SN.INDUSTRY_FLAG.value],
                        how='right')
        res_ = res_.set_index([KN.TRADE_DATE.value,
                               KN.STOCK_ID.value]).sort_index()

        # TODO: rename
        return res_[index_weight.columns]

Example #22

0

Show file

File: transformations.py Project: HumanCompatibleAI/evaluating-rewards

def index_reformat(series: pd.Series, preserve_order: bool) -> pd.DataFrame:
    """Reformat the labels of ``series`` and pivot "Target" into columns.

    Works on a copy: the index is rewritten, constant levels are removed,
    level names are mapped through ``LEVEL_NAMES`` and labels through
    ``pretty_rewrite``.  With ``preserve_order`` the rows and columns follow
    the order in which labels first appear in the series; otherwise the
    result is sorted by index.
    """
    labelled = rewrite_index(series.copy())
    labelled.index = remove_constant_levels(labelled.index)
    labelled.index.names = [
        LEVEL_NAMES.get(name, name) for name in labelled.index.names
    ]
    labelled = labelled.rename(index=pretty_rewrite)

    table = labelled.unstack("Target")
    if not preserve_order:
        return table.sort_index()

    def first_seen(level_name):
        # Labels of one level, in order of first appearance.
        return labelled.index.get_level_values(level_name).unique()

    table = table.reindex(columns=first_seen("Target"))
    for level_name in labelled.index.names:
        if level_name == "Target":
            continue
        # ``level=`` is only passed while the row index is a MultiIndex.
        extra = {}
        if isinstance(table.index, pd.MultiIndex):
            extra = dict(level=level_name)
        table = table.reindex(index=first_seen(level_name), **extra)
    return table

Example #23

0

Show file

File: test_reshape.py Project: ChristopherShort/pandas

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=np.float)
        assert_frame_equal(result, expected)

Example #24

0

Show file

File: test_multilevel.py Project: afonit/pandas

    def test_unstack_multiple_no_empty_columns(self):
        index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
                                        (1, 'baz', 1), (1, 'qux', 1)])

        s = Series(np.random.randn(4), index=index)

        unstacked = s.unstack([1, 2])
        expected = unstacked.dropna(axis=1, how='all')
        assert_frame_equal(unstacked, expected)

Example #25

0

Show file

def calculate_prob_of_features(N: Series) -> Series:
    """
    Compute each feature vector's share of all checks.
    :param N: number of checks per day and per vector of features.
    :return: series indexed by feature, holding that feature's total divided
             by the total over all of ``N``.
    """
    # Features become columns; combinations that never occurred count as 0.
    per_day = N.unstack()
    per_day = per_day.fillna(0)
    feature_totals = per_day.sum()
    # Python's built-in sum over the series values.
    grand_total = sum(N)
    return feature_totals / grand_total

Example #26

0

Show file

File: test_multilevel.py Project: zkluo1/pandas

    def test_unstack_multiple_no_empty_columns(self):
        index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0),
                                        (1, 'baz', 1), (1, 'qux', 1)])

        s = Series(np.random.randn(4), index=index)

        unstacked = s.unstack([1, 2])
        expected = unstacked.dropna(axis=1, how='all')
        assert_frame_equal(unstacked, expected)

Example #27

0

Show file

File: test_reshape.py Project: dmjvictory/pandas

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({'a': [1, -1, 5], 'b': [2, 4, -1]},
                             index=['x', 'y', 'z'], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({'a': [1, 0.5, 5], 'b': [2, 4, 0.5]},
                             index=['x', 'y', 'z'], dtype=np.float)
        assert_frame_equal(result, expected)

        # GH #13971: fill_value when unstacking multiple levels:
        df = DataFrame({'x': ['a', 'a', 'b'],
                        'y': ['j', 'k', 'j'],
                        'z': [0, 1, 2],
                        'w': [0, 1, 2]}).set_index(['x', 'y', 'z'])
        unstacked = df.unstack(['x', 'y'], fill_value=0)
        key = ('w', 'b', 'j')
        expected = unstacked[key]
        result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
        assert_series_equal(result, expected)

        stacked = unstacked.stack(['x', 'y'])
        stacked.index = stacked.index.reorder_levels(df.index.names)
        # Workaround for GH #17886 (unnecessarily casts to float):
        stacked = stacked.astype(np.int64)
        result = stacked.loc[df.index]
        assert_frame_equal(result, df)

        # From a series
        s = df['w']
        result = s.unstack(['x', 'y'], fill_value=0)
        expected = unstacked['w']
        assert_frame_equal(result, expected)

Example #28

0

Show file

File: test_reshape.py Project: dmjvictory/pandas

    def test_unstack_fill_frame_datetime(self):

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [dv[0], pd.NaT, dv[3]],
                              'b': [dv[1], dv[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({'a': [dv[0], dv[0], dv[3]],
                              'b': [dv[1], dv[2], dv[0]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #29

0

Show file

File: test_reshape.py Project: ChristopherShort/pandas

    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [Period("2012-01"), Period("2012-02"), Period("2012-03"), Period("2012-04")]
        data = Series(periods)
        data.index = MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")])

        result = data.unstack()
        expected = DataFrame(
            {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, index=["x", "y", "z"]
        )
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame(
            {"a": [periods[0], periods[1], periods[3]], "b": [periods[1], periods[2], periods[1]]},
            index=["x", "y", "z"],
        )
        assert_frame_equal(result, expected)

Example #30

0

Show file

File: test_reshape.py Project: dmjvictory/pandas

    def test_unstack_fill_frame_timedelta(self):

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [td[0], pd.NaT, td[3]],
                              'b': [td[1], td[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({'a': [td[0], td[1], td[3]],
                              'b': [td[1], td[2], td[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #31

0

Show file

File: test_reshape.py Project: dmjvictory/pandas

    def test_unstack_fill_frame_period(self):

        # Test unstacking with period
        periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
                   Period('2012-04')]
        data = Series(periods)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [periods[0], None, periods[3]],
                              'b': [periods[1], periods[2], None]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame({'a': [periods[0], periods[1], periods[3]],
                              'b': [periods[1], periods[2], periods[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

Example #32

0

Show file

File: test_unstack.py Project: 701789262a/arbobotti

def test_unstack_mixed_type_name_in_multiindex(unstack_idx, expected_values,
                                               expected_index,
                                               expected_columns):
    # GH 19966
    idx = MultiIndex.from_product([["a", "b"], [1, 2], [3, 4]],
                                  names=[("A", "a"), "B", "C"])
    ser = Series(1, index=idx)
    result = ser.unstack(unstack_idx)

    expected = DataFrame(expected_values,
                         columns=expected_columns,
                         index=expected_index)
    tm.assert_frame_equal(result, expected)

Example #33

0

Show file

def form_basic_data(time, money, type):
    """Build a month-by-type table of summed amounts plus a totals row.

    Parameters
    ----------
    time : sequence
        One timestamp (or date-like string) per record.
    money : sequence
        Amount of each record.
    type : sequence
        Category label of each record.  The name shadows the builtin but is
        kept so existing keyword callers keep working.

    Returns
    -------
    DataFrame
        One row per month end and one column per type, holding the summed
        amounts (0 where a type has no records), followed by a final row
        labelled ``'Col_sum'`` with the per-column totals.
    """
    index = pd.MultiIndex.from_arrays([time, type], names=['time', 'type'])
    amounts = Series(money, index=index)
    table = amounts.groupby(level=['time', 'type']).sum().unstack().fillna(0)
    # Convert the row labels in place.  Reindexing onto the converted labels
    # (the previous approach) matches nothing when the labels are strings and
    # silently replaces every row with NaN.
    table.index = pd.to_datetime(table.index)
    # An offset object instead of the 'M' alias: the alias was renamed in
    # newer pandas, the object is accepted by every version.
    table = table.resample(pd.offsets.MonthEnd()).sum()
    table.loc['Col_sum'] = table.sum()
    return table

Example #34

0

Show file

def test_unstack_tuplename_in_multiindex():
    """Unstack by a level whose name is itself a tuple."""
    # GH 19966
    outer_name, inner_name = ("A", "a"), ("B", "b")
    idx = pd.MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[outer_name, inner_name]
    )
    result = Series(1, index=idx).unstack(outer_name)

    # The unstacked level becomes a one-level MultiIndex on the columns.
    expected_columns = pd.MultiIndex.from_tuples(
        [("a",), ("b",), ("c",)], names=[outer_name]
    )
    expected_index = pd.Index([1, 2, 3], name=inner_name)
    expected = DataFrame(
        [[1] * 3 for _ in range(3)],
        columns=expected_columns,
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)

Example #35

0

Show file

File: test_reshape.py Project: botplex/pandas

def test_unstack_fill_frame_object():
    """Unstack an object-dtype Series, with and without a fill value."""
    # GH12815 Test unstacking with object.
    data = Series(["a", "b", "c", "a"], dtype="object")
    data.index = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "b"),
                                            ("z", "a")])

    # By default missing values will be NaN
    result = data.unstack()
    # dtype=object is pinned explicitly: the input is object dtype, and pandas
    # versions that infer a dedicated string dtype for string lists would
    # otherwise build an expected frame of a different dtype.
    expected = DataFrame({
        "a": ["a", np.nan, "a"],
        "b": ["b", "c", np.nan]
    },
                         index=list("xyz"),
                         dtype=object)
    tm.assert_frame_equal(result, expected)

    # Fill with any value replaces missing values as expected
    result = data.unstack(fill_value="d")
    expected = DataFrame({
        "a": ["a", "d", "a"],
        "b": ["b", "c", "d"]
    },
                         index=list("xyz"),
                         dtype=object)
    tm.assert_frame_equal(result, expected)

Example #36

0

Show file

File: FactorAnalysis.py Project: wesley1001/SecuritySelect

    def factStability(self, data: pd.Series):
        """
        Factor exposure stability, measured by Spearman correlation.

        Unstacks ``data`` into a frame, correlates every row with the row
        before it (``shift(1)``) and stores the index-sorted result in
        ``self.Res["Stability"]``.

        Parameters
        ----------
        data :
            Series with a multi-level index that ``unstack()`` can pivot.
            NOTE(review): presumably indexed by (date, asset) - confirm
            against the caller.

        Returns
        -------
        None
        """
        exposures = data.unstack()
        lagged = exposures.shift(1)
        stability = exposures.corrwith(
            lagged, axis=1, drop=True, method='spearman')
        self.Res["Stability"] = stability.sort_index()

Example #37

0

Show file

def test_unstack_multi_index_categorical_values():
    """Unstack a categorical Series indexed by (date, column label)."""
    # Only the stacked (date, column) index of this frame is used, so it is
    # built explicitly instead of through the private helper
    # tm.makeTimeDataFrame(), which newer pandas releases no longer ship.
    source = DataFrame(
        1.0,
        index=pd.bdate_range("2000-01-03", periods=30),
        columns=list("ABCD"),
    )
    mi = source.stack().index.rename(["major", "minor"])
    ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")

    result = ser.unstack()

    dti = ser.index.levels[0]
    c = pd.Categorical(["foo"] * len(dti))
    expected = DataFrame(
        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
        columns=pd.Index(list("ABCD"), name="minor"),
        index=dti.rename("major"),
    )
    tm.assert_frame_equal(result, expected)

Example #38

0

Show file

def get_y_weights(y: pd.Series, normalize=False):
    """
    For each series, compute the denominator in the MSSE loss function, i.e. the
    day-to-day variations squared, averaged by number of training observations.
    The weights can be normalized so that they add up to 1.
    This is provided to the lgb.Dataset for computing loss function and evaluation metric

    Parameters
    ----------
    y : pd.Series
        Values under a MultiIndex that has a level named ``'date'``; the
        remaining level identifies the series.  A DataFrame with the same
        index is accepted as well.
    normalize : bool
        When true, the per-series weights are divided by their sum before
        being broadcast.

    Returns
    -------
    pd.Series
        The weight of the owning series, repeated for every row of ``y``.
    """
    scales = (y.unstack(level='date').diff(axis=1)**2).mean(axis=1)
    # A scale of 0 is turned into a missing value before taking the inverse.
    scales = scales.replace(0, pd.NA)
    weights = 1 / scales
    if normalize:
        weights = weights.divide(weights.sum())
    # Series has no .merge(); lift it to a one-column frame so the index join
    # below works for the annotated input type as well as for a DataFrame.
    frame = y.to_frame('y') if isinstance(y, pd.Series) else y
    weights = frame.merge(weights.to_frame('weight'),
                          left_index=True,
                          right_index=True)['weight']
    return weights

Example #39

0

Show file

File: utils.py Project: rodrigolece/Mexico-datos

def get_formato_series(counts: pd.Series,
                       colnames: Dict[str, str],
                       zero_dates=True):
    """
    Convert grouped counts to tidy format (columns are states, index is the date).

    Input:
    - counts:
        Grouped counts whose index level 0 is unstacked into the columns and
        whose remaining level is parsed as a date.
        NOTE(review): presumably level 0 holds the state codes 1..32 -
        confirm against the caller.
    - colnames:
        mapping state_key => state_name, used to rename the columns.
    - zero_dates:
        when true, days between the first and the last date that have no
        data are added as rows of zeros.

    Output:
    - pd.DataFrame
        DataFrame in tidy format, with the state names as columns (the first
        column is the national total) and the date as index.

    """
    wide = counts.unstack(level=0)
    wide.index = pd.to_datetime(wide.index)
    state_codes = wide.columns
    state_codes.name = None

    # Make sure all 32 state codes have a column, even with zero counts.
    absent = list(set(range(1, 33)).difference(state_codes))
    if absent:
        # Order does not matter here: the columns are sorted further down.
        wide = wide.reindex(columns=state_codes.tolist() + absent)

    wide = wide.rename(columns=colnames)
    wide = wide.fillna(0)
    wide = wide.astype('int')

    # National aggregate first, then the remaining columns in sorted order.
    ordered = ['Nacional'] + sorted(wide.columns)
    wide.loc[:, 'Nacional'] = wide.sum(axis=1)
    wide = wide[ordered]

    if zero_dates:
        # Fill in zeros for dates without information.
        every_day = pd.date_range(wide.index.min(), wide.index.max())
        wide = wide.reindex(every_day, fill_value=0)

    wide.index.name = 'Fecha'

    return wide

Example #40

0

Show file

File: seasonLog.py Project: likuangzheng/rating_system

 def get_all_player_game_scores(self):
     """Return every player's score for every game.

     Iterates over the rows of ``self.log``, scores each row with
     ``self._get_player_game_score`` and pivots the result so that users
     become columns.

     Returns
     -------
     DataFrame
         Indexed by (seasonId, sessionId, gameId) with one column per user.
         Rows of the log that cannot be scored are left out.
     """
     _game_score_dict = self._get_game_score_distribution()
     _game_player_scores = {}
     for _, row in self.log.iterrows():
         info = (row.user, row.seasonId, row.sessionId, row.gameId)
         try:
             score = self._get_player_game_score(*info[1:],
                                                 _game_score_dict,
                                                 row.gameResult)
             _game_player_scores[info] = score
         except Exception:
             # Best effort: a row that cannot be scored is skipped.  Catching
             # Exception rather than using a bare except lets
             # KeyboardInterrupt and SystemExit propagate.
             continue
     player_scores = Series(_game_player_scores, name='game_score')
     player_scores.index.names = ['user', 'seasonId', 'sessionId', 'gameId']
     player_scores = player_scores.unstack(level='user')
     return player_scores

Example #41

0

Show file

File: pandas_test.py Project: coderJianXun/geekbangpython

# Series with a sparse integer index (labels 0, 2 and 4).
obj6 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])

# Disabled experiment: reindex obj6 onto range(6) with method='bfill'.
# print( obj6.reindex(range(6),method='bfill'))

from numpy import nan as NA

# Series containing one missing value (NA is numpy's nan).
data = Series([1, NA, 2])
# print(data.dropna())

# Disabled experiment: dropna / fillna on a DataFrame with missing values.
# data2 = DataFrame([[1., 6.5, 3], [1., NA, NA], [NA, NA, NA]
#                   ])
# data2[4] = NA
# print(data2)
# print(data2.dropna(axis=1, how='all'))
#
# data2.fillna(0)
# print(data2.fillna(0, inplace=True))
# print(data2)

import numpy as np

# Ten random values under a two-level index (letter, number).
data3 = Series(np.random.randn(10),
               index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                      [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])

# Round trip: pivot the inner level into columns, then stack it back.
print( data3.unstack().stack() )

# Disabled experiment: label slice over the outer level.
# print ( data3['b':'c'])

Example #42

0

Show file

File: pd.py Project: wabu/zeroflo

 def process(self, data : pd.Series, tag):
     """Unstack ``data``, chain it with ``tag`` and ``self.out``, and delegate.

     The unstacked frame is combined with ``tag`` and then with ``self.out``
     through the ``>>`` operator; the generator delegates to the result.
     NOTE(review): what ``>>`` does depends on the operand types, which are
     not visible here.
     """
     frame = data.unstack()
     tagged = frame >> tag
     yield from tagged >> self.out

Example #43

0

Show file

File: Starter_Code_Python_Analytics_cheat_sheet.py Project: aroonjham/CodeRepository

b    0.792968
c   -0.317989
dtype: float64
'''
ser[2]
'''
a   -0.178000
b   -0.243812
c   -0.451486
dtype: float64
'''

ser[:,'a'] # return all from primary index, but use secondary index = 'a'
ser[1,'a'] # returns value at index 1 (primary), 'a' (secondary)

df = ser.unstack() # converts hierarchical index series into dataframe with primary index as rows, and secondary index as columns

#combine_first() method
Series(np.where(pd.isnull(ser1),ser2,ser1), index = ['x','y','z','q','r','s']) #Series meets numpy where meets panda's isnull() method
# the above statement states: where ser1 values are NaN, use ser2 values, else use ser1 values
ser1.combine_first(ser2) #combine_first() does the same

df1.combine_first(df2) # does the same with dataframes. 

ser1.replace(1,10) # replace '1' in your series with '10'
ser1.replace(1,np.nan) # replace '1' in your series with NaN
ser1.replace([1,4],[100,400]) # replace value (1 and 4) with (100 and 400)
ser1.replace({4: 'clown' , 2: 'owl'}) # replace 4 with clown, and 2 with owl

###############################################################
###															###

Example #44

0

Show file

File: intro.py Project: Qianfengwoo/Learn_in_python

# -*- coding: utf-8 -*- 

import numpy as np
from pandas import Series, DataFrame, MultiIndex

# Hierarchical indexing walkthrough.  print() is always called with a single
# argument so the script produces the same output on Python 2 and Python 3,
# and .loc replaces the .ix indexer that pandas removed.
print('Series的层次索引')
data = Series(np.random.randn(10),
              index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                     [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
print(data)
print(data.index)
# Attribute access and label slicing both select on the outer level.
print(data.b)
print(data['b':'c'])
print(data[:2])
# Pivot the inner level into columns, then stack it back.
print(data.unstack())
print(data.unstack().stack())
print('')

print('DataFrame的层次索引')
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
print(frame)
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# A full (key1, key2) tuple selects a single row.
print(frame.loc[('a', 1), :])
print(frame.loc[('a', 2), :]['Colorado'])
print(frame.loc[('a', 2), :]['Ohio']['Red'])
print('')

Example #45

0

Show file

File: ch05MissingData.py Project: oldblackwave/PlayPython

                       [1,2,3,1,2,3,1,2,2,3]])


# Show the hierarchically indexed Series and several ways of selecting from it.
print(data)
print('\n')
print(data.index)
print('\n')
print(data['b'])
print('\n')
print(data['b':'c'])
print('\n')
# .loc replaces the .ix indexer, which pandas removed.
print(data.loc[['b','d']])
print('\n')
# Selection on the inner level.
print(data[:,2])
print('\n')
# Pivot the inner level into columns, then stack it back.
print(data.unstack())
print('\n')
print(data.unstack().stack())
print('\n')

###############################################################

#page 154

# Frame with two-level labels on both the rows and the columns.
frame = DataFrame(np.arange(12).reshape((4,3)),
                  index = [['a','a','b','b'],[1,2,1,2]],
                  columns = [['Ohio', 'Ohio', 'Colorado'],['Green','Red','Green']]
                  )

print(frame)
print('\n')

Example #46

0

Show file

File: ch05_7.py Project: uolter/pydata-book

# Hierarchical indexing walkthrough (interactive-session transcript).
# The imports come first: the transcript used np before importing it.  The
# duplicated Series construction and the failing `data[b]` lookup (b is not
# defined) were dropped.
import numpy as np
from pandas import DataFrame, Series

data = Series(np.random.randn(10),
              index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                     [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
data
data.index
data['b']
data['b': 'c']
# .loc replaces the .ix indexer, which pandas removed.
data.loc[['b', 'd']]
# selection in an inner level
data[:, 2]
data.unstack()
data.unstack().stack()
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
frame
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame
# Reordering and Sorting Levels
frame.swaplevel('key1', 'key2')
# sort_index(level=...) replaces the removed sortlevel().
frame.sort_index(level=1)
# Summary Statistics by Level
# groupby(level=...).sum() replaces the removed sum(level=...).
frame.groupby(level='key2').sum()

Example #47

0

Show file

File: hierarchical_index.py Project: ksomemo/pandas-study

def main():
    """
    Walk through hierarchical (MultiIndex) indexing on Series and DataFrame.

    Prints, in order: selection on a two-level Series, the unstack/stack
    round trip, level swapping and sorting on a DataFrame, per-level sums,
    and moving columns into and out of the index.

    print() is always called with a single argument so the output is the
    same on Python 2 and Python 3.
    """

    data = Series(np.random.randn(10),
                  # A real list: map() is lazy on Python 3.
                  index=[list('aaabbbccdd'),
                         [int(ch) for ch in '1231231223']])
    print(data)
    print(data.index)
    print(type(data.index))

    print(data['b'])
    print(data['b':'c'])
    # .loc replaces the .ix indexer, which pandas removed.
    print(data.loc[['b', 'd']])
    print(data[:, 2])

    print(data.unstack())
    print(data.unstack().stack())

    print(' ')
    frame = DataFrame(np.arange(12).reshape((4, 3)),
                      index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                      columns=[['Ohio', 'Ohio', 'Colorado'],
                               ['Green', 'Red', 'Green']])
    print(frame)
    frame.index.names = ['key1', 'key2']
    frame.columns.names = ['state', 'color']
    print(frame)

    print(frame['Ohio'])
    print(MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'],
                                  ['Green', 'Red', 'Green']],
                                 names=['state', 'color']))

    # change hierarchy and sort
    # sort_index(level=...) replaces the removed sortlevel().
    print(' ')
    print(frame.swaplevel('key1', 'key2'))
    print(' ')
    print(frame.sort_index(level=1))  # sorted by key2
    print(' ')
    print(frame.swaplevel(0, 1).sort_index(level=0))  # swap and sorted by key 2

    # summary statistics for each hierarchy
    # groupby(level=...).sum() replaces the removed sum(level=...); column
    # levels are grouped on the transposed frame.
    print(' ')
    print(frame.groupby(level='key2').sum())
    print(' ')
    print(frame.T.groupby(level='color').sum().T)
    print(' ')

    # Using column of the DataFrame for index
    print(' -------------------------')
    frame = DataFrame({
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
        'd': [0, 1, 2, 0, 1, 2, 3],
    })
    print(frame)
    frame2 = frame.set_index(['c', 'd'])
    print(' ')
    print(frame2)
    print(' ')
    print(frame.set_index(['c', 'd'], drop=False))
    print(' ')
    print(frame2.reset_index())

Example #48

0

Show file

File: test_reshape.py Project: 8ballbb/ProjectRothar

    def test_unstack_fill(self):

        # GH #9746: fill_value keyword argument for Series
        # and DataFrame unstack

        # From a series
        data = Series([1, 2, 4, 5], dtype=np.int16)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack(fill_value=-1)
        expected = DataFrame({'a': [1, -1, 5], 'b': [2, 4, -1]},
                             index=['x', 'y', 'z'], dtype=np.int16)
        assert_frame_equal(result, expected)

        # From a series with incorrect data type for fill_value
        result = data.unstack(fill_value=0.5)
        expected = DataFrame({'a': [1, 0.5, 5], 'b': [2, 4, 0.5]},
                             index=['x', 'y', 'z'], dtype=np.float)
        assert_frame_equal(result, expected)

        # From a dataframe
        rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
        df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
        df.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = df.unstack(fill_value=-1)

        rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]]
        expected = DataFrame(rows, index=list('xyz'), dtype=np.int32)
        expected.columns = MultiIndex.from_tuples(
            [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
        assert_frame_equal(result, expected)

        # From a mixed type dataframe
        df['A'] = df['A'].astype(np.int16)
        df['B'] = df['B'].astype(np.float64)

        result = df.unstack(fill_value=-1)
        expected['A'] = expected['A'].astype(np.int16)
        expected['B'] = expected['B'].astype(np.float64)
        assert_frame_equal(result, expected)

        # From a dataframe with incorrect data type for fill_value
        result = df.unstack(fill_value=0.5)

        rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]]
        expected = DataFrame(rows, index=list('xyz'), dtype=np.float)
        expected.columns = MultiIndex.from_tuples(
            [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
        assert_frame_equal(result, expected)

        # Test unstacking with date times
        dv = pd.date_range('2012-01-01', periods=4).values
        data = Series(dv)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [dv[0], pd.NaT, dv[3]],
                              'b': [dv[1], dv[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=dv[0])
        expected = DataFrame({'a': [dv[0], dv[0], dv[3]],
                              'b': [dv[1], dv[2], dv[0]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with time deltas
        td = [Timedelta(days=i) for i in range(4)]
        data = Series(td)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [td[0], pd.NaT, td[3]],
                              'b': [td[1], td[2], pd.NaT]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=td[1])
        expected = DataFrame({'a': [td[0], td[1], td[3]],
                              'b': [td[1], td[2], td[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with period
        periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
                   Period('2012-04')]
        data = Series(periods)
        data.index = MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        result = data.unstack()
        expected = DataFrame({'a': [periods[0], None, periods[3]],
                              'b': [periods[1], periods[2], None]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        result = data.unstack(fill_value=periods[1])
        expected = DataFrame({'a': [periods[0], periods[1], periods[3]],
                              'b': [periods[1], periods[2], periods[1]]},
                             index=['x', 'y', 'z'])
        assert_frame_equal(result, expected)

        # Test unstacking with categorical
        data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
        data.index = pd.MultiIndex.from_tuples(
            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])

        # By default missing values will be NaN
        result = data.unstack()
        expected = DataFrame({'a': pd.Categorical(list('axa'),
                                                  categories=list('abc')),
                              'b': pd.Categorical(list('bcx'),
                                                  categories=list('abc'))},
                             index=list('xyz'))
        assert_frame_equal(result, expected)

        # Fill with non-category results in NaN entries similar to above
        result = data.unstack(fill_value='d')
        assert_frame_equal(result, expected)

        # Fill with category value replaces missing values as expected
        result = data.unstack(fill_value='c')
        expected = DataFrame({'a': pd.Categorical(list('aca'),
                                                  categories=list('abc')),
                              'b': pd.Categorical(list('bcc'),
                                                  categories=list('abc'))},
                             index=list('xyz'))
        assert_frame_equal(result, expected)

Example #49

0

Show file

File: pandas_two.py Project: caseymm/python_class_materials

print(df)
# Sorts by column 'a' first, then by 'b' to break ties.
# sort_values(by=...) replaces the removed sort_index(by=...).
print(df.sort_values(by=['a', 'b']))
print('')

######

# Series with a two-level index.  print() is always called with a single
# argument so the output is the same on Python 2 and Python 3.
s = Series([8, 2, 5, 9, 4, 7, 5, 3],
           index=[['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
                  ['x', 'y', 'x', 'y', 'x', 'y', 'x', 'y']])
print(s)
print(s['b'])
# can slice
print(s[1:2])
# can select particular items (.loc replaces the removed .ix indexer)
print(s.loc[['a', 'c']])

s2 = s.unstack()
print(s2)
# can also restack to put back in original form
print(s2.stack())
print('')

#####
d = np.arange(12).reshape((4, 3))
df = DataFrame(d, index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
               columns=[['unc', 'unc', 'duke'], ['x', 'y', 'x']])
print(df)

# this sums over the outermost index level
# (groupby(level=0).sum() replaces the removed sum(level=0))
print(df.groupby(level=0).sum())

Example #50

0

Show file

File: Lecture_24_Index_Hierarchy.py Project: colson1111/Udemy

from numpy.random import randn

ser = Series(randn(6), index = [[1,1,1,2,2,2],['a','b','c','a','b','c']])
ser

ser.index # get number of index levels and labels

# outer indexing
ser[2]

# internal indexing
ser[:,'a']


# creating dataframe from multi-index level Series
dframe = ser.unstack()
dframe

# construct dataframe with multiple index levels
dframe2 = DataFrame(np.arange(16).reshape(4,4),
                    index = [['a','a','b','b'],[1,2,1,2]],
                    columns = [['NY','NY','LA','SF'],['cold','hot','hot','cold']])

dframe2

# naming indexes and columns
dframe2.index.names = ['INDEX_1', 'INDEX_2']
dframe2.columns.names = ['Cities','Temp']
dframe2

# Interchange index level orders