Esempio n. 1
0
    def test_hash_pandas_object(self):
        """Run both hashing checks over a broad sample of pandas objects.

        Each sample is handed to ``check_equal`` and then to
        ``check_not_equal_with_index``, in the order listed below.
        """
        samples = [
            # plain Series of various dtypes, with and without missing values
            Series([1, 2, 3]),
            Series([1.0, 1.5, 3.2]),
            Series([1.0, 1.5, np.nan]),
            Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
            Series(['a', 'b', 'c']),
            Series(['a', np.nan, 'c']),
            Series(['a', None, 'c']),
            Series([True, False, True]),
            Series(),
            # flat indexes
            Index([1, 2, 3]),
            Index([True, False, True]),
            # frames
            DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
            DataFrame(),
            tm.makeMissingDataframe(),
            tm.makeMixedDataFrame(),
            tm.makeTimeDataFrame(),
            # time-like containers
            tm.makeTimeSeries(),
            tm.makeTimedeltaIndex(),
            tm.makePeriodIndex(),
            Series(tm.makePeriodIndex()),
            Series(pd.date_range('20130101', periods=3, tz='US/Eastern')),
            # multi-level indexes
            MultiIndex.from_product([
                range(5),
                ['foo', 'bar', 'baz'],
                pd.date_range('20130101', periods=2),
            ]),
            MultiIndex.from_product([
                pd.CategoricalIndex(list('aabc')),
                range(3),
            ]),
        ]

        for sample in samples:
            self.check_equal(sample)
            self.check_not_equal_with_index(sample)
Esempio n. 2
0
    def __init__(self, node_name, parents=None, node_domain=None):
        """Create an all-zero table for a node, indexed by its parents' values.

        :param node_name: name forwarded to the base-class initializer.
        :param parents: optional sequence of parent nodes; each must expose
            ``name`` and ``domain``. ``None`` or an empty sequence yields a
            table with a single row.
        :param node_domain: optional sequence of values this node can take.
            ``None`` or an empty sequence falls back to ``['T', 'F']``.
        """
        super(CPT, self).__init__(node_name)

        if node_domain is None or len(node_domain) == 0:
            self._domain = ['T', 'F']
        else:
            # Slice-copy so the table does not alias the caller's sequence.
            self._domain = node_domain[:]

        # m = number of rows (product of the parents' domain sizes),
        # n = number of columns (size of this node's domain).
        self.m = 1
        self.n = len(self._domain)

        # NOTE(review): self._name is presumably set by the base-class
        # initializer above -- confirm.
        if parents is None or len(parents) == 0:
            self.rows = [self._name]
            self.cols = MultiIndex.from_product([self._domain])
        else:
            parents_names = []
            parents_domains = []
            for parent in parents:
                parent_domain = parent.domain
                parents_names.append(parent.name)
                parents_domains.append(parent_domain)
                self.m = self.m * len(parent_domain)
            self.cols = MultiIndex.from_product([self._domain], names=[self._name])
            self.rows = MultiIndex.from_product(parents_domains, names=parents_names)

        self._values = np.zeros((self.m, self.n))
        self._table = DataFrame(self._values, index=self.rows, columns=self.cols)
Esempio n. 3
0
    def setup_method(self, method):
        """Build the per-test fixture objects and group them by kind.

        For each index flavour a ``series_<typ>`` and ``frame_<typ>``
        attribute is created with random values.  Afterwards, for every
        name in ``self._objs`` a dict mapping each entry of ``self._typs``
        to the matching ``<obj>_<typ>`` attribute (or ``None`` when no such
        attribute exists) is stored on ``self`` under that name.
        """

        # integer-labelled index / columns
        self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2))
        self.frame_ints = DataFrame(np.random.randn(4, 4),
                                    index=lrange(0, 8, 2),
                                    columns=lrange(0, 12, 3))

        # same labels, wrapped in UInt64Index
        self.series_uints = Series(np.random.rand(4),
                                   index=UInt64Index(lrange(0, 8, 2)))
        self.frame_uints = DataFrame(np.random.randn(4, 4),
                                     index=UInt64Index(lrange(0, 8, 2)),
                                     columns=UInt64Index(lrange(0, 12, 3)))

        # same labels, wrapped in Float64Index
        self.series_floats = Series(np.random.rand(4),
                                    index=Float64Index(range(0, 8, 2)))
        self.frame_floats = DataFrame(np.random.randn(4, 4),
                                      index=Float64Index(range(0, 8, 2)),
                                      columns=Float64Index(range(0, 12, 3)))

        # two-level MultiIndexes; only the first two are used below
        m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]),
                   MultiIndex.from_product([[5, 6], [7, 8]]),
                   MultiIndex.from_product([[9, 10], [11, 12]])]

        self.series_multi = Series(np.random.rand(4),
                                   index=m_idces[0])
        self.frame_multi = DataFrame(np.random.randn(4, 4),
                                     index=m_idces[0],
                                     columns=m_idces[1])

        # string labels
        self.series_labels = Series(np.random.randn(4), index=list('abcd'))
        self.frame_labels = DataFrame(np.random.randn(4, 4),
                                      index=list('abcd'), columns=list('ABCD'))

        # mixed integer / string labels
        self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4),
                                     index=[2, 4, 'null', 8])

        # ascending daily timestamps
        self.series_ts = Series(np.random.randn(4),
                                index=date_range('20130101', periods=4))
        self.frame_ts = DataFrame(np.random.randn(4, 4),
                                  index=date_range('20130101', periods=4))

        # the same timestamps in descending order
        dates_rev = (date_range('20130101', periods=4)
                     .sort_values(ascending=False))
        self.series_ts_rev = Series(np.random.randn(4),
                                    index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
                                      index=dates_rev)

        # empty containers
        self.frame_empty = DataFrame()
        self.series_empty = Series()

        # form agglomerates
        for o in self._objs:

            d = dict()
            for t in self._typs:
                # missing '<obj>_<typ>' attributes are recorded as None
                d[t] = getattr(self, '%s_%s' % (o, t), None)

            setattr(self, o, d)
0
    def test_join_multi_to_multi(self, join_type):
        """Join a 3-level frame to a 2-level frame on their shared levels.

        The join result must match a reset_index/merge/set_index round trip;
        joins whose ``on`` does not cover the right index levels must raise.
        """
        # GH 20475
        left_idx = MultiIndex.from_product(
            [list('abc'), list('xy'), [1, 2]],
            names=['abc', 'xy', 'num'])
        left = DataFrame({'v1': range(12)}, index=left_idx)

        right_idx = MultiIndex.from_product(
            [list('abc'), list('xy')], names=['abc', 'xy'])
        right = DataFrame({'v2': list(range(100, 700, 100))},
                          index=right_idx)

        result = left.join(right, on=['abc', 'xy'], how=join_type)

        # Reference result built through flat columns.
        flat_left = left.reset_index()
        flat_right = right.reset_index()
        merged = flat_left.merge(flat_right, on=['abc', 'xy'], how=join_type)
        expected = merged.set_index(['abc', 'xy', 'num'])
        assert_frame_equal(expected, result)

        msg = (r'len\(left_on\) must equal the number of levels in the index'
               ' of "right"')
        # Too few keys for the right-hand index.
        with pytest.raises(ValueError, match=msg):
            left.join(right, on='xy', how=join_type)

        # Swapped operands: two keys against a three-level index.
        with pytest.raises(ValueError, match=msg):
            right.join(left, on=['abc', 'xy'], how=join_type)
Esempio n. 5
0
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """Check ``.loc`` with a tuple of nested list-likes and/or slices."""
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            # ndarray and slice need dedicated construction; every other
            # type is built by calling it on the key list.
            if indexer_type == np.ndarray:
                converted = np.array(keys)
            elif indexer_type == slice:
                converted = slice(*keys)
            else:
                converted = indexer_type(keys)
            return converted

        outer = [10, 20, 30]
        inner = [1, 2, 3]
        index = MultiIndex.from_product([outer, inner])
        df = DataFrame(np.arange(len(index), dtype='int64'),
                       index=index, columns=['Data'])

        keys = ([10, 20], [2, 3])

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = (convert_nested_indexer(indexer_type_1, keys[0]),
                   convert_nested_indexer(indexer_type_2, keys[1]))

        result = df.loc[indexer, 'Data']

        expected_index = MultiIndex.from_product(keys)
        expected = Series([1, 2, 4, 5], name='Data', index=expected_index)

        tm.assert_series_equal(result, expected)
Esempio n. 6
0
    def test_apply_categorical_data(self):
        """Group by two categoricals, one of which has an unobserved category.

        Reductions must keep the unobserved category in the output index;
        an ``apply`` that returns a scalar per group must not.
        """
        # GH 10138
        for is_ordered in (True, False):
            dense = Categorical(list('abc'), ordered=is_ordered)
            # 'b' is in the categories but not in the list
            missing = Categorical(list('aaa'), categories=['a', 'b'],
                                  ordered=is_ordered)
            df = DataFrame({'missing': missing,
                            'dense': dense,
                            'values': np.arange(len(dense))})
            grouped = df.groupby(['missing', 'dense'])

            # missing category 'b' should still exist in the output index
            full_levels = [Categorical(['a', 'b'], ordered=is_ordered),
                           Categorical(['a', 'b', 'c'], ordered=is_ordered)]
            full_idx = MultiIndex.from_product(full_levels,
                                               names=['missing', 'dense'])
            expected = DataFrame([0, 1, 2, np.nan, np.nan, np.nan],
                                 index=full_idx, columns=['values'])

            assert_frame_equal(grouped.apply(lambda x: np.mean(x)), expected)
            assert_frame_equal(grouped.mean(), expected)
            assert_frame_equal(grouped.agg(np.mean), expected)

            # but for transform we should still get back the original index
            observed_idx = MultiIndex.from_product(
                [['a'], ['a', 'b', 'c']], names=['missing', 'dense'])
            expected = Series(1, index=observed_idx)
            assert_series_equal(grouped.apply(lambda x: 1), expected)
Esempio n. 7
0
    def test_loc_getitem_series(self):
        """Check ``Series.loc`` on a MultiIndex when the key is a Series.

        Covers a default-indexed key, the equivalent list key, a key with a
        non-default index, and an empty key.
        """
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # Pin the dtype on the Series itself: the dtype passed to MultiIndex
        # does not affect the values, so without this the expectation relies
        # on the implicit default dtype of an empty Series.
        expected = Series([], index=MultiIndex(
            levels=index.levels, labels=[[], []], dtype=np.float64),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)
Esempio n. 8
0
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
                                                            expected_error):
    """Slice a sorted two-level Series with indexers that may be missing.

    When ``expected_error`` is given, ``.loc`` must raise ``KeyError``
    matching it; otherwise the selection must equal the expected subset.
    """
    # GH 7866
    # multi-index slicing with missing indexers
    level_names = ['one', 'two']
    idx = MultiIndex.from_product(
        [['A', 'B', 'C'], ['foo', 'bar', 'baz']], names=level_names)
    s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()

    if indexer == []:
        # empty indexer selects nothing
        expected = s.iloc[[]]
    elif is_level1:
        # selection on the second level keeps 'foo' under every first label
        foo_idx = MultiIndex.from_product(
            [['A', 'B', 'C'], ['foo']], names=['one', 'two'])
        expected = Series([0, 3, 6], index=foo_idx).sort_index()
    else:
        # selection on the first level keeps everything under 'A'
        a_idx = MultiIndex.from_product(
            [['A'], ['foo', 'bar', 'baz']], names=['one', 'two'])
        expected = Series(np.arange(3, dtype='int64'),
                          index=a_idx).sort_index()

    if expected_error is None:
        result = s.loc[indexer]
        tm.assert_series_equal(result, expected)
    else:
        with pytest.raises(KeyError, match=expected_error):
            s.loc[indexer]
Esempio n. 9
0
    def test_loc_getitem_array(self):
        """Check ``Series.loc`` on a MultiIndex when the key is a NumPy value.

        Covers a 1-d array key, an empty array key, and a NumPy scalar key.
        """
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # Pin the dtype on the Series itself: the dtype passed to MultiIndex
        # does not affect the values, so without this the expectation relies
        # on the implicit default dtype of an empty Series.
        expected = Series([], index=MultiIndex(
            levels=index.levels, labels=[[], []], dtype=np.float64),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(
            data=[0, 1, 2],
            index=['A', 'B', 'C'],
            dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)
Esempio n. 10
0
    def test_loc_multiindex_incomplete(self):

        # GH 7399
        # incomplete indexers
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.loc[:, 'a':'c']

        result = s.loc[0:4, 'a':'c']
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, 'a':'c']
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, 'a':'c']
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer gettitem with list of indexers skips wrong element
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, 'a':'c']
        tm.assert_series_equal(result, expected)
Esempio n. 11
0
def test_from_product_empty():
    """Check ``MultiIndex.from_product`` with zero, one and several empty inputs."""
    # 0 levels
    with tm.assert_raises_regex(
            ValueError, "Must pass non-zero number of levels/labels"):
        MultiIndex.from_product([])

    # 1 level
    single = MultiIndex.from_product([[]], names=['A'])
    tm.assert_index_equal(single.levels[0], pd.Index([], name='A'))

    # 2 levels: both empty, only the second empty, only the first empty
    names = ['A', 'B']
    level_pairs = [
        ([], []),
        (['foo', 'bar', 'baz'], []),
        ([], ['a', 'b', 'c']),
    ]
    for first, second in level_pairs:
        result = MultiIndex.from_product([first, second], names=names)
        expected = MultiIndex(levels=[first, second],
                              labels=[[], []], names=names)
        tm.assert_index_equal(result, expected)

    # GH12258
    names = ['A', 'B', 'C']
    for size in range(4):
        middle = lrange(size)
        result = MultiIndex.from_product([[], middle, []], names=names)
        expected = MultiIndex(levels=[[], middle, []],
                              labels=[[], [], []], names=names)
        tm.assert_index_equal(result, expected)
Esempio n. 12
0
    def test_binary_ops_align(self):

        # test aligning binary ops

        # GH 6681
        index = MultiIndex.from_product(
            [list("abc"), ["one", "two", "three"], [1, 2, 3]], names=["first", "second", "third"]
        )

        df = DataFrame(
            np.arange(27 * 3).reshape(27, 3), index=index, columns=["value1", "value2", "value3"]
        ).sortlevel()

        idx = pd.IndexSlice
        for op in ["add", "sub", "mul", "div", "truediv"]:
            opa = getattr(operator, op, None)
            if opa is None:
                continue

            x = Series([1.0, 10.0, 100.0], [1, 2, 3])
            result = getattr(df, op)(x, level="third", axis=0)

            expected = pd.concat([opa(df.loc[idx[:, :, i], :], v) for i, v in x.iteritems()]).sortlevel()
            assert_frame_equal(result, expected)

            x = Series([1.0, 10.0], ["two", "three"])
            result = getattr(df, op)(x, level="second", axis=0)

            expected = pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.iteritems()]).reindex_like(df).sortlevel()
            assert_frame_equal(result, expected)

        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
        df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
        s = pd.Series({"a": 1, "b": 2})

        df2 = df.copy()
        df2.columns.names = ["lvl0", "lvl1"]
        s2 = s.copy()
        s2.index.name = "lvl1"

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level="lvl1")
        res6 = df2.mul(s2, axis=1, level="lvl1")

        exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx)

        for res in [res1, res2]:
            assert_frame_equal(res, exp)

        exp.columns.names = ["lvl0", "lvl1"]
        for res in [res3, res4, res5, res6]:
            assert_frame_equal(res, exp)
Esempio n. 13
0
def test_to_html_multi_indexes_index_false(datapath):
    """Render a frame with MultiIndex rows and columns using ``index=False``."""
    # GH 22579
    frame = DataFrame({
        'a': range(10),
        'b': range(10, 20),
        'c': range(10, 20),
        'd': range(10, 20),
    })
    frame.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
    frame.index = MultiIndex.from_product(
        [['a', 'b'], ['c', 'd', 'e', 'f', 'g']])

    rendered = frame.to_html(index=False)
    reference = expected_html(datapath, 'gh22579_expected_output')
    assert rendered == reference
Esempio n. 14
0
 def setup(self):
     self.mi_large = MultiIndex.from_product(
         [np.arange(1000), np.arange(20), list(string.ascii_letters)],
         names=['one', 'two', 'three'])
     self.mi_med = MultiIndex.from_product(
         [np.arange(1000), np.arange(10), list('A')],
         names=['one', 'two', 'three'])
     self.mi_small = MultiIndex.from_product(
         [np.arange(100), list('A'), list('A')],
         names=['one', 'two', 'three'])
Esempio n. 15
0
def test_repeat():
    """``MultiIndex.repeat`` must repeat each entry ``reps`` times in place."""
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(['foo', 'bar'])

    mi = MultiIndex.from_product([numbers, names], names=names)
    repeated = mi.repeat(reps)

    # Repeating the inner level before taking the product gives the same
    # sequence of tuples.
    expected = MultiIndex.from_product([numbers, names.repeat(reps)],
                                       names=names)
    tm.assert_index_equal(repeated, expected)
Esempio n. 16
0
    def test_conversion_multiindex(self):
        """Exercise ``StrToComposition`` with ``multiindex=True``.

        Covers a flat input frame, a two-level input frame, a custom target
        column id, overwriting the source column, and the error column
        produced with ``return_errors=True``.
        """
        d = {'comp_str': ["Fe2", "MnO2"]}
        compositions = [Composition("Fe2"), Composition("MnO2")]

        def two_level_frame():
            # Fresh frame whose columns sit under a "custom" top level.
            frame = DataFrame(data=d)
            frame.columns = MultiIndex.from_product((["custom"],
                                                     frame.columns.values))
            return frame

        df_1lvl = DataFrame(data=d)
        df_1lvl = StrToComposition().featurize_dataframe(
            df_1lvl, 'comp_str', multiindex=True)
        self.assertEqual(df_1lvl[("StrToComposition", "composition")].tolist(),
                         compositions)

        df_2lvl = two_level_frame()
        df_2lvl = StrToComposition().featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True)
        self.assertEqual(df_2lvl[("StrToComposition", "composition")].tolist(),
                         compositions)

        df_2lvl = two_level_frame()
        sto = StrToComposition(target_col_id='test')
        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True)
        self.assertEqual(df_2lvl[("StrToComposition", "test")].tolist(),
                         compositions)

        # if two level multiindex provided as target, it should be written there
        # here we test converting multiindex in place
        df_2lvl = two_level_frame()
        sto = StrToComposition(target_col_id=None, overwrite_data=True)
        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True, inplace=False)
        self.assertEqual(df_2lvl[("custom", "comp_str")].tolist(),
                         compositions)

        # Try inplace multiindex conversion with return errors
        df_2lvl = two_level_frame()
        sto = StrToComposition(target_col_id=None, overwrite_data=True)
        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True,
            return_errors=True, ignore_errors=True)

        error_column = df_2lvl[("custom", "StrToComposition Exceptions")]
        self.assertTrue(all(error_column.isnull()))
Esempio n. 17
0
def test_aggregate_api_consistency():
    """Check that list, dict and dict-of-list aggregations agree.

    Each ``agg`` call is compared with a frame assembled from the individual
    per-column ``mean`` / ``sum`` results.
    """
    # GH 9052
    # make sure that the aggregates via dict
    # are consistent
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    grouped = df.groupby(['A', 'B'])
    c_mean = grouped['C'].mean()
    c_sum = grouped['C'].sum()
    d_mean = grouped['D'].mean()
    d_sum = grouped['D'].sum()

    result = grouped['D'].agg(['sum', 'mean'])
    expected = pd.concat([d_sum, d_mean], axis=1)
    expected.columns = ['sum', 'mean']
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg([np.sum, np.mean])
    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['sum', 'mean']])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped[['D', 'C']].agg([np.sum, np.mean])
    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['D', 'C'],
                                                ['sum', 'mean']])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({'C': 'mean', 'D': 'sum'})
    expected = pd.concat([d_sum, c_mean], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({'C': ['mean', 'sum'],
                          'D': ['mean', 'sum']})
    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['mean', 'sum']])
    # This comparison was missing: the expectation above was built and then
    # overwritten without ever being checked.
    tm.assert_frame_equal(result, expected, check_like=True)

    # dict keys that are not column names act as renamers here and are
    # expected to emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped[['D', 'C']].agg({'r': np.sum,
                                          'r2': np.mean})
    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['r', 'r2'],
                                                ['D', 'C']])
    tm.assert_frame_equal(result, expected, check_like=True)
Esempio n. 18
0
def test_repeat():
    """Check ``MultiIndex.repeat``, including the ``n=`` keyword that warns."""
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(['foo', 'bar'])

    mi = MultiIndex.from_product([numbers, names], names=names)
    # Repeating the inner level before taking the product gives the same
    # sequence of tuples.
    expected = MultiIndex.from_product([numbers, names.repeat(reps)],
                                       names=names)
    tm.assert_index_equal(mi.repeat(reps), expected)

    # The ``n`` keyword is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        via_keyword = mi.repeat(n=reps)
        tm.assert_index_equal(via_keyword, expected)
Esempio n. 19
0
def test_numpy_repeat():
    """Check ``np.repeat`` on a MultiIndex and that ``axis`` is rejected."""
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(['foo', 'bar'])

    mi = MultiIndex.from_product([numbers, names], names=names)
    expected = MultiIndex.from_product([numbers, names.repeat(reps)],
                                       names=names)
    repeated = np.repeat(mi, reps)
    tm.assert_index_equal(repeated, expected)

    # Passing an axis must be refused with a ValueError.
    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(mi, reps, axis=1)
Esempio n. 20
0
def test_duplicate_level_names(names):
    """Check that level names survive construction and every rename path."""
    # GH18872, GH19029
    levels = [[0, 1]] * 3

    # Names supplied at construction time.
    named = MultiIndex.from_product(levels, names=names)
    assert named.names == names

    # With .rename()
    renamed = MultiIndex.from_product(levels).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    renamed.rename(names[1], level=1, inplace=True)
    renamed = renamed.rename([names[0], names[2]], level=[0, 2])
    assert renamed.names == names
Esempio n. 21
0
    def test_delitem_multiindex(self):
        """Delete a top-level key from MultiIndex columns and re-check lookups."""
        columns = MultiIndex.from_product([['A', 'B'], [1, 2]])
        df = DataFrame(np.random.randn(4, 4), columns=columns)
        assert len(df.columns) == 4
        assert ('A', ) in df.columns
        assert 'A' in df.columns

        # Selecting the top-level key yields a sub-frame.
        sub_frame = df['A']
        assert isinstance(sub_frame, DataFrame)
        del df['A']

        assert len(df.columns) == 2

        # A still in the levels, BUT get a KeyError if trying
        # to delete
        assert ('A', ) not in df.columns
        with pytest.raises(KeyError):
            del df[('A',)]

        # behavior of dropped/deleted MultiIndex levels changed from
        # GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
        # levels which are dropped/deleted
        assert 'A' not in df.columns
        with pytest.raises(KeyError):
            del df['A']
Esempio n. 22
0
 def factor_matrix(self, terms, start_date, end_date):
     """Return an empty frame with one column per term name.

     The index is the product of the daily dates between ``start_date`` and
     ``end_date`` with an empty second level, so it has no rows.  Columns
     are the sorted keys of ``terms``.
     """
     days = date_range(start=start_date, end=end_date, freq='D')
     empty_index = MultiIndex.from_product([days, ()])
     column_names = sorted(terms.keys())
     return DataFrame(index=empty_index, columns=column_names)
Esempio n. 23
0
def test_get_indexer_categorical_time():
    """``get_indexer`` of a categorical/datetime MultiIndex on itself is 0..8."""
    # https://github.com/pandas-dev/pandas/issues/21390
    letters = Categorical(['a', 'b', 'c'])
    hours = Categorical(date_range("2012-01-01", periods=3, freq='H'))
    midx = MultiIndex.from_product([letters, hours])

    positions = midx.get_indexer(midx)
    tm.assert_numpy_array_equal(positions, np.arange(9, dtype=np.intp))
Esempio n. 24
0
def analyze():
    """Summarise received signals per frequency and device, and plot them.

    Reads the signal log from ``FILE_SIGNALS``, counts signals for every
    (frequency, device id) pair, prints a suggested frequency, writes the
    count table to ``spectrum.csv`` and saves an area plot to
    ``FILE_SPECTRUM``.
    """
    signals = read_csv(FILE_SIGNALS)
    devices = signals["id"].unique()

    print("got %d signals from %d devices" % (len(signals), len(devices)))

    # Count per (frequency, id), then reindex over every SPECTRUM x device
    # combination so that unseen pairs are counted as 0.
    signals = signals.groupby(["frequency", "id"]).size()
    signals = signals.reindex(MultiIndex.from_product([SPECTRUM, devices],
                                                      names=signals.index.names),
                              fill_value=0)
    signals = signals.unstack("id")

    # let's only keep frequencies with all signals present
    candidates = signals.dropna()
    # suggest frequency where the weakest sensor has the most
    # received signals, and then the frequency with most total
    # received signals for all sensors
    candidates = DataFrame({"total":   candidates.sum(axis=1),
                            "weakest": candidates.min(axis=1)})
    # DataFrame.sort() was removed from pandas; sort_values() is the
    # replacement for sorting by column values.
    appropriate_freq = candidates.sort_values(["weakest", "total"],
                                              ascending=False).index[0]
    print("suggesting frequency %s" % mhz(appropriate_freq))

    signals.to_csv("spectrum.csv")

    # Imported here so matplotlib is only needed when plotting.
    import matplotlib.pyplot as plt
    from matplotlib.ticker import EngFormatter

    # Plot kinds are lowercase names; "Area" is not accepted by current pandas.
    p = signals.plot(kind="area")
    p.xaxis.set_major_formatter(EngFormatter(unit='Hz', places=2))
    plt.savefig(FILE_SPECTRUM, dpi=300)
    print("saved spectrum as %s" % FILE_SPECTRUM)
Esempio n. 25
0
def test_multi_index_parse_dates(all_parsers, index_col):
    """Parse a CSV with a two-column index and ``parse_dates=True``.

    The first index column must come back as datetimes; with
    ``index_col == [1, 0]`` the two index levels are swapped.
    """
    data = """index1,index2,A,B,C
20090101,one,a,1,2
20090101,two,b,3,4
20090101,three,c,4,5
20090102,one,a,1,2
20090102,two,b,3,4
20090102,three,c,4,5
20090103,one,a,1,2
20090103,two,b,3,4
20090103,three,c,4,5
"""
    parser = all_parsers

    days = (datetime(2009, 1, 1), datetime(2009, 1, 2), datetime(2009, 1, 3))
    labels = ("one", "two", "three")
    index = MultiIndex.from_product([days, labels],
                                    names=["index1", "index2"])

    # Out of order.
    if index_col == [1, 0]:
        index = index.swaplevel(0, 1)

    # The same three rows repeat once per day.
    rows = [["a", 1, 2], ["b", 3, 4], ["c", 4, 5],
            ["a", 1, 2], ["b", 3, 4], ["c", 4, 5],
            ["a", 1, 2], ["b", 3, 4], ["c", 4, 5]]
    expected = DataFrame(rows, columns=["A", "B", "C"], index=index)

    result = parser.read_csv(StringIO(data), index_col=index_col,
                             parse_dates=True)
    tm.assert_frame_equal(result, expected)
Esempio n. 26
0
 def run_pipeline(self, pipeline, start_date, end_date):
     """Return an empty frame with one column per pipeline column name.

     The index is the product of the daily dates between ``start_date`` and
     ``end_date`` with an empty second level, so it has no rows.  Columns
     are the sorted keys of ``pipeline.columns``.
     """
     days = date_range(start=start_date, end=end_date, freq='D')
     empty_index = MultiIndex.from_product([days, ()])
     column_names = sorted(pipeline.columns.keys())
     return DataFrame(index=empty_index, columns=column_names)
Esempio n. 27
0
 def setup(self):
     """Store a mostly-NaN, four-level-indexed sparse series on ``self.ss``."""
     values = Series([np.nan] * 10000)
     # Only three positions carry real values.
     for position, value in ((0, 3.0), (100, -1.0), (999, 12.1)):
         values[position] = value
     # 10 ** 4 index entries, one per value.
     values.index = MultiIndex.from_product([range(10)] * 4)
     self.ss = values.to_sparse()
Esempio n. 28
0
    def test_multiindex_label_slicing_with_negative_step(self):
        """Label slices with step -1 must match the equivalent positional slices.

        Each case is checked through ``.loc``, plain ``[]`` and ``.ix``.
        """
        s = Series(np.arange(20),
                   MultiIndex.from_product([list('abcde'), np.arange(4)]))
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
            tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
            # warnings raised while using .ix are captured, not surfaced
            with catch_warnings(record=True):
                tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

        # (label slice, positional slice) pairs, checked in order
        cases = [
            (SLC[::-1], SLC[::-1]),

            (SLC['d'::-1], SLC[15::-1]),
            (SLC[('d', )::-1], SLC[15::-1]),

            (SLC[:'d':-1], SLC[:11:-1]),
            (SLC[:('d', ):-1], SLC[:11:-1]),

            (SLC['d':'b':-1], SLC[15:3:-1]),
            (SLC[('d', ):'b':-1], SLC[15:3:-1]),
            (SLC['d':('b', ):-1], SLC[15:3:-1]),
            (SLC[('d', ):('b', ):-1], SLC[15:3:-1]),
            (SLC['b':'d':-1], SLC[:0]),

            (SLC[('c', 2)::-1], SLC[10::-1]),
            (SLC[:('c', 2):-1], SLC[:9:-1]),
            (SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]),
        ]
        for label_slice, positional_slice in cases:
            assert_slices_equivalent(label_slice, positional_slice)
Esempio n. 29
0
 def setup(self):
     self.mi_int = MultiIndex.from_product([np.arange(1000),
                                            np.arange(1000)],
                                           names=['one', 'two'])
     self.obj_index = np.array([(0, 10), (0, 11), (0, 12),
                                (0, 13), (0, 14), (0, 15),
                                (0, 16), (0, 17), (0, 18),
                                (0, 19)], dtype=object)
Esempio n. 30
0
def test_get_loc_nan(level, nulls_fixture):
    """``get_loc`` must find a key whose entry at ``level`` is a null value."""
    # GH 18485 : NaN in MultiIndex
    levels = [['a', 'b'], ['c', 'd']]
    lookup = ['b', 'd']

    # Replace the chosen level with [0, <null>] and look up the null entry;
    # the key then addresses the last of the four product entries.
    null_value = nulls_fixture
    levels[level] = np.array([0, null_value], dtype=type(null_value))
    lookup[level] = null_value

    idx = MultiIndex.from_product(levels)
    assert idx.get_loc(tuple(lookup)) == 3
Esempio n. 31
0
    def test_binary_ops_align(self):
        """Check that flex arithmetic ops align a Series on a MultiIndex level.

        Part one broadcasts a Series along one row-index level and compares
        against a result assembled slice by slice.  Part two multiplies along
        a column level for every named/unnamed combination.
        """

        # test aligning binary ops

        # GH 6681
        row_index = MultiIndex.from_product(
            [list("abc"), ["one", "two", "three"], [1, 2, 3]],
            names=["first", "second", "third"],
        )
        raw = DataFrame(
            np.arange(27 * 3).reshape(27, 3),
            index=row_index,
            columns=["value1", "value2", "value3"],
        )
        df = raw.sort_index()

        idx = pd.IndexSlice
        for op in ["add", "sub", "mul", "div", "truediv"]:
            # ops that the operator module does not provide are skipped
            opa = getattr(operator, op, None)
            if opa is None:
                continue

            # align on the innermost level
            x = Series([1.0, 10.0, 100.0], [1, 2, 3])
            result = getattr(df, op)(x, level="third", axis=0)

            pieces = []
            for i, v in x.items():
                pieces.append(opa(df.loc[idx[:, :, i], :], v))
            expected = pd.concat(pieces).sort_index()
            tm.assert_frame_equal(result, expected)

            # align on the middle level with only part of its labels
            x = Series([1.0, 10.0], ["two", "three"])
            result = getattr(df, op)(x, level="second", axis=0)

            pieces = []
            for i, v in x.items():
                pieces.append(opa(df.loc[idx[:, i], :], v))
            expected = pd.concat(pieces).reindex_like(df).sort_index()
            tm.assert_frame_equal(result, expected)

        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
        df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
        s = pd.Series({"a": 1, "b": 2})

        df2 = df.copy()
        df2.columns.names = ["lvl0", "lvl1"]
        s2 = s.copy()
        s2.index.name = "lvl1"

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level="lvl1")
        res6 = df2.mul(s2, axis=1, level="lvl1")

        exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"),
                        columns=midx)

        # results computed from the frame with unnamed column levels
        for unnamed_result in [res1, res2]:
            tm.assert_frame_equal(unnamed_result, exp)

        # results computed from the frame with named column levels
        exp.columns.names = ["lvl0", "lvl1"]
        for named_result in [res3, res4, res5, res6]:
            tm.assert_frame_equal(named_result, exp)
Esempio n. 32
0
def test_from_product_invalid_input(invalid_input):
    """``MultiIndex.from_product`` must reject ``invalid_input`` with TypeError.

    ``invalid_input`` is supplied by the caller (its source is not visible
    in this block). Either of the two error messages below is accepted.
    """
    accepted_messages = (
        r"Input must be a list / sequence of iterables",
        r"Input must be list-like",
    )
    with pytest.raises(TypeError, match="|".join(accepted_messages)):
        MultiIndex.from_product(iterables=invalid_input)
Esempio n. 33
0
def test_groupby_categorical_two_columns():
    """Group by a categorical key, alone and combined with a second key.

    The expected frames contain NaN rows for category values that never
    occur in the data, both for an explicit ``Categorical`` column and for
    the bins produced by ``pd.cut``.
    """
    # https://github.com/pandas-dev/pandas/issues/8138
    frame = pd.DataFrame({
        'cat': pd.Categorical(["a", "b", "a", "b"],
                              categories=["a", "b", "c"],
                              ordered=True),
        'ints': [1, 1, 2, 2],
        'val': [10, 20, 30, 40],
    })

    # Grouping on a single column
    single_key_mean = frame.groupby("cat").agg('mean')
    single_key_expected = DataFrame(
        {"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]},
        index=pd.CategoricalIndex(["a", "b", "c"], name="cat", ordered=True),
    )
    tm.assert_frame_equal(single_key_mean, single_key_expected)

    # Grouping on two columns
    by_cat_and_ints = frame.groupby(["cat", "ints"])
    double_key_mean = by_cat_and_ints.agg('mean')
    double_key_expected = DataFrame({
        "val": [10, 30, 20, 40, np.nan, np.nan],
        "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
        "ints": [1, 2, 1, 2, 1, 2],
    }).set_index(["cat", "ints"])
    tm.assert_frame_equal(double_key_mean, double_key_expected)

    # GH 10132
    for cat_value, int_value in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]:
        selected = by_cat_and_ints.get_group((cat_value, int_value))
        row_mask = (frame.cat == cat_value) & (frame.ints == int_value)
        assert_frame_equal(selected, frame[row_mask])

    # Same check with the categorical key produced by binning a column.
    binned = pd.DataFrame({
        'C1': [3, 3, 4, 5],
        'C2': [1, 2, 3, 4],
        'C3': [10, 100, 200, 34],
    })
    bins = pd.cut(binned['C1'], [1, 2, 3, 6])
    bins.name = "cat"
    binned_mean = binned.groupby([bins, 'C2']).agg('mean')

    interval_levels = Categorical(
        [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True)
    full_index = MultiIndex.from_product(
        [interval_levels, [1, 2, 3, 4]], names=["cat", "C2"])
    four_missing = [np.nan] * 4
    binned_expected = DataFrame(
        {
            "C1": four_missing + [3, 3] + four_missing + [4, 5],
            "C3": four_missing + [10, 100] + four_missing + [200, 34],
        },
        index=full_index)
    tm.assert_frame_equal(binned_mean, binned_expected)
Esempio n. 34
0
    def setup_method(self, method):
        """Create Series/DataFrame/Panel fixtures for each index flavour.

        Every fixture is stored on ``self`` as ``<kind>_<typ>`` (for example
        ``frame_ints``). Afterwards, for each name in ``self._objs`` a dict
        mapping every entry of ``self._typs`` to the matching fixture (or
        ``None`` when no such attribute exists) is stored under that name.
        ``method`` is not used.
        """
        evens = lrange(0, 8, 2)
        threes = lrange(0, 12, 3)
        fours = lrange(0, 16, 4)

        # Plain integer labels.
        self.series_ints = Series(np.random.rand(4), index=evens)
        self.frame_ints = DataFrame(np.random.randn(4, 4),
                                    index=evens, columns=threes)
        with catch_warnings(record=True):
            self.panel_ints = Panel(np.random.rand(4, 4, 4),
                                    items=evens,
                                    major_axis=threes,
                                    minor_axis=fours)

        # Unsigned 64-bit integer labels.
        self.series_uints = Series(np.random.rand(4),
                                   index=UInt64Index(evens))
        self.frame_uints = DataFrame(np.random.randn(4, 4),
                                     index=UInt64Index(evens),
                                     columns=UInt64Index(threes))
        self.panel_uints = Panel(np.random.rand(4, 4, 4),
                                 items=UInt64Index(evens),
                                 major_axis=UInt64Index(threes),
                                 minor_axis=UInt64Index(fours))

        # Float labels.
        self.series_floats = Series(np.random.rand(4),
                                    index=Float64Index(range(0, 8, 2)))
        self.frame_floats = DataFrame(np.random.randn(4, 4),
                                      index=Float64Index(range(0, 8, 2)),
                                      columns=Float64Index(range(0, 12, 3)))
        self.panel_floats = Panel(np.random.rand(4, 4, 4),
                                  items=Float64Index(range(0, 8, 2)),
                                  major_axis=Float64Index(range(0, 12, 3)),
                                  minor_axis=Float64Index(range(0, 16, 4)))

        # MultiIndex labels (one index per axis).
        multi_first = MultiIndex.from_product([[1, 2], [3, 4]])
        multi_second = MultiIndex.from_product([[5, 6], [7, 8]])
        multi_third = MultiIndex.from_product([[9, 10], [11, 12]])

        self.series_multi = Series(np.random.rand(4), index=multi_first)
        self.frame_multi = DataFrame(np.random.randn(4, 4),
                                     index=multi_first,
                                     columns=multi_second)
        self.panel_multi = Panel(np.random.rand(4, 4, 4),
                                 items=multi_first,
                                 major_axis=multi_second,
                                 minor_axis=multi_third)

        # String labels.
        self.series_labels = Series(np.random.randn(4), index=list('abcd'))
        self.frame_labels = DataFrame(np.random.randn(4, 4),
                                      index=list('abcd'),
                                      columns=list('ABCD'))
        self.panel_labels = Panel(np.random.randn(4, 4, 4),
                                  items=list('abcd'),
                                  major_axis=list('ABCD'),
                                  minor_axis=list('ZYXW'))

        # Mixed integer/string labels.
        mixed_labels = [2, 4, 'null', 8]
        self.series_mixed = Series(np.random.randn(4), index=mixed_labels)
        self.frame_mixed = DataFrame(np.random.randn(4, 4),
                                     index=mixed_labels)
        self.panel_mixed = Panel(np.random.randn(4, 4, 4),
                                 items=mixed_labels)

        # Datetime labels, ascending.
        self.series_ts = Series(np.random.randn(4),
                                index=date_range('20130101', periods=4))
        self.frame_ts = DataFrame(np.random.randn(4, 4),
                                  index=date_range('20130101', periods=4))
        self.panel_ts = Panel(np.random.randn(4, 4, 4),
                              items=date_range('20130101', periods=4))

        # Datetime labels, descending.
        dates_rev = date_range('20130101', periods=4)
        dates_rev = dates_rev.sort_values(ascending=False)
        self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)
        self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), items=dates_rev)

        # Empty containers.
        self.frame_empty = DataFrame({})
        self.series_empty = Series({})
        self.panel_empty = Panel({})

        # form agglomerates
        for obj_kind in self._objs:
            by_typ = {
                typ: getattr(self, '%s_%s' % (obj_kind, typ), None)
                for typ in self._typs
            }
            setattr(self, obj_kind, by_typ)
Esempio n. 35
0
    def test_per_axis_per_level_getitem(self):
        """Check ``.loc`` with per-level selectors on MultiIndex axes.

        Covers slices, lists, scalars and boolean masks applied to
        individual levels of the row and column indexes, plus the error
        raised when slicing an index that is not lexsorted.
        """

        def keys_where(index, first_labels, third_labels):
            # Index tuples whose level-0 label is in ``first_labels`` and
            # whose level-2 label is in ``third_labels``.
            return [
                (a, b, c, d)
                for a, b, c, d in index.values
                if a in first_labels and c in third_labels
            ]

        everything = slice(None)
        a1_to_a3 = ("A1", "A2", "A3")

        # GH6134
        # example test case
        deep_index = MultiIndex.from_product([
            _mklbl("A", 5),
            _mklbl("B", 7),
            _mklbl("C", 4),
            _mklbl("D", 2),
        ])
        deep_df = DataFrame(np.arange(len(deep_index.to_numpy())),
                            index=deep_index)

        result = deep_df.loc[(slice("A1", "A3"), everything, ["C1", "C3"]), :]
        expected = deep_df.loc[
            keys_where(deep_df.index, a1_to_a3, ("C1", "C3"))]
        tm.assert_frame_equal(result, expected)

        expected = deep_df.loc[
            keys_where(deep_df.index, a1_to_a3, ("C1", "C2", "C3"))]
        result = deep_df.loc[
            (slice("A1", "A3"), everything, slice("C1", "C3")), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        row_index = MultiIndex.from_tuples(
            [("A", 1), ("A", 2), ("A", 3), ("B", 1)],
            names=["one", "two"],
        )
        col_index = MultiIndex.from_tuples(
            [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
            names=["lvl0", "lvl1"],
        )
        frame = DataFrame(np.arange(16, dtype="int64").reshape(4, 4),
                          index=row_index,
                          columns=col_index)
        frame = frame.sort_index(axis=0).sort_index(axis=1)

        # identity
        both_levels = (everything, everything)
        tm.assert_frame_equal(frame.loc[both_levels, :], frame)
        tm.assert_frame_equal(frame.loc[both_levels, both_levels], frame)
        tm.assert_frame_equal(frame.loc[:, both_levels], frame)

        # index
        result = frame.loc[(everything, [1]), :]
        tm.assert_frame_equal(result, frame.iloc[[0, 3]])

        result = frame.loc[(everything, 1), :]
        tm.assert_frame_equal(result, frame.iloc[[0, 3]])

        # columns
        result = frame.loc[:, (everything, ["foo"])]
        tm.assert_frame_equal(result, frame.iloc[:, [1, 3]])

        # both
        result = frame.loc[(everything, 1), (everything, ["foo"])]
        tm.assert_frame_equal(result, frame.iloc[[0, 3], [1, 3]])

        result = frame.loc["A", "a"]
        expected = DataFrame(
            {"bar": [1, 5, 9], "foo": [0, 4, 8]},
            index=Index([1, 2, 3], name="two"),
            columns=Index(["bar", "foo"], name="lvl1"),
        )
        tm.assert_frame_equal(result, expected)

        result = frame.loc[(everything, [1, 2]), :]
        tm.assert_frame_equal(result, frame.iloc[[0, 1, 3]])

        # multi-level series
        deep_series = Series(np.arange(len(deep_index.to_numpy())),
                             index=deep_index)
        result = deep_series.loc["A1":"A3", :, ["C1", "C3"]]
        expected = deep_series.loc[
            keys_where(deep_series.index, a1_to_a3, ("C1", "C3"))]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        bar_above_five = frame.loc[:, ("a", "bar")] > 5
        result = frame.loc[(everything, bar_above_five), :]
        tm.assert_frame_equal(result, frame.iloc[[2, 3]])

        with pytest.raises(ValueError):
            frame.loc[(everything, np.array([True, False])), :]

        # ambiguous notation
        # this is interpreted as slicing on both axes (GH #16396)
        result = frame.loc[everything, [1]]
        tm.assert_frame_equal(result, frame.iloc[:, []])

        result = frame.loc[(everything, [1]), :]
        tm.assert_frame_equal(result, frame.iloc[[0, 3]])

        # not lexsorted
        assert frame.index.lexsort_depth == 2
        unsorted = frame.sort_index(level=1, axis=0)
        assert unsorted.index.lexsort_depth == 0

        msg = ("MultiIndex slicing requires the index to be "
               r"lexsorted: slicing on levels \[1\], lexsort depth 0")
        with pytest.raises(UnsortedIndexError, match=msg):
            unsorted.loc[(everything, slice("bar")), :]

        # GH 16734: not sorted, but no real slicing
        bar_above_five = unsorted.loc[:, ("a", "bar")] > 5
        result = unsorted.loc[(everything, bar_above_five), :]
        tm.assert_frame_equal(result, unsorted.iloc[[1, 3], :])
Esempio n. 36
0
    def setup_method(self, method):
        """Populate ``self`` with Series/DataFrame fixtures per index type.

        Each fixture is stored as ``<kind>_<typ>`` (for example
        ``series_ints``). Afterwards, for every name in ``self._kinds`` a
        dict mapping each entry of ``self._typs`` to the matching fixture is
        stored under that name; a missing fixture raises ``AttributeError``.
        ``method`` is not used.
        """

        def uint_axis(stop, step):
            # Fresh unsigned index over range(0, stop, step).
            return UInt64Index(range(0, stop, step))

        def float_axis(stop, step):
            # Fresh float index over range(0, stop, step).
            return Float64Index(range(0, stop, step))

        def four_days():
            # Fresh four-day DatetimeIndex starting 2013-01-01.
            return date_range("20130101", periods=4)

        # Plain integer labels.
        self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2))
        self.frame_ints = DataFrame(
            np.random.randn(4, 4),
            index=np.arange(0, 8, 2),
            columns=np.arange(0, 12, 3),
        )

        # Unsigned 64-bit integer labels.
        self.series_uints = Series(
            np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2))
        )
        self.frame_uints = DataFrame(
            np.random.randn(4, 4),
            index=uint_axis(8, 2),
            columns=uint_axis(12, 3),
        )

        # Float labels.
        self.series_floats = Series(np.random.rand(4), index=float_axis(8, 2))
        self.frame_floats = DataFrame(
            np.random.randn(4, 4),
            index=float_axis(8, 2),
            columns=float_axis(12, 3),
        )

        # MultiIndex labels; the third index is built but not used here.
        multi_rows = MultiIndex.from_product([[1, 2], [3, 4]])
        multi_cols = MultiIndex.from_product([[5, 6], [7, 8]])
        MultiIndex.from_product([[9, 10], [11, 12]])

        self.series_multi = Series(np.random.rand(4), index=multi_rows)
        self.frame_multi = DataFrame(
            np.random.randn(4, 4), index=multi_rows, columns=multi_cols
        )

        # String labels.
        self.series_labels = Series(np.random.randn(4), index=list("abcd"))
        self.frame_labels = DataFrame(
            np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")
        )

        # Mixed integer/string labels.
        self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8])
        self.frame_mixed = DataFrame(
            np.random.randn(4, 4), index=[2, 4, "null", 8]
        )

        # Datetime labels, ascending.
        self.series_ts = Series(np.random.randn(4), index=four_days())
        self.frame_ts = DataFrame(np.random.randn(4, 4), index=four_days())

        # Datetime labels, descending (one index shared by both fixtures).
        dates_rev = four_days().sort_values(ascending=False)
        self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)

        # Empty containers.
        self.frame_empty = DataFrame()
        self.series_empty = Series()

        # form agglomerates
        for kind in self._kinds:
            fixtures = {
                typ: getattr(self, "{kind}_{typ}".format(kind=kind, typ=typ))
                for typ in self._typs
            }
            setattr(self, kind, fixtures)
class PerformanceTestCase(TestCase):
    # Class-level fixtures. They are evaluated once, when the class body
    # runs, and are referenced by the ``@parameterized.expand`` argument
    # lists below.

    # Daily dates from 2015-01-01 through 2015-01-02, index named "date".
    dr = date_range(start="2015-1-1", end="2015-1-2", name="date")
    tickers = ["A", "B", "C", "D"]
    # One factor value per (date, ticker): the wide frame is stacked into a
    # Series named "factor".
    factor = (
        DataFrame(index=dr, columns=tickers, data=[[1, 2, 3, 4], [4, 3, 2, 1]])
        .stack()
        .rename("factor")
    )
    factor.index = factor.index.set_names(["date", "asset"])
    # Frame with the factor values plus a categorical "group" column on the
    # same (date, asset) index.
    # NOTE(review): the same object is passed to every parameterized case of
    # test_information_coefficient / test_mean_information_coefficient, and
    # both assign a "1D" column to it.
    factor_data = DataFrame(
        {
            "factor": factor,
            "group": Series(
                index=factor.index,
                data=[1, 1, 2, 2, 1, 1, 2, 2],
                dtype="category",
            ),
        }
    )

    @parameterized.expand(
        [
            (
                factor_data,
                [4, 3, 2, 1, 1, 2, 3, 4],
                False,
                False,
                dr,
                [-1.0, -1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                False,
                False,
                dr,
                [1.0, 1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                False,
                True,
                MultiIndex.from_product(
                    [dr, Categorical([1, 2])], names=["date", "group"]
                ),
                [1.0, 1.0, 1.0, 1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                True,
                True,
                MultiIndex.from_product(
                    [dr, Categorical([1, 2])], names=["date", "group"]
                ),
                [1.0, 1.0, 1.0, 1.0],
            ),
        ]
    )
    def test_information_coefficient(
        self,
        factor_data,
        forward_returns,
        group_adjust,
        by_group,
        expected_ix,
        expected_ic_val,
    ):
        """Check ``factor_information_coefficient`` against known values.

        Parameters
        ----------
        factor_data : DataFrame
            Factor frame supplied by the parameterized cases.
        forward_returns : list
            Values stored as the "1D" forward-return column.
        group_adjust, by_group : bool
            Forwarded unchanged to ``factor_information_coefficient``.
        expected_ix : Index
            Index of the expected result frame.
        expected_ic_val : list
            Expected values of the single "1D" result column.
        """
        # Every parameterized case receives the same class-level frame.
        # Work on a copy so the added "1D" column does not leak into the
        # shared fixture and make tests depend on execution order.
        factor_data = factor_data.copy()
        factor_data["1D"] = Series(
            index=factor_data.index, data=forward_returns
        )

        ic = factor_information_coefficient(
            factor_data=factor_data,
            group_adjust=group_adjust,
            by_group=by_group,
        )

        expected_ic_df = DataFrame(
            index=expected_ix,
            columns=Index(["1D"], dtype="object"),
            data=expected_ic_val,
        )

        assert_frame_equal(ic, expected_ic_df)

    @parameterized.expand(
        [
            (
                factor_data,
                [4, 3, 2, 1, 1, 2, 3, 4],
                False,
                False,
                "D",
                dr,
                [-1.0, -1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                False,
                False,
                "W",
                DatetimeIndex(["2015-01-04"], name="date", freq="W-SUN"),
                [1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                False,
                True,
                None,
                CategoricalIndex([1, 2], name="group"),
                [1.0, 1.0],
            ),
            (
                factor_data,
                [1, 2, 3, 4, 4, 3, 2, 1],
                False,
                True,
                "W",
                MultiIndex.from_product(
                    [
                        DatetimeIndex(
                            ["2015-01-04"], name="date", freq="W-SUN"
                        ),
                        Categorical([1, 2]),
                    ],
                    names=["date", "group"],
                ),
                [1.0, 1.0],
            ),
        ]
    )
    def test_mean_information_coefficient(
        self,
        factor_data,
        forward_returns,
        group_adjust,
        by_group,
        by_time,
        expected_ix,
        expected_ic_val,
    ):
        """Check ``mean_information_coefficient`` against known values.

        Parameters
        ----------
        factor_data : DataFrame
            Factor frame supplied by the parameterized cases.
        forward_returns : list
            Values stored as the "1D" forward-return column.
        group_adjust, by_group : bool
            Forwarded unchanged to ``mean_information_coefficient``.
        by_time : str or None
            Forwarded unchanged as ``by_time`` (the cases use "D", "W" and
            ``None``).
        expected_ix : Index
            Index of the expected result frame.
        expected_ic_val : list
            Expected values of the single "1D" result column.
        """
        # Every parameterized case receives the same class-level frame.
        # Work on a copy so the added "1D" column does not leak into the
        # shared fixture and make tests depend on execution order.
        factor_data = factor_data.copy()
        factor_data["1D"] = Series(
            index=factor_data.index, data=forward_returns
        )

        ic = mean_information_coefficient(
            factor_data,
            group_adjust=group_adjust,
            by_group=by_group,
            by_time=by_time,
        )

        expected_ic_df = DataFrame(
            index=expected_ix,
            columns=Index(["1D"], dtype="object"),
            data=expected_ic_val,
        )

        assert_frame_equal(ic, expected_ic_df)

    @parameterized.expand(
        [
            (
                [1.1, 1.2, 1.1, 1.2, 1.1, 1.2],
                [[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]],
                2,
                False,
                [0.1, 0.2],
            ),
            (
                [1.1, 1.2, 1.1, 1.2, 1.1, 1.2],
                [[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]],
                2,
                True,
                [0.1, 0.1, 0.2, 0.2],
            ),
            (
                [1.1, 1.1, 1.1, 1.2, 1.2, 1.2],
                [[1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3]],
                3,
                False,
                [0.15, 0.15, 0.15],
            ),
            (
                [1.1, 1.1, 1.1, 1.2, 1.2, 1.2],
                [[1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3]],
                3,
                True,
                [0.1, 0.2, 0.1, 0.2, 0.1, 0.2],
            ),
            (
                [1.5, 1.5, 1.2, 1.0, 1.0, 1.0],
                [[1, 1, 2, 2, 2, 2], [2, 2, 1, 2, 2, 2], [2, 2, 1, 2, 2, 2]],
                2,
                False,
                [0.3, 0.15],
            ),
            (
                [1.5, 1.5, 1.2, 1.0, 1.0, 1.0],
                [[1, 1, 3, 2, 2, 2], [3, 3, 1, 2, 2, 2], [3, 3, 1, 2, 2, 2]],
                3,
                False,
                [0.3, 0.0, 0.4],
            ),
            (
                [1.6, 1.6, 1.0, 1.0, 1.0, 1.0],
                [[1, 1, 2, 2, 2, 2], [2, 2, 1, 1, 1, 1], [2, 2, 1, 1, 1, 1]],
                2,
                False,
                [0.2, 0.4],
            ),
            (
                [1.6, 1.6, 1.0, 1.6, 1.6, 1.0],
                [[1, 1, 2, 1, 1, 2], [2, 2, 1, 2, 2, 1], [2, 2, 1, 2, 2, 1]],
                2,
                True,
                [0.2, 0.2, 0.4, 0.4],
            ),
        ]
    )
    def test_mean_return_by_quantile(
        self, daily_rets, factor, bins, by_group, expected_data
    ):
        """
        Compare mean_return_by_quantile with hand-computed returns.

        Prices for six tickers over four days are built from ``daily_rets``,
        the factor values cover the first three days, and the resulting mean
        returns must equal ``expected_data``.
        """
        tickers = ["A", "B", "C", "D", "E", "F"]
        factor_groups = dict(zip(tickers, [1, 1, 1, 2, 2, 2]))

        # Price of ticker ``col`` on day ``day`` is daily_rets[col] ** day.
        price_data = [
            [daily_rets[col] ** day for col in range(len(tickers))]
            for day in range(1, 5)  # 4 days
        ]

        # Prices run one day past the factor values (1D fwd returns).
        price_index = date_range(
            start="2015-1-11", end="2015-1-14", name="date"
        )
        prices = DataFrame(index=price_index, columns=tickers, data=price_data)

        factor_index = date_range(
            start="2015-1-11", end="2015-1-13", name="date"
        )
        wide_factor = DataFrame(
            index=factor_index, columns=tickers, data=factor
        )
        stacked_factor = wide_factor.stack()

        factor_data = get_clean_factor_and_forward_returns(
            stacked_factor,
            prices,
            groupby=factor_groups,
            quantiles=None,
            bins=bins,
            periods=(1,),
        )

        # Only the mean is checked; the second returned value is ignored.
        mean_quant_ret, _ = mean_return_by_quantile(
            factor_data,
            by_date=False,
            by_group=by_group,
            demeaned=False,
            group_adjust=False,
        )

        # Reuse the result's own labels so only the values are compared.
        expected = DataFrame(
            index=mean_quant_ret.index.copy(),
            columns=mean_quant_ret.columns.copy(),
            data=expected_data,
        )
        expected.index.name = "factor_quantile"

        assert_frame_equal(mean_quant_ret, expected)

    @parameterized.expand(
        [
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                4.0,
                1,
                [nan, 1.0, 1.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                4.0,
                1,
                [nan, 1.0, 1.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                4.0,
                2,
                [nan, nan, 0.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                4.0,
                2,
                [nan, nan, 0.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                4.0,
                3,
                [nan, nan, nan, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                4.0,
                3,
                [nan, nan, nan, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                3.0,
                1,
                [nan, 0.0, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                3.0,
                1,
                [nan, 0.0, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                3.0,
                2,
                [nan, nan, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                3.0,
                2,
                [nan, nan, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                3.0,
                3,
                [nan, nan, nan, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                3.0,
                3,
                [nan, nan, nan, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                ],
                "1B",
                2.0,
                1,
                [nan, 1.0, 1.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                ],
                "1D",
                2.0,
                1,
                [nan, 1.0, 1.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                ],
                "1B",
                3.0,
                4,
                [nan, nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                ],
                "1D",
                3.0,
                4,
                [nan, nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                3.0,
                10,
                [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 3.0, 2.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                3.0,
                10,
                [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0.0, 1.0],
            ),
        ]
    )
    def test_quantile_turnover(
        self, quantile_values, freq, test_quantile, period, expected_vals
    ):
        """Compare ``quantile_turnover`` with the expected per-date values.

        ``quantile_values`` holds one row of quantile labels per date for
        the tickers A-D; dates start on 2015-01-01 and are spaced by
        ``freq``.
        """
        dates = date_range(
            start="2015-1-1",
            periods=len(quantile_values),
            freq=freq,
            name="date",
        )
        wide_quantiles = DataFrame(
            index=dates, columns=["A", "B", "C", "D"], data=quantile_values
        )
        # Long form: one quantile label per (date, asset).
        quantized_test_factor = wide_quantiles.rename_axis(
            "asset", axis=1
        ).stack()

        turnover = quantile_turnover(
            quantized_test_factor, test_quantile, period
        )

        # Expected values are indexed by the date level of the factor index
        # and named after the quantile under test.
        expected = Series(
            index=quantized_test_factor.index.levels[0], data=expected_vals
        )
        expected = expected.rename(test_quantile)

        assert_series_equal(turnover, expected)

    @parameterized.expand(
        [
            (
                [[3, 4, 2, 1, nan], [3, 4, -2, -1, nan], [3, nan, nan, 1, 4]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                False,
                False,
                False,
                [
                    0.30,
                    0.40,
                    0.20,
                    0.10,
                    0.30,
                    0.40,
                    -0.20,
                    -0.10,
                    0.375,
                    0.125,
                    0.50,
                ],
            ),
            (
                [[3, 4, 2, 1, nan], [3, 4, -2, -1, nan], [3, nan, nan, 1, 4]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                True,
                False,
                False,
                [
                    0.125,
                    0.375,
                    -0.125,
                    -0.375,
                    0.20,
                    0.30,
                    -0.30,
                    -0.20,
                    0.10,
                    -0.50,
                    0.40,
                ],
            ),
            (
                [[3, 4, 2, 1, nan], [-3, 4, -2, 1, nan], [2, 2, 2, 3, 1]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                False,
                True,
                False,
                [
                    0.30,
                    0.40,
                    0.20,
                    0.10,
                    -0.30,
                    0.40,
                    -0.20,
                    0.10,
                    0.20,
                    0.20,
                    0.20,
                    0.30,
                    0.10,
                ],
            ),
            (
                [[3, 4, 2, 1, nan], [3, 4, -2, -1, nan], [3, nan, nan, 1, 4]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                True,
                True,
                False,
                [
                    0.25,
                    0.25,
                    -0.25,
                    -0.25,
                    0.25,
                    0.25,
                    -0.25,
                    -0.25,
                    -0.50,
                    nan,
                    0.50,
                ],
            ),
            (
                [[3, 4, 2, 1, 5], [3, 4, -2, -1, 5], [3, nan, nan, 1, nan]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                False,
                False,
                True,
                [
                    0.20,
                    0.20,
                    0.20,
                    0.20,
                    0.20,
                    0.20,
                    0.20,
                    -0.20,
                    -0.20,
                    0.20,
                    0.50,
                    0.50,
                ],
            ),
            (
                [[1, 4, 2, 3, nan], [1, 4, -2, -3, nan], [3, nan, nan, 2, 7]],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                True,
                False,
                True,
                [
                    -0.25,
                    0.25,
                    -0.25,
                    0.25,
                    0.25,
                    0.25,
                    -0.25,
                    -0.25,
                    0.0,
                    -0.50,
                    0.50,
                ],
            ),
            (
                [
                    [3, 4, 2, 1, nan],
                    [-3, 4, -2, 1, nan],
                    [3, nan, nan, 1, 4],
                    [3, nan, nan, -1, 4],
                    [3, nan, nan, 1, -4],
                ],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                False,
                True,
                True,
                [
                    0.25,
                    0.25,
                    0.25,
                    0.25,
                    -0.25,
                    0.25,
                    -0.25,
                    0.25,
                    0.25,
                    0.50,
                    0.25,
                    0.25,
                    -0.50,
                    0.25,
                    0.25,
                    0.50,
                    -0.25,
                ],
            ),
            (
                [
                    [1, 4, 2, 3, nan],
                    [3, 4, -2, -1, nan],
                    [3, nan, nan, 2, 7],
                    [3, nan, nan, 2, -7],
                ],
                ["A", "B", "C", "D", "E"],
                {
                    "A": "Group1",
                    "B": "Group2",
                    "C": "Group1",
                    "D": "Group2",
                    "E": "Group1",
                },
                True,
                True,
                True,
                [
                    -0.25,
                    0.25,
                    0.25,
                    -0.25,
                    0.25,
                    0.25,
                    -0.25,
                    -0.25,
                    -0.50,
                    nan,
                    0.50,
                    0.50,
                    nan,
                    -0.50,
                ],
            ),
        ]
    )
    def test_factor_weights(
        self,
        factor_vals,
        tickers,
        groups,
        demeaned,
        group_adjust,
        equal_weight,
        expected_vals,
    ):
        """
        Check factor_weights against the expected weights of each case.

        factor_vals holds one row of factor values per date and one column
        per ticker; groups maps each ticker to a group label. demeaned,
        group_adjust and equal_weight are forwarded positionally to
        factor_weights, and expected_vals lists the expected weights in the
        order of the stacked (date, asset) index.
        """
        dates = date_range("1/12/2000", periods=len(factor_vals))
        wide_factor = DataFrame(
            data=factor_vals, index=dates, columns=tickers
        )

        # Reshape the wide date x ticker table into a long Series keyed by
        # (date, asset).
        factor = wide_factor.stack()
        factor.index = factor.index.set_names(["date", "asset"])
        factor.name = "factor"

        factor_data = DataFrame()
        factor_data["factor"] = factor

        # Look up the group label of the asset found on every index row.
        group_by_asset = Series(groups)
        row_assets = factor.index.get_level_values("asset")
        factor_data["group"] = Series(
            data=group_by_asset[row_assets].values, index=factor.index
        )

        weights = factor_weights(
            factor_data, demeaned, group_adjust, equal_weight
        )

        expected = Series(
            data=expected_vals, index=factor_data.index, name="factor"
        )
        assert_series_equal(weights, expected)

    @parameterized.expand(
        [
            (
                [1, 2, 3, 4, 4, 3, 2, 1],
                [4, 3, 2, 1, 1, 2, 3, 4],
                False,
                [-1.25000, -1.25000],
            ),
            (
                [1, 1, 1, 1, 1, 1, 1, 1],
                [4, 3, 2, 1, 1, 2, 3, 4],
                False,
                [nan, nan],
            ),
            (
                [1, 2, 3, 4, 4, 3, 2, 1],
                [4, 3, 2, 1, 1, 2, 3, 4],
                True,
                [-0.5, -0.5],
            ),
            (
                [1, 2, 3, 4, 1, 2, 3, 4],
                [1, 4, 1, 2, 1, 2, 2, 1],
                True,
                [1.0, 0.0],
            ),
            (
                [1, 1, 1, 1, 1, 1, 1, 1],
                [4, 3, 2, 1, 1, 2, 3, 4],
                True,
                [nan, nan],
            ),
        ]
    )
    def test_factor_returns(
        self, factor_vals, fwd_return_vals, group_adjust, expected_vals
    ):
        """
        Check factor_returns on a copy of the shared factor_data fixture.

        The copy gets its "1D" column set to fwd_return_vals and its
        "factor" column set to factor_vals. factor_returns is always called
        with demeaned=True; group_adjust comes from the test case. The
        result must equal a frame built from expected_vals over self.dr.
        """
        fixture = self.factor_data.copy()
        fixture["1D"] = fwd_return_vals
        fixture["factor"] = factor_vals

        actual = factor_returns(
            factor_data=fixture, demeaned=True, group_adjust=group_adjust
        )

        return_columns = get_forward_returns_columns(fixture.columns)
        expected = DataFrame(
            data=expected_vals, index=self.dr, columns=return_columns
        )
        assert_frame_equal(actual, expected)

    @parameterized.expand([([1, 2, 3, 4, 1, 1, 1, 1], -1, 5.0 / 6.0)])
    def test_factor_alpha_beta(self, fwd_return_vals, alpha, beta):
        """
        Check factor_alpha_beta on a copy of the shared factor_data fixture.

        The copy gets its "1D" column set to fwd_return_vals; the result
        must be a single-column ("1D") frame with rows "Ann. alpha" and
        "beta" holding the expected alpha and beta.
        """
        fixture = self.factor_data.copy()
        fixture["1D"] = fwd_return_vals

        actual = factor_alpha_beta(factor_data=fixture)

        expected = DataFrame(
            data=[alpha, beta], index=["Ann. alpha", "beta"], columns=["1D"]
        )
        assert_frame_equal(actual, expected)

    @parameterized.expand(
        [
            (
                [1.0, 0.5, 1.0, 0.5, 0.5],
                "1D",
                "1D",
                [2.0, 3.0, 6.0, 9.0, 13.50],
            ),
            (
                [0.1, 0.1, 0.1, 0.1, 0.1],
                "1D",
                "1D",
                [1.1, 1.21, 1.331, 1.4641, 1.61051],
            ),
            (
                [-0.1, -0.1, -0.1, -0.1, -0.1],
                "1D",
                "1D",
                [0.9, 0.81, 0.729, 0.6561, 0.59049],
            ),
            (
                [1.0, 0.5, 1.0, 0.5, 0.5],
                "1B",
                "1D",
                [2.0, 3.0, 6.0, 9.0, 13.50],
            ),
            (
                [0.1, 0.1, 0.1, 0.1, 0.1],
                "1B",
                "1D",
                [1.1, 1.21, 1.331, 1.4641, 1.61051],
            ),
            (
                [-0.1, -0.1, -0.1, -0.1, -0.1],
                "1B",
                "1D",
                [0.9, 0.81, 0.729, 0.6561, 0.59049],
            ),
            (
                [1.0, 0.5, 1.0, 0.5, 0.5],
                "1CD",
                "1D",
                [2.0, 3.0, 6.0, 9.0, 13.50],
            ),
            (
                [0.1, 0.1, 0.1, 0.1, 0.1],
                "1CD",
                "1D",
                [1.1, 1.21, 1.331, 1.4641, 1.61051],
            ),
            (
                [-0.1, -0.1, -0.1, -0.1, -0.1],
                "1CD",
                "1D",
                [0.9, 0.81, 0.729, 0.6561, 0.59049],
            ),
        ]
    )
    def test_cumulative_returns(
        self, returns, ret_freq, period_len, expected_vals
    ):
        """
        Check cumulative_returns for several index frequencies.

        returns is a list of per-period returns, ret_freq is the frequency
        string used to build their DatetimeIndex, and expected_vals are the
        expected cumulative values. period_len is supplied by the
        parameterized cases but is not needed by cumulative_returns; it is
        kept in the signature so the case tuples still match.
        """
        # A frequency string containing "CD" is replaced by an explicit
        # CDay offset with a fixed weekmask; every other frequency string
        # is handed to date_range unchanged.
        if "CD" in ret_freq:
            ret_freq = CDay(weekmask="Tue Wed Thu Fri Sun")

        index = date_range("1/1/1999", periods=len(returns), freq=ret_freq)
        returns = Series(returns, index=index)

        cum_ret = cumulative_returns(returns)

        expected = Series(expected_vals, index=cum_ret.index)

        assert_series_equal(cum_ret, expected)

    @parameterized.expand(
        [
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                1,
                [nan, 1.0, 1.0, 1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                1,
                [nan, 1.0, 1.0, 1.0],
            ),
            (
                [
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1B",
                1,
                [nan, -1.0, -1.0, -1.0],
            ),
            (
                [
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                ],
                "1D",
                1,
                [nan, -1.0, -1.0, -1.0],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                ],
                "1B",
                3,
                [
                    nan,
                    nan,
                    nan,
                    1.0,
                    1.0,
                    1.0,
                    0.6,
                    -0.6,
                    -1.0,
                    1.0,
                    -0.6,
                    -1.0,
                ],
            ),
            (
                [
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                    [1.0, 2.0, 3.0, 4.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [2.0, 1.0, 4.0, 3.0],
                    [4.0, 3.0, 2.0, 1.0],
                ],
                "1D",
                3,
                [
                    nan,
                    nan,
                    nan,
                    1.0,
                    1.0,
                    1.0,
                    0.6,
                    -0.6,
                    -1.0,
                    1.0,
                    -0.6,
                    -1.0,
                ],
            ),
        ]
    )
    def test_factor_rank_autocorrelation(
        self, factor_values, freq, period, expected_vals
    ):
        """
        Check factor_rank_autocorrelation against the expected series.

        factor_values holds one row per date (dates start at 2015-1-1 with
        the given freq) and one column per ticker A-D. The result for the
        given period must equal expected_vals indexed by those dates, with
        the series named after period.
        """
        dates = date_range(
            start="2015-1-1",
            periods=len(factor_values),
            freq=freq,
            name="date",
        )
        assets = ["A", "B", "C", "D"]

        # Wide date x asset table -> long Series keyed by (date, asset).
        wide_factor = DataFrame(
            index=dates, columns=assets, data=factor_values
        )
        factor = wide_factor.rename_axis("asset", axis=1).stack()

        factor_df = DataFrame(data=factor, columns=["factor"])

        actual = factor_rank_autocorrelation(factor_df, period)

        expected = Series(data=expected_vals, index=dates, name=period)
        assert_series_equal(actual, expected)

    @parameterized.expand(
        [
            (
                2,
                3,
                False,
                False,
                [
                    [4.93048307, 8.68843922],
                    [6.60404312, 12.22369139],
                    [8.92068367, 17.1794088],
                    [12.1275523, 24.12861778],
                    [16.5694159, 33.8740100],
                    [22.7273233, 47.53995233],
                ],
            ),
            (
                3,
                2,
                False,
                True,
                [
                    [0.0, 5.63219176],
                    [0.0, 7.96515233],
                    [0.0, 11.2420646],
                    [0.0, 15.8458720],
                    [0.0, 22.3134160],
                    [0.0, 31.3970961],
                ],
            ),
            (
                3,
                5,
                True,
                False,
                [
                    [3.7228318, 2.6210478],
                    [4.9304831, 3.6296796],
                    [6.6040431, 5.0193734],  # noqa
                    [8.9206837, 6.9404046],
                    [12.127552, 9.6023405],
                    [16.569416, 13.297652],  # noqa
                    [22.727323, 18.434747],
                    [31.272682, 25.584180],
                    [34.358565, 25.497254],
                ],  # noqa
            ),
            (
                1,
                4,
                True,
                True,
                [
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                ],
            ),
            (
                6,
                6,
                False,
                False,
                [
                    [2.02679565, 2.38468223],
                    [2.38769454, 3.22602748],
                    [2.85413029, 4.36044469],
                    [3.72283181, 6.16462715],
                    [4.93048307, 8.68843922],
                    [6.60404312, 12.2236914],
                    [8.92068367, 17.1794088],
                    [12.1275523, 24.1286178],
                    [16.5694159, 33.8740100],
                    [22.7273233, 47.5399523],
                    [31.2726821, 66.7013483],
                    [34.3585654, 70.1828776],
                    [37.9964585, 74.3294620],
                ],
            ),
            (
                6,
                6,
                False,
                True,
                [
                    [0.0, 2.20770299],
                    [0.0, 2.95942924],
                    [0.0, 3.97022414],
                    [0.0, 5.63219176],
                    [0.0, 7.96515233],
                    [0.0, 11.2420646],
                    [0.0, 15.8458720],
                    [0.0, 22.3134160],
                    [0.0, 31.3970962],
                    [0.0, 44.1512888],
                    [0.0, 62.0533954],
                    [0.0, 65.8668371],
                    [0.0, 70.4306483],
                ],
            ),
            (
                6,
                6,
                True,
                False,
                [
                    [2.0267957, 0.9562173],
                    [2.3876945, 1.3511898],
                    [2.8541303, 1.8856194],  # noqa
                    [3.7228318, 2.6210478],
                    [4.9304831, 3.6296796],
                    [6.6040431, 5.0193734],  # noqa
                    [8.9206837, 6.9404046],
                    [12.127552, 9.6023405],
                    [16.569416, 13.297652],  # noqa
                    [22.727323, 18.434747],
                    [31.272682, 25.584180],
                    [34.358565, 25.497254],  # noqa
                    [37.996459, 25.198051],
                ],
            ),
            (
                6,
                6,
                True,
                True,
                [
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                ],
            ),
        ]
    )
    def test_common_start_returns(
        self, before, after, mean_by_date, demeaned, expected_vals
    ):
        """
        Check the mean/std of common_start_returns over a fixed data set.

        Returns are powers of four constant rates over 2015-1-17..2015-2-2
        and the factor is the same row [3, 4, 2, 1] on every date from
        2015-1-21 to 2015-1-29. common_start_returns is called with
        cumulative=True; when demeaned is true the factor itself is passed
        as demean_by. The row-wise mean and std of the result must match
        expected_vals over the window -before..after.
        """
        assets = ["A", "B", "C", "D"]

        return_dates = date_range(
            start="2015-1-17", end="2015-2-2", name="date"
        )
        rates = (1.20, 1.40, 0.90, 0.80)
        return_rows = [[rate ** i for rate in rates] for i in range(1, 18)]
        returns = DataFrame(
            data=return_rows, index=return_dates, columns=assets
        )

        factor_dates = date_range(start="2015-1-21", end="2015-1-29")
        # One identical factor row per factor date.
        factor_rows = [[3, 4, 2, 1] for _ in factor_dates]
        factor = DataFrame(
            index=factor_dates, columns=assets, data=factor_rows
        ).stack()
        factor.index = factor.index.set_names(["date", "asset"])
        factor.name = "factor"

        demean_by = factor if demeaned else None
        raw = common_start_returns(
            factor,
            returns,
            before,
            after,
            cumulative=True,
            mean_by_date=mean_by_date,
            demean_by=demean_by,
        )

        # Collapse the result to per-row mean and standard deviation.
        summary = DataFrame({"mean": raw.mean(axis=1), "std": raw.std(axis=1)})
        expected = DataFrame(
            index=range(-before, after + 1),
            columns=["mean", "std"],
            data=expected_vals,
        )
        assert_frame_equal(summary, expected)

    @parameterized.expand(
        [
            (
                1,
                2,
                False,
                4,
                [
                    [0.00512695, 0.00256348, 0.00128174, 6.40869e-4],
                    [0.00579185, 0.00289592, 0.00144796, 7.23981e-4],
                    [1.00000000, 1.00000000, 1.00000000, 1.00000000],
                    [0.00000000, 0.00000000, 0.00000000, 0.00000000],
                    [7.15814531, 8.94768164, 11.1846020, 13.9807526],
                    [2.93784787, 3.67230984, 4.59038730, 5.73798413],
                    [39.4519043, 59.1778564, 88.7667847, 133.150177],
                    [28.3717330, 42.5575995, 63.8363992, 95.7545989],
                ],
            ),
            (
                1,
                2,
                True,
                4,
                [
                    [-11.898667, -17.279462, -25.236885, -37.032252],
                    [7.82587034, 11.5529583, 17.0996881, 25.3636472],
                    [-10.903794, -16.282025, -24.238167, -36.032893],
                    [7.82140124, 11.5507268, 17.0985737, 25.3630906],
                    [-4.7456488, -8.3343438, -14.053565, -23.052140],
                    [4.91184665, 7.91180853, 12.5481552, 19.6734224],
                    [27.5481102, 41.8958311, 63.5286176, 96.1172844],
                    [20.5510133, 31.0075980, 46.7385910, 70.3923129],
                ],
            ),
            (
                3,
                0,
                False,
                4,
                [
                    [7.0, 3.0, 1.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                    [-0.488, -0.36, -0.2, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                    [-0.703704, -0.55555555, -0.333333333, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                ],
            ),
            (
                0,
                3,
                True,
                4,
                [
                    [-17.279462, -25.236885, -37.032252, -54.550061],
                    [11.5529583, 17.0996881, 25.3636472, 37.6887906],
                    [-16.282025, -24.238167, -36.032893, -53.550382],
                    [11.5507268, 17.0985737, 25.3630906, 37.6885125],
                    [-8.3343438, -14.053565, -23.052140, -37.074441],
                    [7.91180853, 12.5481552, 19.6734224, 30.5748605],
                    [41.8958311, 63.5286176, 96.1172844, 145.174884],
                    [31.0075980, 46.7385910, 70.3923129, 105.944230],
                ],
            ),
            (
                3,
                3,
                False,
                2,
                [
                    [
                        0.5102539,
                        0.50512695,
                        0.50256348,
                        0.50128174,
                        0.50064087,
                        0.50032043,
                        0.50016022,
                    ],  # noqa
                    [
                        0.0115837,
                        0.00579185,
                        0.00289592,
                        1.44796e-3,
                        7.23981e-4,
                        3.61990e-4,
                        1.80995e-4,
                    ],  # noqa
                    [
                        11.057696,
                        16.0138929,
                        23.3050248,
                        34.0627690,
                        49.9756934,
                        73.5654648,
                        108.600603,
                    ],  # noqa
                    [
                        7.2389454,
                        10.6247239,
                        15.6450367,
                        23.1025693,
                        34.1977045,
                        50.7264595,
                        75.3771641,
                    ],
                ],  # noqa
            ),
            (
                3,
                3,
                True,
                2,
                [
                    [
                        -5.273721,
                        -7.754383,
                        -11.40123,
                        -16.78074,
                        -24.73753,
                        -36.53257,
                        -54.05022,
                    ],  # noqa
                    [
                        3.6239580,
                        5.3146000,
                        7.8236356,
                        11.551843,
                        17.099131,
                        25.363369,
                        37.688652,
                    ],  # noqa
                    [
                        5.2737212,
                        7.7543830,
                        11.401231,
                        16.780744,
                        24.737526,
                        36.532572,
                        54.050221,
                    ],  # noqa
                    [
                        3.6239580,
                        5.3146000,
                        7.8236356,
                        11.551843,
                        17.099131,
                        25.363369,
                        37.688652,
                    ],
                ],  # noqa
            ),
        ]
    )
    def test_average_cumulative_return_by_quantile(
        self, before, after, demeaned, quantiles, expected_vals
    ):
        """
        Check average_cumulative_return_by_quantile on a fixed universe.

        The frame passed as returns holds powers of four constant rates
        over 2015-1-15..2015-2-1, and the factor is the row [3, 4, 2, 1] on
        every date from 2015-1-21 to 2015-1-26. The result must equal
        expected_vals laid out with one ("mean", "std") row pair per
        quantile and one column per offset in -before..after.
        """
        assets = ["A", "B", "C", "D"]

        return_dates = date_range(
            start="2015-1-15", end="2015-2-1", name="date"
        )
        rates = (1.25, 1.50, 1.00, 0.50)
        return_rows = [[rate ** i for rate in rates] for i in range(1, 19)]
        returns = DataFrame(
            index=return_dates, columns=assets, data=return_rows
        )

        factor_dates = date_range(
            start="2015-1-21", end="2015-1-26", name="date"
        )
        # One identical factor row per factor date.
        factor_rows = [[3, 4, 2, 1] for _ in factor_dates]
        factor = DataFrame(
            index=factor_dates, columns=assets, data=factor_rows
        ).stack()

        factor_data = get_clean_factor_and_forward_returns(
            factor,
            returns,
            quantiles=quantiles,
            periods=range(after + 1),
            filter_zscore=False,
        )

        actual = average_cumulative_return_by_quantile(
            factor_data, returns, before, after, demeaned
        )

        # Expected row labels: (1, "mean"), (1, "std"), (2, "mean"), ...
        row_labels = [
            (quantile, stat)
            for quantile in range(1, quantiles + 1)
            for stat in ("mean", "std")
        ]
        expected_index = MultiIndex.from_tuples(
            row_labels, names=["factor_quantile", None]
        )
        expected = DataFrame(
            index=expected_index,
            columns=range(-before, after + 1),
            data=expected_vals,
        )
        assert_frame_equal(actual, expected)

    @parameterized.expand(
        [
            (
                0,
                2,
                False,
                4,
                [
                    [0.0292969, 0.0146484, 7.32422e-3],
                    [0.0241851, 0.0120926, 6.04628e-3],
                    [1.0000000, 1.0000000, 1.00000000],
                    [0.0000000, 0.0000000, 0.00000000],
                    [3.5190582, 4.3988228, 5.49852848],
                    [1.0046375, 1.2557969, 1.56974616],
                    [10.283203, 15.424805, 23.1372070],
                    [5.2278892, 7.8418338, 11.7627508],
                ],
            ),
            (
                0,
                3,
                True,
                4,
                [
                    [-3.6785927, -5.1949205, -7.4034407, -10.641996],
                    [1.57386873, 2.28176590, 3.33616491, 4.90228915],
                    [-2.7078896, -4.2095690, -6.4107649, -9.6456583],
                    [1.55205002, 2.27087143, 3.33072273, 4.89956999],
                    [-0.1888313, -0.8107462, -1.9122365, -3.7724977],
                    [0.55371389, 1.02143924, 1.76795263, 2.94536298],
                    [6.57531357, 10.2152357, 15.7264421, 24.0601522],
                    [3.67596914, 5.57112656, 8.43221341, 12.7447568],
                ],
            ),
            (
                0,
                3,
                False,
                2,
                [
                    [0.51464844, 0.50732422, 0.50366211, 0.50183105],
                    [0.01209256, 0.00604628, 0.00302314, 0.00151157],
                    [6.90113068, 9.91181374, 14.3178678, 20.7894856],
                    [3.11499629, 4.54718783, 6.66416616, 9.80049950],
                ],
            ),
            (
                0,
                3,
                True,
                2,
                [
                    [-3.1932411, -4.7022448, -6.9071028, -10.143827],
                    [1.56295067, 2.27631715, 3.33344356, 4.90092953],
                    [3.19324112, 4.70224476, 6.90710282, 10.1438273],
                    [1.56295067, 2.27631715, 3.33344356, 4.90092953],
                ],
            ),
        ]
    )
    def test_average_cumulative_return_by_quantile_2(
        self, before, after, demeaned, quantiles, expected_vals
    ):
        """
        Test varying factor asset universe: at different dates there might be
        different assets.

        The factor covers assets A-D on the first two dates and A, D, E, F
        on the last two (the other cells are NaN). The result must equal
        expected_vals laid out with one ("mean", "std") row pair per
        quantile and one column per offset in -before..after.
        """
        assets = ["A", "B", "C", "D", "E", "F"]

        price_dates = date_range(
            start="2015-1-15", end="2015-1-25", name="date"
        )
        # E and F reuse the rates of B and C respectively.
        rates = (1.25, 1.50, 1.00, 0.50, 1.50, 1.00)
        price_rows = [[rate ** i for rate in rates] for i in range(1, 12)]
        prices = DataFrame(index=price_dates, columns=assets, data=price_rows)

        factor_dates = date_range(
            start="2015-1-18", end="2015-1-21", name="date"
        )
        factor_rows = [
            [3, 4, 2, 1, nan, nan],
            [3, 4, 2, 1, nan, nan],
            [3, nan, nan, 1, 4, 2],
            [3, nan, nan, 1, 4, 2],
        ]
        factor = DataFrame(
            index=factor_dates, columns=assets, data=factor_rows
        ).stack()

        factor_data = get_clean_factor_and_forward_returns(
            factor,
            prices,
            quantiles=quantiles,
            periods=range(after + 1),
            filter_zscore=False,
        )

        actual = average_cumulative_return_by_quantile(
            factor_data, prices, before, after, demeaned
        )

        # Expected row labels: (1, "mean"), (1, "std"), (2, "mean"), ...
        row_labels = [
            (quantile, stat)
            for quantile in range(1, quantiles + 1)
            for stat in ("mean", "std")
        ]
        expected_index = MultiIndex.from_tuples(
            row_labels, names=["factor_quantile", None]
        )
        expected = DataFrame(
            index=expected_index,
            columns=range(-before, after + 1),
            data=expected_vals,
        )
        assert_frame_equal(actual, expected)
Esempio n. 38
0
def makeMultiIndex(k=10, names=None, **kwargs):
    """
    Build the four-entry MultiIndex that is the product of
    ("foo", "bar") and (1, 2).

    k is accepted but not used by this function; names and any extra
    keyword arguments are passed straight to MultiIndex.from_product.
    """
    level_values = (("foo", "bar"), (1, 2))
    return MultiIndex.from_product(level_values, names=names, **kwargs)
Esempio n. 39
0
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Reindexing either level with an empty list keeps the level names."""
    # GH7774
    idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        # reindex returns a tuple; its first item is the new index.
        reindexed = idx.reindex([], level=level)[0]
        assert reindexed.names == ["foo", "bar"]
Esempio n. 40
0
def test_repr_roundtrip_raises():
    """Evaluating the ``repr`` of a MultiIndex is expected to raise TypeError."""
    level_values = [list('ab'), range(3)]
    mi = MultiIndex.from_product(level_values, names=['first', 'second'])
    with pytest.raises(TypeError):
        # NOTE: ``eval`` only ever sees text produced by ``repr`` right here.
        eval(repr(mi))
Esempio n. 41
0
    def test_to_html_multiindex_odd_even_truncate(self):
        """Check ``to_html`` row truncation on a three-level MultiIndex frame.

        The frame has 3 * 3 * 7 = 63 rows (an odd count).  It is rendered
        twice, with ``max_rows=60`` and ``max_rows=56``, and each result is
        compared character-for-character with the expected HTML below.
        """
        # GH 14882 - Issue on truncation with odd length DataFrame
        mi = MultiIndex.from_product([[100, 200, 300],
                                      [10, 20, 30],
                                      [1, 2, 3, 4, 5, 6, 7]],
                                     names=['a', 'b', 'c'])
        df = DataFrame({'n': range(len(mi))}, index=mi)
        # First case: the expected "..." row sits in the innermost level,
        # inside the (200, 20) group.
        result = df.to_html(max_rows=60)
        expected = """\
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th></th>
      <th></th>
      <th>n</th>
    </tr>
    <tr>
      <th>a</th>
      <th>b</th>
      <th>c</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th rowspan="21" valign="top">100</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>0</td>
    </tr>
    <tr>
      <th>2</th>
      <td>1</td>
    </tr>
    <tr>
      <th>3</th>
      <td>2</td>
    </tr>
    <tr>
      <th>4</th>
      <td>3</td>
    </tr>
    <tr>
      <th>5</th>
      <td>4</td>
    </tr>
    <tr>
      <th>6</th>
      <td>5</td>
    </tr>
    <tr>
      <th>7</th>
      <td>6</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">20</th>
      <th>1</th>
      <td>7</td>
    </tr>
    <tr>
      <th>2</th>
      <td>8</td>
    </tr>
    <tr>
      <th>3</th>
      <td>9</td>
    </tr>
    <tr>
      <th>4</th>
      <td>10</td>
    </tr>
    <tr>
      <th>5</th>
      <td>11</td>
    </tr>
    <tr>
      <th>6</th>
      <td>12</td>
    </tr>
    <tr>
      <th>7</th>
      <td>13</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>14</td>
    </tr>
    <tr>
      <th>2</th>
      <td>15</td>
    </tr>
    <tr>
      <th>3</th>
      <td>16</td>
    </tr>
    <tr>
      <th>4</th>
      <td>17</td>
    </tr>
    <tr>
      <th>5</th>
      <td>18</td>
    </tr>
    <tr>
      <th>6</th>
      <td>19</td>
    </tr>
    <tr>
      <th>7</th>
      <td>20</td>
    </tr>
    <tr>
      <th rowspan="19" valign="top">200</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>21</td>
    </tr>
    <tr>
      <th>2</th>
      <td>22</td>
    </tr>
    <tr>
      <th>3</th>
      <td>23</td>
    </tr>
    <tr>
      <th>4</th>
      <td>24</td>
    </tr>
    <tr>
      <th>5</th>
      <td>25</td>
    </tr>
    <tr>
      <th>6</th>
      <td>26</td>
    </tr>
    <tr>
      <th>7</th>
      <td>27</td>
    </tr>
    <tr>
      <th rowspan="5" valign="top">20</th>
      <th>1</th>
      <td>28</td>
    </tr>
    <tr>
      <th>2</th>
      <td>29</td>
    </tr>
    <tr>
      <th>...</th>
      <td>...</td>
    </tr>
    <tr>
      <th>6</th>
      <td>33</td>
    </tr>
    <tr>
      <th>7</th>
      <td>34</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>35</td>
    </tr>
    <tr>
      <th>2</th>
      <td>36</td>
    </tr>
    <tr>
      <th>3</th>
      <td>37</td>
    </tr>
    <tr>
      <th>4</th>
      <td>38</td>
    </tr>
    <tr>
      <th>5</th>
      <td>39</td>
    </tr>
    <tr>
      <th>6</th>
      <td>40</td>
    </tr>
    <tr>
      <th>7</th>
      <td>41</td>
    </tr>
    <tr>
      <th rowspan="21" valign="top">300</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>42</td>
    </tr>
    <tr>
      <th>2</th>
      <td>43</td>
    </tr>
    <tr>
      <th>3</th>
      <td>44</td>
    </tr>
    <tr>
      <th>4</th>
      <td>45</td>
    </tr>
    <tr>
      <th>5</th>
      <td>46</td>
    </tr>
    <tr>
      <th>6</th>
      <td>47</td>
    </tr>
    <tr>
      <th>7</th>
      <td>48</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">20</th>
      <th>1</th>
      <td>49</td>
    </tr>
    <tr>
      <th>2</th>
      <td>50</td>
    </tr>
    <tr>
      <th>3</th>
      <td>51</td>
    </tr>
    <tr>
      <th>4</th>
      <td>52</td>
    </tr>
    <tr>
      <th>5</th>
      <td>53</td>
    </tr>
    <tr>
      <th>6</th>
      <td>54</td>
    </tr>
    <tr>
      <th>7</th>
      <td>55</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>56</td>
    </tr>
    <tr>
      <th>2</th>
      <td>57</td>
    </tr>
    <tr>
      <th>3</th>
      <td>58</td>
    </tr>
    <tr>
      <th>4</th>
      <td>59</td>
    </tr>
    <tr>
      <th>5</th>
      <td>60</td>
    </tr>
    <tr>
      <th>6</th>
      <td>61</td>
    </tr>
    <tr>
      <th>7</th>
      <td>62</td>
    </tr>
  </tbody>
</table>"""
        self.assertEqual(result, expected)

        # Test that ... appears in a middle level
        # (here the whole (200, 20) group is replaced by one "..." row)
        result = df.to_html(max_rows=56)
        expected = """\
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th></th>
      <th></th>
      <th>n</th>
    </tr>
    <tr>
      <th>a</th>
      <th>b</th>
      <th>c</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th rowspan="21" valign="top">100</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>0</td>
    </tr>
    <tr>
      <th>2</th>
      <td>1</td>
    </tr>
    <tr>
      <th>3</th>
      <td>2</td>
    </tr>
    <tr>
      <th>4</th>
      <td>3</td>
    </tr>
    <tr>
      <th>5</th>
      <td>4</td>
    </tr>
    <tr>
      <th>6</th>
      <td>5</td>
    </tr>
    <tr>
      <th>7</th>
      <td>6</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">20</th>
      <th>1</th>
      <td>7</td>
    </tr>
    <tr>
      <th>2</th>
      <td>8</td>
    </tr>
    <tr>
      <th>3</th>
      <td>9</td>
    </tr>
    <tr>
      <th>4</th>
      <td>10</td>
    </tr>
    <tr>
      <th>5</th>
      <td>11</td>
    </tr>
    <tr>
      <th>6</th>
      <td>12</td>
    </tr>
    <tr>
      <th>7</th>
      <td>13</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>14</td>
    </tr>
    <tr>
      <th>2</th>
      <td>15</td>
    </tr>
    <tr>
      <th>3</th>
      <td>16</td>
    </tr>
    <tr>
      <th>4</th>
      <td>17</td>
    </tr>
    <tr>
      <th>5</th>
      <td>18</td>
    </tr>
    <tr>
      <th>6</th>
      <td>19</td>
    </tr>
    <tr>
      <th>7</th>
      <td>20</td>
    </tr>
    <tr>
      <th rowspan="15" valign="top">200</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>21</td>
    </tr>
    <tr>
      <th>2</th>
      <td>22</td>
    </tr>
    <tr>
      <th>3</th>
      <td>23</td>
    </tr>
    <tr>
      <th>4</th>
      <td>24</td>
    </tr>
    <tr>
      <th>5</th>
      <td>25</td>
    </tr>
    <tr>
      <th>6</th>
      <td>26</td>
    </tr>
    <tr>
      <th>7</th>
      <td>27</td>
    </tr>
    <tr>
      <th>...</th>
      <th>...</th>
      <td>...</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>35</td>
    </tr>
    <tr>
      <th>2</th>
      <td>36</td>
    </tr>
    <tr>
      <th>3</th>
      <td>37</td>
    </tr>
    <tr>
      <th>4</th>
      <td>38</td>
    </tr>
    <tr>
      <th>5</th>
      <td>39</td>
    </tr>
    <tr>
      <th>6</th>
      <td>40</td>
    </tr>
    <tr>
      <th>7</th>
      <td>41</td>
    </tr>
    <tr>
      <th rowspan="21" valign="top">300</th>
      <th rowspan="7" valign="top">10</th>
      <th>1</th>
      <td>42</td>
    </tr>
    <tr>
      <th>2</th>
      <td>43</td>
    </tr>
    <tr>
      <th>3</th>
      <td>44</td>
    </tr>
    <tr>
      <th>4</th>
      <td>45</td>
    </tr>
    <tr>
      <th>5</th>
      <td>46</td>
    </tr>
    <tr>
      <th>6</th>
      <td>47</td>
    </tr>
    <tr>
      <th>7</th>
      <td>48</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">20</th>
      <th>1</th>
      <td>49</td>
    </tr>
    <tr>
      <th>2</th>
      <td>50</td>
    </tr>
    <tr>
      <th>3</th>
      <td>51</td>
    </tr>
    <tr>
      <th>4</th>
      <td>52</td>
    </tr>
    <tr>
      <th>5</th>
      <td>53</td>
    </tr>
    <tr>
      <th>6</th>
      <td>54</td>
    </tr>
    <tr>
      <th>7</th>
      <td>55</td>
    </tr>
    <tr>
      <th rowspan="7" valign="top">30</th>
      <th>1</th>
      <td>56</td>
    </tr>
    <tr>
      <th>2</th>
      <td>57</td>
    </tr>
    <tr>
      <th>3</th>
      <td>58</td>
    </tr>
    <tr>
      <th>4</th>
      <td>59</td>
    </tr>
    <tr>
      <th>5</th>
      <td>60</td>
    </tr>
    <tr>
      <th>6</th>
      <td>61</td>
    </tr>
    <tr>
      <th>7</th>
      <td>62</td>
    </tr>
  </tbody>
</table>"""
        self.assertEqual(result, expected)
Esempio n. 42
0
    def setup_cache(self):
        """Build and return the MultiIndex this setup hook provides.

        The index is the product of ``range(1000)`` with 100 dates generated
        by ``date_range`` starting at 1/1/2012.
        """
        dates = date_range(start="1/1/2012", periods=100)
        return MultiIndex.from_product([range(1000), dates])
Esempio n. 43
0
def test_from_product_empty_zero_levels():
    """``from_product`` given no iterables at all must raise ``ValueError``."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_product([])
Esempio n. 44
0
def test_cython_transform_frame(op, args, targop):
    """Compare ``transform(op)`` and the ``op`` method with ``apply(targop)``.

    A 1000-row frame of mixed dtypes is grouped three ways (by an external
    label array, by index level 0, and by the ``"string"`` column), once with
    its default index and once with a 100 x 10 MultiIndex.

    Parameters
    ----------
    op : str
        Name of the groupby method under test; it is compared against
        ``"shift"`` and passed to ``transform`` and ``getattr``.
    args : sequence
        Extra positional arguments unpacked into ``op``.
    targop : callable
        Reference implementation handed to ``apply``.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator outside this view -- confirm.
    """
    s = Series(np.random.randn(1000))
    s_missing = s.copy()
    s_missing.iloc[2:10] = np.nan
    labels = np.random.randint(0, 50, size=1000).astype(float)
    strings = list("qwertyuiopasdfghjklz")
    strings_missing = strings[:]
    strings_missing[5] = np.nan
    df = DataFrame(
        {
            "float": s,
            "float_missing": s_missing,
            "int": [1, 1, 1, 1, 2] * 200,
            "datetime": date_range("1990-1-1", periods=1000),
            "timedelta": pd.timedelta_range(1, freq="s", periods=1000),
            "string": strings * 50,
            "string_missing": strings_missing * 50,
        },
        columns=[
            "float",
            "float_missing",
            "int",
            "datetime",
            "timedelta",
            "string",
            "string_missing",
        ],
    )
    df["cat"] = df["string"].astype("category")

    # Same data, but indexed by a two-level MultiIndex (100 x 10 = 1000 rows).
    df2 = df.copy()
    df2.index = MultiIndex.from_product([range(100), range(10)])

    # DataFrame - Single and MultiIndex,
    # group by values, index level, columns
    # NOTE: the loop variable rebinds the name ``df`` on each iteration.
    for df in [df, df2]:
        for gb_target in [
            {
                "by": labels
            },
            {
                "level": 0
            },
            {
                "by": "string"
            },
        ]:  # {"by": 'string_missing'}]:
            # {"by": ['int','string']}]:

            gb = df.groupby(**gb_target)
            # allowlisted methods set the selection before applying
            # a bit of a hack to make sure the cythonized shift
            # is equivalent to pre 0.17.1 behavior
            if op == "shift":
                gb._set_group_selection()

            if op != "shift" and "int" not in gb_target:
                # numeric apply fastpath promotes dtype so have
                # to apply separately and concat
                i = gb[["int"]].apply(targop)
                f = gb[["float", "float_missing"]].apply(targop)
                expected = concat([f, i], axis=1)
            else:
                expected = gb.apply(targop)

            # Column order may differ between the code paths, so both sides
            # are sorted by column label before comparing.
            expected = expected.sort_index(axis=1)
            tm.assert_frame_equal(expected,
                                  gb.transform(op, *args).sort_index(axis=1))
            tm.assert_frame_equal(expected,
                                  getattr(gb, op)(*args).sort_index(axis=1))
            # individual columns
            for c in df:
                if c not in ["float", "int", "float_missing"
                             ] and op != "shift":
                    # Non-numeric columns are expected to be rejected by
                    # every op except ``shift``.
                    msg = "No numeric types to aggregate"
                    with pytest.raises(DataError, match=msg):
                        gb[c].transform(op)
                    with pytest.raises(DataError, match=msg):
                        getattr(gb[c], op)()
                else:
                    expected = gb[c].apply(targop)
                    expected.name = c
                    tm.assert_series_equal(expected,
                                           gb[c].transform(op, *args))
                    tm.assert_series_equal(expected, getattr(gb[c], op)(*args))
Esempio n. 45
0
File: ctors.py Progetto: tnir/pandas
 def time_multiindex_from_iterables(self):
     """Build a MultiIndex from ``self.iterables``; the result is discarded."""
     iterables = self.iterables
     MultiIndex.from_product(iterables)
Esempio n. 46
0
 def setup(self):
     mi = MultiIndex.from_product([range(100), range(100)])
     self.s = Series(np.random.randn(10000), index=mi)
    def test_reset_index_datetime(self, tz_naive_fixture):
        # GH#3950
        tz = tz_naive_fixture
        idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
        idx2 = Index(range(5), name="idx2", dtype="int64")
        idx = MultiIndex.from_arrays([idx1, idx2])
        df = DataFrame(
            {
                "a": np.arange(5, dtype="int64"),
                "b": ["A", "B", "C", "D", "E"]
            },
            index=idx,
        )

        expected = DataFrame(
            {
                "idx1": [
                    datetime(2011, 1, 1),
                    datetime(2011, 1, 2),
                    datetime(2011, 1, 3),
                    datetime(2011, 1, 4),
                    datetime(2011, 1, 5),
                ],
                "idx2":
                np.arange(5, dtype="int64"),
                "a":
                np.arange(5, dtype="int64"),
                "b": ["A", "B", "C", "D", "E"],
            },
            columns=["idx1", "idx2", "a", "b"],
        )
        expected["idx1"] = expected["idx1"].apply(
            lambda d: Timestamp(d, tz=tz))

        tm.assert_frame_equal(df.reset_index(), expected)

        idx3 = date_range("1/1/2012",
                          periods=5,
                          freq="MS",
                          tz="Europe/Paris",
                          name="idx3")
        idx = MultiIndex.from_arrays([idx1, idx2, idx3])
        df = DataFrame(
            {
                "a": np.arange(5, dtype="int64"),
                "b": ["A", "B", "C", "D", "E"]
            },
            index=idx,
        )

        expected = DataFrame(
            {
                "idx1": [
                    datetime(2011, 1, 1),
                    datetime(2011, 1, 2),
                    datetime(2011, 1, 3),
                    datetime(2011, 1, 4),
                    datetime(2011, 1, 5),
                ],
                "idx2":
                np.arange(5, dtype="int64"),
                "idx3": [
                    datetime(2012, 1, 1),
                    datetime(2012, 2, 1),
                    datetime(2012, 3, 1),
                    datetime(2012, 4, 1),
                    datetime(2012, 5, 1),
                ],
                "a":
                np.arange(5, dtype="int64"),
                "b": ["A", "B", "C", "D", "E"],
            },
            columns=["idx1", "idx2", "idx3", "a", "b"],
        )
        expected["idx1"] = expected["idx1"].apply(
            lambda d: Timestamp(d, tz=tz))
        expected["idx3"] = expected["idx3"].apply(
            lambda d: Timestamp(d, tz="Europe/Paris"))
        tm.assert_frame_equal(df.reset_index(), expected)

        # GH#7793
        idx = MultiIndex.from_product([["a", "b"],
                                       date_range("20130101", periods=3,
                                                  tz=tz)])
        df = DataFrame(np.arange(6, dtype="int64").reshape(6, 1),
                       columns=["a"],
                       index=idx)

        expected = DataFrame(
            {
                "level_0":
                "a a a b b b".split(),
                "level_1": [
                    datetime(2013, 1, 1),
                    datetime(2013, 1, 2),
                    datetime(2013, 1, 3),
                ] * 2,
                "a":
                np.arange(6, dtype="int64"),
            },
            columns=["level_0", "level_1", "a"],
        )
        expected["level_1"] = expected["level_1"].apply(
            lambda d: Timestamp(d, freq="D", tz=tz))
        result = df.reset_index()
        tm.assert_frame_equal(result, expected)
Esempio n. 48
0
    def test_setitem_multiindex(self):
        """Set values through ``.loc`` on MultiIndex frames and read them back.

        Each scenario assigns via the indexer's ``__setitem__`` and then
        checks that ``__getitem__`` with the same indexers returns the value
        (or an explicitly supplied expected result).
        """
        for index_fn in ("loc", ):

            def assert_equal(a, b):
                # Plain equality check used for scalar results.
                assert a == b

            def check(target, indexers, value, compare_fn, expected=None):
                # Assign ``value`` at ``indexers`` through the indexer named
                # by ``index_fn``, read the same location back, and compare
                # with ``expected`` (which defaults to ``value``).
                fn = getattr(target, index_fn)
                fn.__setitem__(indexers, value)
                result = fn.__getitem__(indexers)
                if expected is None:
                    expected = value
                compare_fn(result, expected)

            # GH7190
            index = MultiIndex.from_product(
                [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"])
            t, n = 0, 2
            # Frame pre-filled with NaN.
            df = DataFrame(
                np.nan,
                columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
                index=index,
            )
            check(target=df,
                  indexers=((t, n), "X"),
                  value=0,
                  compare_fn=assert_equal)

            # Frame pre-filled with an integer.
            df = DataFrame(
                -999,
                columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
                index=index)
            check(target=df,
                  indexers=((t, n), "X"),
                  value=1,
                  compare_fn=assert_equal)

            # Frame constructed without any data.
            df = DataFrame(
                columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
                index=index)
            check(target=df,
                  indexers=((t, n), "X"),
                  value=2,
                  compare_fn=assert_equal)

            # gh-7218: assigning with 0-dim arrays
            df = DataFrame(
                -999,
                columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
                index=index)
            check(
                target=df,
                indexers=((t, n), "X"),
                value=np.array(3),
                compare_fn=assert_equal,
                expected=3,
            )

            # GH5206
            df = DataFrame(np.arange(25).reshape(5, 5),
                           columns="A,B,C,D,E".split(","),
                           dtype=float)
            df["F"] = 99
            row_selection = df["A"] % 2 == 0
            col_selection = ["B", "C"]
            df.loc[row_selection, col_selection] = df["F"]
            output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
            tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
            check(
                target=df,
                indexers=(row_selection, col_selection),
                value=df["F"],
                compare_fn=tm.assert_frame_equal,
                expected=output,
            )

            # GH11372
            idx = MultiIndex.from_product([["A", "B", "C"],
                                           date_range("2015-01-01",
                                                      "2015-04-01",
                                                      freq="MS")])
            cols = MultiIndex.from_product([["foo", "bar"],
                                            date_range("2016-01-01",
                                                       "2016-02-01",
                                                       freq="MS")])

            df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols)

            subidx = MultiIndex.from_tuples([("A", Timestamp("2015-01-01")),
                                             ("A", Timestamp("2015-02-01"))])
            subcols = MultiIndex.from_tuples([("foo", Timestamp("2016-01-01")),
                                              ("foo", Timestamp("2016-02-01"))
                                              ])

            # set a 2 x 2 sub-block addressed by MultiIndex rows and columns
            vals = DataFrame(np.random.random((2, 2)),
                             index=subidx,
                             columns=subcols)
            check(
                target=df,
                indexers=(subidx, subcols),
                value=vals,
                compare_fn=tm.assert_frame_equal,
            )
            # set all columns
            vals = DataFrame(np.random.random((2, 4)),
                             index=subidx,
                             columns=cols)
            check(
                target=df,
                indexers=(subidx, slice(None, None, None)),
                value=vals,
                compare_fn=tm.assert_frame_equal,
            )
            # identity
            copy = df.copy()
            check(
                target=df,
                indexers=(df.index, df.columns),
                value=df,
                compare_fn=tm.assert_frame_equal,
                expected=copy,
            )
Esempio n. 49
0
    def test_read_excel_multiindex(self, read_ext):
        """Read the sheets of the ``testmultiindex`` workbook and compare.

        Each sheet stores the same 4 x 4 table with a different combination
        of MultiIndex rows, MultiIndex columns and level names; ``expected``
        is re-labelled in place before every comparison.

        Parameters
        ----------
        read_ext : str
            File extension appended to ``"testmultiindex"``.
        """
        # see gh-4679
        if pd.read_excel.keywords["engine"] == "pyxlsb":
            pytest.xfail("Sheets containing datetimes not supported by pyxlsb")

        mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
        mi_file = "testmultiindex" + read_ext

        def load(sheet, **options):
            # All sheets live in the same workbook; only the layout options
            # differ from one call to the next.
            return pd.read_excel(mi_file, sheet_name=sheet, **options)

        # "mi_column" sheet
        rows = [
            [1, 2.5, pd.Timestamp("2015-01-01"), True],
            [2, 3.5, pd.Timestamp("2015-01-02"), False],
            [3, 4.5, pd.Timestamp("2015-01-03"), False],
            [4, 5.5, pd.Timestamp("2015-01-04"), True],
        ]
        expected = DataFrame(rows, columns=mi)

        actual = load("mi_column", header=[0, 1], index_col=0)
        tm.assert_frame_equal(actual, expected)

        # "mi_index" sheet
        expected.index = mi
        expected.columns = ["a", "b", "c", "d"]

        actual = load("mi_index", index_col=[0, 1])
        tm.assert_frame_equal(actual, expected, check_names=False)

        # "both" sheet
        expected.columns = mi

        actual = load("both", index_col=[0, 1], header=[0, 1])
        tm.assert_frame_equal(actual, expected, check_names=False)

        # "mi_index_name" sheet
        expected.columns = ["a", "b", "c", "d"]
        expected.index = mi.set_names(["ilvl1", "ilvl2"])

        actual = load("mi_index_name", index_col=[0, 1])
        tm.assert_frame_equal(actual, expected)

        # "mi_column_name" sheet
        expected.index = list(range(4))
        expected.columns = mi.set_names(["c1", "c2"])
        actual = load("mi_column_name", header=[0, 1], index_col=0)
        tm.assert_frame_equal(actual, expected)

        # see gh-11317
        # "name_with_int" sheet
        int_level = mi.set_levels([1, 2], level=1)
        expected.columns = int_level.set_names(["c1", "c2"])

        actual = load("name_with_int", index_col=0, header=[0, 1])
        tm.assert_frame_equal(actual, expected)

        # "both_name" sheet
        expected.columns = mi.set_names(["c1", "c2"])
        expected.index = mi.set_names(["ilvl1", "ilvl2"])

        actual = load("both_name", index_col=[0, 1], header=[0, 1])
        tm.assert_frame_equal(actual, expected)

        # "both_name_skiprows" sheet
        actual = load(
            "both_name_skiprows",
            index_col=[0, 1],
            header=[0, 1],
            skiprows=2,
        )
        tm.assert_frame_equal(actual, expected)
def test_is_monotonic_decreasing():
    """Check decreasing-monotonicity flags on several MultiIndex shapes.

    For each index both ``is_monotonic_decreasing`` and
    ``_is_strictly_monotonic_decreasing`` are checked, on the MultiIndex
    itself and on ``Index(i.values)`` built from its tuples.  The
    mixed-level case is only checked on the MultiIndex itself.
    """
    # both levels descending
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"])
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Check the tuple-based index here as well, like every other stanza
    # (previously this line repeated the assertion on ``i``).
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # outer level ascending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # inner level ascending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN in the outer level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
Esempio n. 51
0
    def test_sort_index_and_reconstruction(self):

        # GH#15622
        # lexsortedness should be identical
        # across MultiIndex construction methods

        df = DataFrame([[1, 1], [2, 2]], index=list("ab"))
        expected = DataFrame(
            [[1, 1], [2, 2], [1, 1], [2, 2]],
            index=MultiIndex.from_tuples([(0.5, "a"), (0.5, "b"), (0.8, "a"),
                                          (0.8, "b")]),
        )
        assert expected.index.is_lexsorted()

        result = DataFrame(
            [[1, 1], [2, 2], [1, 1], [2, 2]],
            index=MultiIndex.from_product([[0.5, 0.8], list("ab")]),
        )
        result = result.sort_index()
        assert result.index.is_lexsorted()
        assert result.index.is_monotonic

        tm.assert_frame_equal(result, expected)

        result = DataFrame(
            [[1, 1], [2, 2], [1, 1], [2, 2]],
            index=MultiIndex(levels=[[0.5, 0.8], ["a", "b"]],
                             codes=[[0, 0, 1, 1], [0, 1, 0, 1]]),
        )
        result = result.sort_index()
        assert result.index.is_lexsorted()

        tm.assert_frame_equal(result, expected)

        concatted = pd.concat([df, df], keys=[0.8, 0.5])
        result = concatted.sort_index()

        assert result.index.is_lexsorted()
        assert result.index.is_monotonic

        tm.assert_frame_equal(result, expected)

        # GH#14015
        df = DataFrame(
            [[1, 2], [6, 7]],
            columns=MultiIndex.from_tuples(
                [(0, "20160811 12:00:00"), (0, "20160809 12:00:00")],
                names=["l1", "Date"],
            ),
        )

        df.columns = df.columns.set_levels(pd.to_datetime(
            df.columns.levels[1]),
                                           level=1)
        assert not df.columns.is_lexsorted()
        assert not df.columns.is_monotonic
        result = df.sort_index(axis=1)
        assert result.columns.is_lexsorted()
        assert result.columns.is_monotonic
        result = df.sort_index(axis=1, level=1)
        assert result.columns.is_lexsorted()
        assert result.columns.is_monotonic
Esempio n. 52
0
def test_from_product_empty_one_level():
    """One empty iterable yields a single empty level carrying its name."""
    result = MultiIndex.from_product([[]], names=["A"])
    only_level = result.levels[0]
    tm.assert_index_equal(only_level, pd.Index([], name="A"))
    assert result.names == ["A"]
Esempio n. 53
0
             ("foo", "two", "max"),
             ("bar", "one", "min"),
             ("bar", "one", "max"),
             ("bar", "three", "min"),
             ("bar", "three", "max"),
         ],
         names=["A", "B", None],
     ),
     [1, 1, 3, 3, 2, 2, 4, 4],
 ),
 (
     False,
     MultiIndex.from_product(
         [
             CategoricalIndex(["bar", "foo"], ordered=False),
             CategoricalIndex(["one", "three", "two"], ordered=False),
             Index(["min", "max"]),
         ],
         names=["A", "B", None],
     ),
     [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3],
 ),
 (
     None,
     MultiIndex.from_product(
         [
             CategoricalIndex(["bar", "foo"], ordered=False),
             CategoricalIndex(["one", "three", "two"], ordered=False),
             Index(["min", "max"]),
         ],
         names=["A", "B", None],
     ),
Esempio n. 54
0
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels absent from a MultiIndex raises KeyError (GH 15452)."""
    two_level = MultiIndex.from_product([[1, 2], ["a", "b"]])
    ser = Series(range(4), index=two_level)
    missing = ["not", "found"]

    with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
        ser.loc[missing]
Esempio n. 55
0
    def test_loader_given_multiple_columns(self):
        """Run three rolling-sum terms whose inputs come from two loaders.

        The pipeline output is compared with values computed by hand from
        the per-column constants, and the calls recorded by each loader are
        compared with the expected column groupings.
        """

        class Loader1DataSet1(DataSet):
            col1 = Column(float)
            col2 = Column(float32)

        class Loader1DataSet2(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        class Loader2DataSet(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        # Constant value handed to each loader for every column it serves.
        constants1 = {
            Loader1DataSet1.col1: 1,
            Loader1DataSet1.col2: 2,
            Loader1DataSet2.col1: 3,
            Loader1DataSet2.col2: 4,
        }
        constants2 = {
            Loader2DataSet.col1: 5,
            Loader2DataSet.col2: 6,
        }

        loader1 = RecordingPrecomputedLoader(
            constants=constants1, dates=self.dates, sids=self.assets)
        loader2 = RecordingPrecomputedLoader(
            constants=constants2, dates=self.dates, sids=self.assets)

        def get_loader(column):
            # Only columns of Loader2DataSet are routed to loader2.
            if column.dataset == Loader2DataSet:
                return loader2
            return loader1

        engine = SimplePipelineEngine(
            get_loader, self.dates, self.asset_finder)

        columns = OrderedDict()
        columns['pipe_col1'] = RollingSumSum(
            inputs=[Loader1DataSet1.col1,
                    Loader1DataSet2.col1,
                    Loader2DataSet.col1],
            window_length=2)
        columns['pipe_col2'] = RollingSumSum(
            inputs=[Loader1DataSet1.col2,
                    Loader1DataSet2.col2,
                    Loader2DataSet.col2],
            window_length=3)
        columns['pipe_col3'] = RollingSumSum(
            inputs=[Loader2DataSet.col1],
            window_length=3)

        pipeline = Pipeline(columns=columns)
        # start index is >= the largest window length - 1
        start_date = self.dates[2]
        end_date = self.dates[-1]
        result = engine.run_pipeline(pipeline, start_date, end_date)

        min_window = min(term.window_length
                         for term in itervalues(columns))
        col_to_val = ChainMap(constants1, constants2)

        # Expected cell value per output column: the sum of its inputs'
        # constants multiplied by the term's window length.
        vals = {}
        for name, term in iteritems(columns):
            inputs_total = sum(col_to_val[col] for col in term.inputs)
            vals[name] = inputs_total * term.window_length

        index = MultiIndex.from_product([self.dates[2:], self.assets])

        def expected_for_col(col):
            n_dates, n_assets = index.levshape
            # Columns with a window longer than the shortest one are
            # expected to hold NaN for their first `offset` dates.
            offset = columns[col].window_length - min_window
            leading_nans = full(offset * n_assets, nan)
            constant_rows = full(
                (n_dates - offset) * n_assets, vals[col], float)
            return concatenate([leading_nans, constant_rows])

        expected = DataFrame(
            data={col: expected_for_col(col) for col in vals},
            index=index,
            columns=columns,
        )

        assert_frame_equal(result, expected)

        loader1_expected_calls = {
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                    Loader1DataSet2.col1),
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                    Loader1DataSet2.col2),
        }
        self.assertEqual(set(loader1.load_calls), loader1_expected_calls)

        loader2_expected_calls = {
            ColumnArgs.sorted_by_ds(Loader2DataSet.col1,
                                    Loader2DataSet.col2),
        }
        self.assertEqual(set(loader2.load_calls), loader2_expected_calls)
Esempio n. 56
0
def test_from_product_empty_two_levels(first, second):
    names = ["A", "B"]
    result = MultiIndex.from_product([first, second], names=names)
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    tm.assert_index_equal(result, expected)
Esempio n. 57
0
    def test_per_axis_per_level_getitem(self):
        """Exercise ``.loc`` with per-level selectors on MultiIndex axes.

        Covers slices, lists and boolean masks per level on rows and
        columns, the ambiguous non-tuple form, and the error raised when
        the index is not lexsorted (GH6134).
        """

        def rows_matching(obj, first, third):
            # Select by explicit tuple keys: level-0 label in `first` and
            # level-2 label in `third`.
            return obj.loc[[
                tuple(key) for key in obj.index.values
                if key[0] in first and key[2] in third
            ]]

        a1_to_a3 = ('A1', 'A2', 'A3')
        every = slice(None)

        # example test case
        ix = MultiIndex.from_product(
            [_mklbl(prefix, count)
             for prefix, count in [('A', 5), ('B', 7), ('C', 4), ('D', 2)]])
        df = DataFrame(np.arange(len(ix.get_values())), index=ix)

        result = df.loc[(slice('A1', 'A3'), every, ['C1', 'C3']), :]
        expected = rows_matching(df, a1_to_a3, ('C1', 'C3'))
        tm.assert_frame_equal(result, expected)

        expected = rows_matching(df, a1_to_a3, ('C1', 'C2', 'C3'))
        result = df.loc[(slice('A1', 'A3'), every, slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        row_index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        col_index = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])

        df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                       index=row_index,
                       columns=col_index)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(every, every), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(every, every), (every, every)]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (every, every)]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (every, ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(every, 1), (every, ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(every, [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix.get_values())), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        expected = rows_matching(s, a1_to_a3, ('C1', 'C3'))
        tm.assert_series_equal(result, expected)

        # boolean indexers
        result = df.loc[(every, df.loc[:, ('a', 'bar')] > 5), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        # a boolean mask of the wrong length must be rejected
        self.assertRaises(
            ValueError,
            lambda: df.loc[(every, np.array([True, False])), :])

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        self.assertRaises(KeyError, lambda: df.loc[every, [1]])

        result = df.loc[(every, [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        self.assertEqual(df.index.lexsort_depth, 2)
        df = df.sort_index(level=1, axis=0)
        self.assertEqual(df.index.lexsort_depth, 0)
        with tm.assertRaisesRegexp(
                UnsortedIndexError,
                'MultiIndex Slicing requires the index to be fully '
                r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
            df.loc[(every, df.loc[:, ('a', 'bar')] > 5), :]
Esempio n. 58
0
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant on MultiIndex.

    Each case is asserted on the MultiIndex itself and, except for the
    mixed-levels case, also on ``Index(i.values)`` rebuilt from its values.

    Uses ``is_monotonic_increasing`` and the ``codes=`` constructor keyword
    rather than the removed ``is_monotonic`` / ``labels=`` spellings.
    """
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10)], names=['one', 'two'])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # Previously this line re-checked ``i`` instead of ``Index(i.values)``.
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level descending
    i = MultiIndex.from_product([np.arange(10, 0, -1),
                                 np.arange(10)], names=['one', 'two'])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level descending
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10, 0, -1)],
                                names=['one', 'two'])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string ordering
    i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                           ['one', 'two', 'three']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
                           ['mom', 'next', 'zenith']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
                               'nl0000289783',
                               'nl0000289965', 'nl0000301109']],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=['household_id', 'asset_id'])

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
Esempio n. 59
0
    def test_binary_ops_align(self):

        # test aligning binary ops

        # GH 6681
        index = MultiIndex.from_product(
            [list('abc'), ['one', 'two', 'three'], [1, 2, 3]],
            names=['first', 'second', 'third'])

        df = DataFrame(np.arange(27 * 3).reshape(27, 3),
                       index=index,
                       columns=['value1', 'value2', 'value3']).sort_index()

        idx = pd.IndexSlice
        for op in ['add', 'sub', 'mul', 'div', 'truediv']:
            opa = getattr(operator, op, None)
            if opa is None:
                continue

            x = Series([1.0, 10.0, 100.0], [1, 2, 3])
            result = getattr(df, op)(x, level='third', axis=0)

            expected = pd.concat([
                opa(df.loc[idx[:, :, i], :], v) for i, v in x.iteritems()
            ]).sort_index()
            assert_frame_equal(result, expected)

            x = Series([1.0, 10.0], ['two', 'three'])
            result = getattr(df, op)(x, level='second', axis=0)

            expected = (pd.concat([
                opa(df.loc[idx[:, i], :], v) for i, v in x.iteritems()
            ]).reindex_like(df).sort_index())
            assert_frame_equal(result, expected)

        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([['A', 'B'], ['a', 'b']])
        df = DataFrame(np.ones((2, 4), dtype='int64'), columns=midx)
        s = pd.Series({'a': 1, 'b': 2})

        df2 = df.copy()
        df2.columns.names = ['lvl0', 'lvl1']
        s2 = s.copy()
        s2.index.name = 'lvl1'

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level='lvl1')
        res6 = df2.mul(s2, axis=1, level='lvl1')

        exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype='int64'),
                        columns=midx)

        for res in [res1, res2]:
            assert_frame_equal(res, exp)

        exp.columns.names = ['lvl0', 'lvl1']
        for res in [res3, res4, res5, res6]:
            assert_frame_equal(res, exp)
Esempio n. 60
0
    def test_missing_key_raises_keyerror2(self):
        """A missing two-level key raises KeyError naming the key.

        GH#21168 KeyError, not "IndexingError: Too many indexers"
        """
        two_level = MultiIndex.from_product([[0, 1], [0, 1]])
        ser = Series(-1, index=two_level)

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]