def test_filter_maintains_ordering(self):
    """Check that ``filter`` on a groupby keeps rows in their original order.

    Regression test for #4621. The same frame is filtered three times, each
    time with a different index layout (sequential, decreasing, shuffled),
    and both the DataFrame and the Series code paths are compared against a
    positional selection of the rows expected to survive.
    """
    frame = DataFrame({
        'pid': [1, 1, 1, 2, 2, 3, 3, 3],
        'tag': [23, 45, 62, 24, 45, 34, 25, 62],
    })
    # Tags 45 and 62 are the only ones occurring more than once; they sit
    # at positions 1, 4 and 2, 7 respectively.
    surviving = [1, 2, 4, 7]

    def has_multiple_rows(group):
        return len(group) > 1

    def check_frame_and_series():
        # Re-derive the Series on every call so it carries the frame's
        # current index.
        pids = frame['pid']

        filtered_frame = frame.groupby('tag').filter(has_multiple_rows)
        assert_frame_equal(filtered_frame, frame.iloc[surviving])

        filtered_pids = pids.groupby(frame['tag']).filter(has_multiple_rows)
        assert_series_equal(filtered_pids, pids.iloc[surviving])

    # Simple case: index is sequential.
    check_frame_and_series()

    # Now index is sequentially decreasing.
    frame.index = np.arange(len(frame) - 1, -1, -1)
    check_frame_and_series()

    # Index is shuffled (applied on top of the decreasing index).
    shuffle_order = [4, 6, 7, 2, 1, 0, 5, 3]
    frame.index = frame.index[shuffle_order]
    check_frame_and_series()
def test_groupby_multi_categorical_as_index(self):
    """Group by a categorical plus a second key with ``as_index=False``.

    GH13204. The second key is, in turn, a column label, a function, and a
    Series whose name clashes with a column. Finally the frame's index is
    given several names to confirm the original index never shows up in the
    result.
    """
    frame = DataFrame(
        {
            "cat": Categorical([1, 2, 2], [1, 2, 3]),
            "A": [10, 11, 11],
            "B": [101, 102, 103],
        }
    )

    def build_expected(a_values):
        # Every expected frame shares the same "cat" and "B" columns and
        # differs only in what ends up in "A".
        return DataFrame(
            {
                "cat": Categorical([1, 1, 2, 2, 3, 3]),
                "A": a_values,
                "B": [101.0, nan, nan, 205.0, nan, nan],
            },
            columns=["cat", "A", "B"],
        )

    # Both keys are columns: "A" is kept as a key column in the output.
    summed = frame.groupby(["cat", "A"], as_index=False).sum()
    tm.assert_frame_equal(summed, build_expected([10, 11, 10, 11, 10, 11]))

    # function grouper: not a column, so "A" is aggregated alongside "B".
    summed = frame.groupby(
        ["cat", lambda r: frame.loc[r, "A"]], as_index=False
    ).sum()
    tm.assert_frame_equal(
        summed, build_expected([10.0, nan, nan, 22.0, nan, nan])
    )

    # another not in-axis grouper (conflicting names in index)
    labels = Series(["a", "b", "b"], name="cat")
    summed = frame.groupby(["cat", labels], as_index=False).sum()
    tm.assert_frame_equal(
        summed, build_expected([10.0, nan, nan, 22.0, nan, nan])
    )

    # is original index dropped?
    expected = build_expected([10, 11, 10, 11, 10, 11])
    for index_name in [None, "X", "B", "cat"]:
        frame.index = Index(list("abc"), name=index_name)
        summed = frame.groupby(["cat", "A"], as_index=False).sum()
        tm.assert_frame_equal(summed, expected, check_index_type=True)
def test_filter_maintains_ordering(self):
    """Verify groupby ``filter`` preserves row order under various indexes.

    Regression test for #4621. Groups of a single row are dropped and the
    remaining rows must come back in their original positional order, for
    both ``DataFrame.groupby`` and ``Series.groupby``.
    """
    data = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                      'tag': [23, 45, 62, 24, 45, 34, 25, 62]})
    kept_positions = [1, 2, 4, 7]

    def repeated(group):
        return len(group) > 1

    # Each step rewrites the index starting from the one left by the
    # previous step; ``None`` means "leave the index as constructed".
    index_steps = (
        # Simple case: index is sequential.
        None,
        # Now index is sequentially decreasing.
        lambda current: np.arange(len(current) - 1, -1, -1),
        # Index is shuffled.
        lambda current: current[[4, 6, 7, 2, 1, 0, 5, 3]],
    )

    for step in index_steps:
        if step is not None:
            data.index = step(data.index)
        column = data['pid']

        frame_result = data.groupby('tag').filter(repeated)
        frame_expected = data.iloc[kept_positions]
        assert_frame_equal(frame_result, frame_expected)

        series_result = column.groupby(data['tag']).filter(repeated)
        series_expected = column.iloc[kept_positions]
        assert_series_equal(series_result, series_expected)
def test_groupby_multi_categorical_as_index(self):
    """Group by a categorical plus a second key with ``as_index=False``.

    GH13204. Covers a column key, a function key and a Series key whose name
    clashes with a column, then re-runs the column-key case under several
    index names. When the index name equals one of the grouping columns the
    groupby call is expected to emit a ``FutureWarning``.
    """
    frame = DataFrame({'cat': Categorical([1, 2, 2], [1, 2, 3]),
                       'A': [10, 11, 11],
                       'B': [101, 102, 103]})
    group_columns = ['cat', 'A']

    def build_expected(a_values):
        # Only the "A" column varies between the expected frames.
        return DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
                          'A': a_values,
                          'B': [101.0, nan, nan, 205.0, nan, nan]},
                         columns=['cat', 'A', 'B'])

    # Both keys are columns.
    outcome = frame.groupby(['cat', 'A'], as_index=False).sum()
    tm.assert_frame_equal(outcome,
                          build_expected([10, 11, 10, 11, 10, 11]))

    # function grouper
    outcome = frame.groupby(['cat', lambda r: frame.loc[r, 'A']],
                            as_index=False).sum()
    tm.assert_frame_equal(outcome,
                          build_expected([10.0, nan, nan, 22.0, nan, nan]))

    # another not in-axis grouper (conflicting names in index)
    labels = Series(['a', 'b', 'b'], name='cat')
    outcome = frame.groupby(['cat', labels], as_index=False).sum()
    tm.assert_frame_equal(outcome,
                          build_expected([10.0, nan, nan, 22.0, nan, nan]))

    # is original index dropped?
    expected = build_expected([10, 11, 10, 11, 10, 11])
    for index_name in [None, 'X', 'B', 'cat']:
        frame.index = Index(list("abc"), name=index_name)
        if (index_name not in group_columns
                or index_name not in frame.index.names):
            outcome = frame.groupby(group_columns, as_index=False).sum()
        else:
            # Index name collides with a grouping column: a FutureWarning
            # is required here.
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                outcome = frame.groupby(group_columns,
                                        as_index=False).sum()
        tm.assert_frame_equal(outcome, expected, check_index_type=True)
def test_groupby_multi_categorical_as_index(self):
    """Exercise multi-key groupby on a categorical with ``as_index=False``.

    GH13204. The first key is always the categorical column ``'cat'``; the
    second is a column label, then a function, then a Series named like an
    existing column. The last section renames the index repeatedly and
    checks the result is unaffected, requiring a ``FutureWarning`` when the
    index name matches a grouping column.
    """
    source = DataFrame({
        'cat': Categorical([1, 2, 2], [1, 2, 3]),
        'A': [10, 11, 11],
        'B': [101, 102, 103]
    })
    result_columns = ['cat', 'A', 'B']
    cat_values = [1, 1, 2, 2, 3, 3]
    b_sums = [101.0, nan, nan, 205.0, nan, nan]

    # Both keys are columns: 'A' survives as a key column.
    got = source.groupby(['cat', 'A'], as_index=False).sum()
    want = DataFrame(
        {
            'cat': Categorical(cat_values),
            'A': [10, 11, 10, 11, 10, 11],
            'B': b_sums
        },
        columns=result_columns)
    tm.assert_frame_equal(got, want)

    # function grouper
    got = source.groupby(
        ['cat', lambda r: source.loc[r, 'A']], as_index=False).sum()
    want = DataFrame(
        {
            'cat': Categorical(cat_values),
            'A': [10.0, nan, nan, 22.0, nan, nan],
            'B': b_sums
        },
        columns=result_columns)
    tm.assert_frame_equal(got, want)

    # another not in-axis grouper (conflicting names in index)
    named_like_column = Series(['a', 'b', 'b'], name='cat')
    got = source.groupby(['cat', named_like_column], as_index=False).sum()
    want = DataFrame(
        {
            'cat': Categorical(cat_values),
            'A': [10.0, nan, nan, 22.0, nan, nan],
            'B': b_sums
        },
        columns=result_columns)
    tm.assert_frame_equal(got, want)

    # is original index dropped?
    want = DataFrame(
        {
            'cat': Categorical(cat_values),
            'A': [10, 11, 10, 11, 10, 11],
            'B': b_sums
        },
        columns=result_columns)
    group_columns = ['cat', 'A']

    def grouped_sum():
        return source.groupby(group_columns, as_index=False).sum()

    for candidate in [None, 'X', 'B', 'cat']:
        source.index = Index(list("abc"), name=candidate)
        expect_warning = (candidate in group_columns
                          and candidate in source.index.names)
        if expect_warning:
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                got = grouped_sum()
        else:
            got = grouped_sum()
        tm.assert_frame_equal(got, want, check_index_type=True)