def test_join_on_fails_with_different_left_index(self):
    """Merging on the left index must reject a ``right_on`` of the wrong length.

    ``df`` carries a two-level index while ``right_on`` names a single
    column, so ``merge`` is expected to raise ``ValueError``.
    """
    # The index must have as many entries as the frame has rows (3).  With a
    # mismatched length the DataFrame constructor itself raises ValueError,
    # which would let the test pass without ever calling ``merge``.
    df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)},
                   index=tm.makeCustomIndex(3, 2))
    df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)})
    msg = (r'len\(right_on\) must equal the number of levels in the index'
           ' of "left"')
    # Only the call under test lives inside the context, and the message is
    # matched so that an unrelated ValueError cannot satisfy the assertion.
    with pytest.raises(ValueError, match=msg):
        merge(df, df2, right_on='b', left_index=True)
def test_join_on_fails_with_different_column_counts(self):
    """``merge`` must reject ``left_on``/``right_on`` lists of unequal length.

    ``left_on`` names two columns while ``right_on`` names one, so the call
    is expected to raise ``ValueError``.
    """
    # Build both frames outside the assertion context so that only an error
    # raised by ``merge`` itself can satisfy the test; a ValueError from
    # fixture construction now fails the test instead of passing it.
    df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)})
    df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)},
                    index=tm.makeCustomIndex(10, 2))
    with tm.assertRaises(ValueError):
        merge(df, df2, right_on='a', left_on=['a', 'b'])
def test_join_on_fails_with_different_column_counts(self):
    """Check that ``merge`` refuses key lists of different lengths.

    Two ``left_on`` columns are paired with a single ``right_on`` column and
    the resulting ``ValueError`` message is matched.
    """
    left_data = {'a': np.random.choice(['m', 'f'], size=3),
                 'b': np.random.randn(3)}
    left = DataFrame(left_data)

    right_data = {'a': np.random.choice(['m', 'f'], size=10),
                  'b': np.random.randn(10)}
    right = DataFrame(right_data, index=tm.makeCustomIndex(10, 2))

    expected_error = r"len\(right_on\) must equal len\(left_on\)"
    with pytest.raises(ValueError, match=expected_error):
        merge(left, right, right_on='a', left_on=['a', 'b'])
def test_join_on_fails_with_different_left_index(self):
    """Check that ``merge`` refuses a ``right_on`` shorter than the left index.

    The left frame is indexed by a two-level custom index, the right frame
    is joined on one column, and the ``ValueError`` message is matched.
    """
    left_data = {'a': np.random.choice(['m', 'f'], size=3),
                 'b': np.random.randn(3)}
    left = DataFrame(left_data, index=tm.makeCustomIndex(3, 2))

    right_data = {'a': np.random.choice(['m', 'f'], size=10),
                  'b': np.random.randn(10)}
    right = DataFrame(right_data)

    expected_error = (r'len\(right_on\) must equal the number of levels in the index'
                      ' of "left"')
    with pytest.raises(ValueError, match=expected_error):
        merge(left, right, right_on='b', left_index=True)
def test_join_on_fails_with_different_column_counts(self):
    """Check that ``merge`` refuses key lists of different lengths.

    ``left_on`` lists two columns and ``right_on`` one; the ``ValueError``
    message raised by ``merge`` is matched.
    """
    left_data = {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
    left = DataFrame(left_data)

    right_data = {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}
    right = DataFrame(right_data, index=tm.makeCustomIndex(10, 2))

    expected_error = r"len\(right_on\) must equal len\(left_on\)"
    with pytest.raises(ValueError, match=expected_error):
        merge(left, right, right_on="a", left_on=["a", "b"])
def test_join_on_fails_with_different_right_index(self):
    """Check that ``merge`` refuses a ``left_on`` shorter than the right index.

    The right frame is indexed by a two-level custom index while ``left_on``
    names one column; the ``ValueError`` message is matched.
    """
    left_data = {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
    left = DataFrame(left_data)

    right_data = {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}
    right = DataFrame(right_data, index=tm.makeCustomIndex(10, 2))

    expected_error = (
        r"len\(left_on\) must equal the number of levels in the index"
        ' of "right"'
    )
    with pytest.raises(ValueError, match=expected_error):
        merge(left, right, left_on="a", right_index=True)
def test_fails_on_no_datetime_index():
    """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on other indexes.

    Each case pairs the index class name expected in the error message with
    a factory that builds an index of that kind.
    """
    n = 2
    cases = [
        ('Int64Index', tm.makeIntIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    ]
    for name, func in cases:
        index = func(n)
        frame = DataFrame({'a': np.random.randn(n)}, index=index)
        # %r renders the class name with its surrounding quotes.
        expected_error = ("Only valid with DatetimeIndex, TimedeltaIndex "
                          "or PeriodIndex, but got an instance of %r" % name)
        with pytest.raises(TypeError, match=expected_error):
            frame.groupby(TimeGrouper('D'))
def metadata_column_headers(request):
    """Make a list of metadata column headers.

    Args:
        request: Object whose ``param`` attribute gives the number of
            headers to generate (presumably a pytest fixture request --
            the decorator is not visible here).

    Returns:
        list: A metadata column header list whose length is between 0 and
            `MAX_METADATA_COLS`.
    """
    count = request.param
    # Compare by value, not identity: ``is`` on integers only works by
    # accident for interpreter-cached ints and fails for equal values of
    # other integer types (e.g. numpy integers).
    if count == 0:
        return []
    # pandas bug (?) in makeCustomIndex when nentries = 1
    if count == 1:
        return ['M_l0_g0']
    return list(makeCustomIndex(count, 1, prefix='M'))
def test_fails_on_no_datetime_index(self):
    """Grouping by ``TimeGrouper('D')`` must raise ``TypeError`` on other indexes.

    Each case pairs the index class name expected in the error message with
    a factory that builds an index of that kind.
    """
    n = 2
    cases = [
        ('Int64Index', tm.makeIntIndex),
        ('PeriodIndex', tm.makePeriodIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    ]
    for name, func in cases:
        index = func(n)
        frame = DataFrame({'a': np.random.randn(n)}, index=index)
        # %r renders the class name with its surrounding quotes.
        expected_error = ("axis must be a DatetimeIndex, "
                          "but got an instance of %r" % name)
        with tm.assertRaisesRegexp(TypeError, expected_error):
            frame.groupby(TimeGrouper('D'))
def test_fails_on_no_datetime_index(self):
    """Verify ``groupby(TimeGrouper('D'))`` rejects non-datetime indexes.

    For every (class name, index factory) pair a two-row frame is built and
    the ``TypeError`` message naming that class is matched.
    """
    n = 2
    named_factories = (
        ('Int64Index', tm.makeIntIndex),
        ('PeriodIndex', tm.makePeriodIndex),
        ('Index', tm.makeUnicodeIndex),
        ('Float64Index', tm.makeFloatIndex),
        ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)),
    )
    for name, func in named_factories:
        index = func(n)
        values = {'a': np.random.randn(n)}
        df = DataFrame(values, index=index)
        pattern = ("axis must be a DatetimeIndex, "
                   "but got an instance of %r" % name)
        with tm.assertRaisesRegexp(TypeError, pattern):
            df.groupby(TimeGrouper('D'))
pd.Series(np.arange(4,9)) # using the numpy function pd.Series(np.linspace(0,9,5)) # allows to specify the number of values to be created btw boundaries pd.Series(np.random.normal(size=5)) np.random.randint(50,101,10) a = np.array([4] * 16) a[1::] = [42] * 15 a[1:8:2] = 16 import pandas.util.testing as tm tm.N, tm.K = 5,3 tm.makeFloatSeries(), tm.makeBoolIndex(), tm.makeCategoricalIndex() tm.makeCustomIndex(nentries=4,nlevels=2), tm.makeFloatIndex(), tm.makeIntIndex() tm.makeMultiIndex(), tm.makeRangeIndex(), tm.makeIntervalIndex() # All possible combinations (Permutations) from itertools import permutations my_list = [1,2,3] perm = list(permutations(my_list)) #(1, 2, 3) #(1, 3, 2) #(2, 1, 3) #(2, 3, 1) #(3, 1, 2) #(3, 2, 1)
# Errors grouped = df.groupby(grouper, group_keys=False) def f(df): return df['close'] / df['open'] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index) @pytest.mark.parametrize('name, func', [('Int64Index', tm.makeIntIndex), ('Index', tm.makeUnicodeIndex), ('Float64Index', tm.makeFloatIndex), ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2))]) def test_fails_on_no_datetime_index(name, func): n = 2 index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " "or PeriodIndex, but got an instance of '{}'".format(name)) with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D')) def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts n = 20
def test_raise_on_panel4d_with_multiindex(self, parser, engine):
    """``pd.eval`` on a Panel4D whose items are multi-level must raise.

    The test is preceded by ``tm.skip_if_no_ne()`` and expects
    ``NotImplementedError`` from the evaluation.
    """
    tm.skip_if_no_ne()

    # The local has to stay named ``p4d``: the expression string refers to
    # it by that name.
    p4d = tm.makePanel4D(7)
    item_count = len(p4d.items)
    p4d.items = tm.makeCustomIndex(item_count, nlevels=2)

    expression = 'p4d + 1'
    with pytest.raises(NotImplementedError):
        pd.eval(expression, parser=parser, engine=engine)
class TestResolvePath(object):
    """Tests for ``resolve_path`` driven by small generated dataframes.

    The class-level schemas map string keys (``'0'``, ``'1'``, ...) to
    entries of ``headers``.  The static ``data_gen*`` helpers are passed to
    ``make_dataframe`` as ``data_gen_f`` and supply hand-written values for
    the first few cells, deferring to ``make_dataframe_value`` elsewhere.
    """

    archive = '.\\'
    headers = makeCustomIndex(7, 1, prefix='C')

    multi_schema = SortedDict({'0': headers[1], '1': headers[3]})
    multi_schema2 = SortedDict({'0': headers[0], '1': headers[2], '2': headers[3]})
    multi_schema3 = SortedDict({'0': headers[1], '1': headers[6]})
    single_schema = SortedDict({'0': headers[0]})
    single_schema2 = SortedDict({'0': headers[4]})

    @staticmethod
    def data_gen_invalid(row: int, col: int):
        """Cell generator whose table mixes ``'val'``, ``'xxx'`` and ``nan``.

        Returns the table entry for ``(row, col)`` when the table has one,
        otherwise whatever ``make_dataframe_value(row, col)`` returns.
        """
        table = [
            ['val', 'val', 'xxx', 'val'],
            ['val', nan, 'val', 'val'],
            ['val', nan, 'val', nan],
            [nan, nan, 'val', 'xxx'],
            ['xxx', nan, 'val', nan],
        ]
        covered = row < len(table) and col < len(table[row])
        if not covered:
            return make_dataframe_value(row, col)
        return table[row][col]

    @staticmethod
    def data_gen(row: int, col: int):
        """Cell generator whose table holds only ``'val'`` and ``nan``.

        Returns the table entry for ``(row, col)`` when the table has one,
        otherwise whatever ``make_dataframe_value(row, col)`` returns.
        """
        table = [
            ['val', 'val', 'val', 'val'],
            ['val', nan, 'val', 'val'],
            ['val', nan, 'val', nan],
            [nan, nan, 'val', nan],
            [nan, nan, 'val', nan],
        ]
        covered = row < len(table) and col < len(table[row])
        if not covered:
            return make_dataframe_value(row, col)
        return table[row][col]

    @staticmethod
    def data_gen_normalizable(row: int, col: int):
        """Cell generator whose table holds ``'Value 1.'`` and ``nan``.

        Returns the table entry for ``(row, col)`` when the table has one,
        otherwise whatever ``make_dataframe_value(row, col)`` returns.
        """
        table = [
            ['Value 1.', 'Value 1.', 'Value 1.', 'Value 1.'],
            ['Value 1.', nan, 'Value 1.', 'Value 1.'],
            ['Value 1.', nan, 'Value 1.', nan],
            [nan, nan, 'Value 1.', nan],
            [nan, nan, 'Value 1.', nan],
        ]
        covered = row < len(table) and col < len(table[row])
        if not covered:
            return make_dataframe_value(row, col)
        return table[row][col]

    @pytest.mark.parametrize(
        'schema, expected',
        [
            (single_schema, join(archive, 'val')),
            (multi_schema, join(archive, 'val', 'val')),
            (multi_schema2, join(archive, 'val', 'val', 'val')),
        ],
    )
    def test_resolve_path(self, schema, expected):
        """The resolved path equals ``archive`` joined with one part per schema key."""
        frame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)
        assert resolve_path(self.archive, schema, frame) == expected

    @pytest.mark.parametrize(
        'schema, expected',
        [
            (single_schema, join(archive, 'value_1')),
            (multi_schema, join(archive, 'value_1', 'value_1')),
            (multi_schema2, join(archive, 'value_1', 'value_1', 'value_1')),
        ],
    )
    def test_resolve_path_normalized(self, schema, expected):
        """``'Value 1.'`` cells are expected to show up as ``value_1`` path parts."""
        frame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_normalizable)
        assert resolve_path(self.archive, schema, frame) == expected

    @pytest.mark.parametrize('schema', [single_schema2, multi_schema3])
    def test_resolve_path_indexerror(self, schema):
        """Schemas using ``headers[4]`` or ``headers[6]`` must raise ``IndexError``."""
        frame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)
        with pytest.raises(IndexError):
            resolve_path(self.archive, schema, frame)

    @pytest.mark.parametrize(
        'schema', [single_schema, multi_schema, multi_schema2])
    def test_resolve_path_valueerror(self, schema):
        """Data from ``data_gen_invalid`` must make ``resolve_path`` raise ``ValueError``."""
        frame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_invalid)
        with pytest.raises(ValueError):
            resolve_path(self.archive, schema, frame)
def check_raise_on_panel_with_multiindex(self, parser, engine):
    """``pd.eval`` on a Panel whose items are multi-level must raise.

    Preceded by ``tm.skip_if_no_ne()``; expects ``NotImplementedError``
    from evaluating the expression with the given parser and engine.
    """
    tm.skip_if_no_ne()

    # The local has to stay named ``p``: the expression string refers to it
    # by that name.
    p = tm.makePanel(7)
    item_count = len(p.items)
    p.items = tm.makeCustomIndex(item_count, nlevels=2)

    expression = 'p + 1'
    with tm.assertRaises(NotImplementedError):
        pd.eval(expression, parser=parser, engine=engine)
def test_raise_on_panel_with_multiindex(self, parser, engine):
    """``pd.eval`` on a Panel whose items are multi-level must raise.

    Expects ``NotImplementedError`` from evaluating the expression with the
    given parser and engine.
    """
    # The local has to stay named ``p``: the expression string refers to it
    # by that name.
    p = tm.makePanel(7)
    item_count = len(p.items)
    p.items = tm.makeCustomIndex(item_count, nlevels=2)

    expression = 'p + 1'
    with pytest.raises(NotImplementedError):
        pd.eval(expression, parser=parser, engine=engine)
def f(df): return df["close"] / df["open"] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index) @pytest.mark.parametrize( "name, func", [ ("Int64Index", tm.makeIntIndex), ("Index", tm.makeUnicodeIndex), ("Float64Index", tm.makeFloatIndex), ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)), ], ) def test_fails_on_no_datetime_index(name, func): n = 2 index = func(n) df = DataFrame({"a": np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " f"or PeriodIndex, but got an instance of '{name}'") with pytest.raises(TypeError, match=msg): df.groupby(Grouper(freq="D")) def test_aaa_group_order(): # GH 12840
# Errors grouped = df.groupby(grouper, group_keys=False) def f(df): return df['close'] / df['open'] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index) @pytest.mark.parametrize('name, func', [ ('Int64Index', tm.makeIntIndex), ('Index', tm.makeUnicodeIndex), ('Float64Index', tm.makeFloatIndex), ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2)) ]) def test_fails_on_no_datetime_index(name, func): n = 2 index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " "or PeriodIndex, but got an instance of '{}'".format(name)) with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D')) def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts
def check_raise_on_panel4d_with_multiindex(self, parser, engine):
    """``pd.eval`` on a Panel4D whose items are multi-level must raise.

    Preceded by ``tm.skip_if_no_ne()``; expects ``NotImplementedError``
    from evaluating the expression with the given parser and engine.
    """
    tm.skip_if_no_ne()

    # The local has to stay named ``p4d``: the expression string refers to
    # it by that name.
    p4d = tm.makePanel4D(7)
    item_count = len(p4d.items)
    p4d.items = tm.makeCustomIndex(item_count, nlevels=2)

    expression = 'p4d + 1'
    with tm.assertRaises(NotImplementedError):
        pd.eval(expression, parser=parser, engine=engine)