def test_string_join_non_key(str_data, num_cols, how, how_raise):
    """Self-merge frames that carry string columns, joining on numeric "a".

    A pandas frame and a ``DataFrame`` under test are built with
    ``num_cols`` identical string columns (labelled ``0..num_cols-1``) plus
    a numeric column ``"a"``. Each frame is merged with a copy of itself on
    ``"a"`` using join type ``how`` and the two results are compared.

    ``how_raise`` is forwarded to ``raise_builder`` together with
    ``NotImplementedError``; presumably it selects whether the merge is
    expected to raise -- confirm against that helper.
    """
    # One key value per input string, capped at the five available values.
    key_values = [1, 2, 3, 4, 5][:len(str_data)]

    pandas_left = pd.DataFrame()
    frame_left = DataFrame()
    for label in range(num_cols):
        pandas_left[label] = pd.Series(str_data, dtype="str")
        frame_left[label] = Series(str_data, dtype="str")
    pandas_left["a"] = key_values
    frame_left["a"] = key_values

    pandas_right = pandas_left.copy()
    frame_right = frame_left.copy()

    expectation = raise_builder([how_raise], NotImplementedError)

    # Everything that depends on the merge results stays inside the context
    # manager: if the merge raises, none of the names below are bound.
    with expectation:
        expect = pandas_left.merge(pandas_right, on=["a"], how=how)
        got = frame_left.merge(frame_right, on=["a"], how=how)

        if len(expect) == len(got) == 0:
            # Empty results: normalise the pandas index and align the
            # column order before comparing.
            expect = expect.reset_index(drop=True)
            got = got[expect.columns]

        assert_eq(expect, got)
def test_onehot_get_dummies_simple():
    """Check ``get_dummies`` on a single integer column ``x`` holding 0..9.

    Two things are verified: the input frame is left unchanged by the call,
    and every encoded column after the first matches the corresponding row
    of an identity matrix of the same size.
    """
    df = DataFrame({'x': np.arange(10)})
    original = df.copy()
    encoded = get_dummies(df, prefix='test')

    # ``==`` between frames is elementwise (the final assertion below relies
    # on that), so asserting on the bare comparison never inspects the
    # values. Reduce it explicitly so a mutated input actually fails.
    assert (df == original).all().all()  # the original df should be unchanged

    # Skip the first column -- presumably the original ``x``; TODO confirm.
    cols = list(encoded.columns)[1:]
    # Pair each remaining column name with one row of the identity matrix.
    actual = DataFrame(dict(zip(cols, np.eye(len(cols)))))
    assert (encoded.loc[:, cols] == actual).all().all()
def test_dataframe_empty_concat():
    """Concatenating two zero-row frames gives zero rows and both columns."""
    empty = DataFrame()
    for column_name in ('a', 'b'):
        empty[column_name] = []

    duplicate = empty.copy()
    combined = gd.concat([empty, duplicate])

    # No rows appear, and the two columns survive the concatenation.
    assert len(combined) == 0
    assert len(combined.columns) == 2
def test_dataframe_copy_shallow():
    """Adding a column to a copy must not show up in the source frame."""
    # Test for copy dataframe using class method
    source = DataFrame()
    source['a'] = [1, 2, 3]

    # Only the copy receives the extra column.
    clone = source.copy()
    clone['b'] = [4, 2, 3]

    rendered = source.to_string()
    print(rendered)

    # Header ``a`` followed by index/value pairs; no ``b`` column expected.
    expected_text = ''' a 0 1 1 2 2 3 '''
    # values should match despite whitespace difference
    assert rendered.split() == expected_text.split()
def test_onehot_get_dummies_multicol(n_cols):
    """Check ``get_dummies`` on ``n_cols`` identical integer columns.

    The input frame has ``n_cols`` columns named after the first lowercase
    letters, each holding ``0..n_categories-1``. Two things are verified:
    the input frame is left unchanged, and the encoded columns after the
    first ``n_cols`` match stacked identity matrices, one per input column.

    Args:
        n_cols: number of input columns to build and encode.
    """
    from string import ascii_lowercase

    n_categories = 5
    # zip() stops at the shorter input, so at most 26 columns are created.
    df = DataFrame({
        name: np.arange(n_categories)
        for name, _ in zip(ascii_lowercase, range(n_cols))
    })
    original = df.copy()
    encoded = get_dummies(df, prefix='test')

    # ``==`` between frames is elementwise (the final assertion below relies
    # on that), so asserting on the bare comparison never inspects the
    # values. Reduce it explicitly so a mutated input actually fails.
    assert (df == original).all().all()

    # Skip the first ``n_cols`` columns -- presumably the untouched inputs;
    # TODO confirm.
    cols = list(encoded.columns)[n_cols:]
    # One identity matrix per input column, stacked row-wise; each row is
    # then paired with one encoded column name.
    stacked_identity = np.concatenate(
        [np.eye(n_categories) for _ in range(n_cols)])
    actual = DataFrame(dict(zip(cols, stacked_identity)))
    assert (encoded.loc[:, cols] == actual).all().all()