def test():
    """
    Demonstrate ``DataFrame.update``.

    Wherever the other DataFrame shares an index label and a column with the
    original DataFrame, the original value is replaced by the other's value,
    as long as that value is not np.nan. **Only index labels and columns that
    already exist in the original DataFrame are considered.**

    NOTE(review): other functions in this source are also named ``test``; if
    they live in the same module the later definition shadows the earlier
    ones -- confirm and rename if so.
    """
    # First patch, covering index labels 0 and 1:
    #    v
    # 0  1
    # 1  1
    patch_a = pd.DataFrame({"v": [1, 1]}, index=[0, 1])

    # Second patch, covering index labels 1 and 2:
    #    v
    # 1  2
    # 2  2
    patch_b = pd.DataFrame({"v": [2, 2]}, index=[1, 2])

    target = pd.DataFrame({"v": [0, 0, 0]}, index=[0, 1, 2])

    # Applying the first patch overwrites rows 0 and 1 only:
    #    v
    # 0  1
    # 1  1
    # 2  0
    target.update(patch_a)
    assert_value_equal(target.v, [1, 1, 0])

    # Applying the second patch overwrites rows 1 and 2:
    #    v
    # 0  1
    # 1  2
    # 2  2
    target.update(patch_b)
    assert_value_equal(target.v, [1, 2, 2])
def test_sort_index():
    """
    Sorting by row labels (``axis=0``) reorders the index and the values together.

    NOTE(review): another function named ``test_sort_index`` appears in this
    source; if both live in the same module one shadows the other -- confirm.
    """
    unsorted = pd.DataFrame({"v": [2, 3, 1]}, index=[2, 3, 1])
    by_label = unsorted.sort_index(axis=0)

    assert_value_equal(by_label.index, [1, 2, 3])
    assert_value_equal(by_label.v, [1, 2, 3])
def test_timeseries():
    """Build a DataFrame whose index is a run of consecutive calendar days."""
    data = [[1, 2, 3], [4, 5, 6]]
    df = pd.DataFrame(
        data,
        # One daily timestamp per row; deriving the count from ``data`` keeps
        # the index length in step with the number of rows.
        index=pd.date_range("2017-01-01", periods=len(data), freq="D"),
        columns=list("ABC"),
    )
    assert_value_equal(df, data)
def test_multiple_row():
    """
    Select several rows at once, by position and by label.

    NOTE(review): ``df`` is not defined in this function; presumably it is a
    module-level DataFrame whose 2nd and 3rd rows carry the labels 2 and 3 --
    confirm against the surrounding file.
    """
    expected = [[4, 5, 6], [7, 8, 9]]

    # Positional slice: positions 1 and 2 (the end of an iloc slice is exclusive).
    by_position = df.iloc[1:3]
    assert_value_equal(by_position, expected)
    assert isinstance(by_position, pd.DataFrame)

    # Label slice: labels 2 through 3 yield the same two rows.
    by_label = df.loc[2:3]
    assert_value_equal(by_label, expected)
    assert isinstance(by_label, pd.DataFrame)
def test_specified_row():
    """
    Select a single row, by position and by label; either way yields a Series.

    NOTE(review): ``df`` is not defined in this function; presumably it is a
    module-level DataFrame whose second row carries the label 2 -- confirm
    against the surrounding file.
    """
    expected = [4, 5, 6]

    # .iloc takes integer positions only; position 1 is the second row.
    by_position = df.iloc[1]
    assert_value_equal(by_position, expected)
    assert isinstance(by_position, pd.Series)

    # .loc takes index values; here the row whose index value is 2.
    by_label = df.loc[2]
    assert_value_equal(by_label, expected)
    assert isinstance(by_label, pd.Series)
def test_head_tail():
    """``head(1)`` returns the first row and ``tail(1)`` the last row."""
    rows = [[1, 2], [3, 4]]
    frame = pd.DataFrame(rows)

    first_row = frame.head(1)
    assert_value_equal(first_row, [[1, 2]])

    last_row = frame.tail(1)
    assert_value_equal(last_row, [[3, 4]])
def test_dropna():
    """Drop every row, or every column, that contains a missing value."""
    rows = [[1, 2, 3], [None, 5, 6], [7, 8, 9]]
    frame = pd.DataFrame(rows, index=[1, 2, 3], columns=list("ABC"))

    # axis=0 drops rows: the row labelled 2 holds the missing value.
    #    A  B  C
    # 1  1  2  3
    # 3  7  8  9
    without_na_rows = frame.dropna(axis=0)
    assert_value_equal(without_na_rows, [[1, 2, 3], [7, 8, 9]])

    # axis=1 drops columns: column A holds the missing value.
    #    B  C
    # 1  2  3
    # 2  5  6
    # 3  8  9
    without_na_cols = frame.dropna(axis=1)
    assert_value_equal(without_na_cols, [[2, 3], [5, 6], [8, 9]])
def test_sort_index():
    """
    Sort a shuffled DataFrame by one column or by several columns.

    NOTE(review): this function calls ``sort_values`` rather than
    ``sort_index``, and another function named ``test_sort_index`` appears in
    this source; if both live in the same module one shadows the other --
    confirm and consider renaming.
    """
    rows = [
        [1, 1, 1],
        [1, 2, 2],
        [1, 3, 3],
        [2, 1, 4],
        [2, 2, 5],
        [2, 3, 6],
        [3, 1, 7],
        [3, 2, 8],
        [3, 3, 9],
    ]
    # Start from a random row order so the sorts below have work to do.
    random.shuffle(rows)
    frame = pd.DataFrame(rows, columns=["k1", "k2", "v"])

    # Sort by k1 only. Example output (the order within equal k1 values
    # depends on the shuffle):
    # k1  k2  v
    #  1   3  3
    #  1   1  1
    #  1   2  2
    #  2   2  5
    #  2   1  4
    #  2   3  6
    #  3   1  7
    #  3   2  8
    #  3   3  9
    by_k1 = frame.sort_values(["k1"])
    assert_value_equal(by_k1.k1, [1, 1, 1, 2, 2, 2, 3, 3, 3])

    # Sort by k2 only. Example output (the order within equal k2 values
    # depends on the shuffle):
    # k1  k2  v
    #  2   1  4
    #  3   1  7
    #  1   1  1
    #  1   2  2
    #  3   2  8
    #  2   2  5
    #  1   3  3
    #  3   3  9
    #  2   3  6
    by_k2 = frame.sort_values(["k2"])
    assert_value_equal(by_k2.k2, [1, 1, 1, 2, 2, 2, 3, 3, 3])

    # Sort by k1, then k2. Every (k1, k2) pair is unique, so the order is fixed:
    # k1  k2  v
    #  1   1  1
    #  1   2  2
    #  1   3  3
    #  2   1  4
    #  2   2  5
    #  2   3  6
    #  3   1  7
    #  3   2  8
    #  3   3  9
    by_k1_k2 = frame.sort_values(["k1", "k2"])
    assert_value_equal(by_k1_k2.k1, [1, 1, 1, 2, 2, 2, 3, 3, 3])
    assert_value_equal(by_k1_k2.k2, [1, 2, 3, 1, 2, 3, 1, 2, 3])
def test_sort_column():
    """Sorting by column labels (``axis=1``) puts the columns in a, b, c order."""
    df = pd.DataFrame({"b": [2], "c": [3], "a": [1]})

    # sort_index returns a new DataFrame instead of sorting in place, so the
    # result must be rebound; otherwise the assertions below would inspect
    # the original, unsorted column order.
    df = df.sort_index(axis=1)

    assert_value_equal(df.columns, ["a", "b", "c"])
    # Each column keeps its own value after the reordering.
    assert_value_equal(df.a, [1])
    assert_value_equal(df.b, [2])
    assert_value_equal(df.c, [3])
def test():
    """
    Locate missing values with ``isnull``: per cell, overall, per column, per row.

    NOTE(review): other functions in this source are also named ``test``; if
    they live in the same module the later definition shadows the earlier
    ones -- confirm and rename if so.
    """
    df = pd.DataFrame([
        [1, 2, None],
        [None, 5, 6],
        [7, 8, 9],
    ])

    # Cell-by-cell mask: True exactly where a value is missing.
    assert_value_equal(df.isnull(), [
        [False, False, True],
        [True, False, False],
        [False, False, False],
    ])

    # Is there any missing value anywhere in the frame?
    # (Asserted directly on truthiness rather than compared with ``== True``.)
    assert df.isnull().values.any()

    # For each column: does it contain any missing value?
    assert_value_equal(df.isnull().any(axis=0), [True, False, True])

    # For each row: does it contain any missing value?
    assert_value_equal(df.isnull().any(axis=1), [True, True, False])
def test():
    """
    Concatenate two DataFrames that have the same columns and, for index
    labels that occur in both, choose whether to keep the earlier or the
    later row.

    NOTE(review): other functions in this source are also named ``test``; if
    they live in the same module the later definition shadows the earlier
    ones -- confirm and rename if so.
    """
    # Earlier frame:
    #    v
    # 0  1
    # 1  1
    earlier = pd.DataFrame({"v": [1, 1]}, index=[0, 1])

    # Later frame (shares index label 1 with the earlier one):
    #    v
    # 1  2
    # 2  2
    later = pd.DataFrame({"v": [2, 2]}, index=[1, 2])

    # Plain concatenation keeps both rows labelled 1:
    #    v
    # 0  1
    # 1  1
    # 1  2
    # 2  2
    combined = pd.concat([earlier, later])
    assert_value_equal(combined.v, [1, 1, 2, 2])

    # Drop duplicated index labels, keeping the earlier frame's row:
    #    v
    # 0  1
    # 1  1
    # 2  2
    repeated_after_first = combined.index.duplicated(keep="first")
    keep_earlier = combined[~repeated_after_first]
    assert_value_equal(keep_earlier.v, [1, 1, 2])

    # Drop duplicated index labels, keeping the later frame's row:
    #    v
    # 0  1
    # 1  2
    # 2  2
    repeated_before_last = combined.index.duplicated(keep="last")
    keep_later = combined[~repeated_before_last]
    assert_value_equal(keep_later.v, [1, 2, 2])
def test_from_records():
    """Build a DataFrame from a list of rows with explicit row and column labels."""
    records = [[1, 2], [3, 4]]
    frame = pd.DataFrame(
        records,
        columns=["c1", "c2"],
        index=["r1", "r2"],
    )
    assert_value_equal(frame, records)
def test_from_row_list():
    """Build a DataFrame from a list of per-row dicts keyed by column name."""
    row_dicts = [
        {"c1": 1, "c2": 2},
        {"c1": 3, "c2": 4},
    ]
    frame = pd.DataFrame(
        row_dicts,
        columns=["c1", "c2"],
        index=["r1", "r2"],
    )
    assert_value_equal(frame, [[1, 2], [3, 4]])
def test_from_dict():
    """Build a DataFrame from a dict that maps each column name to its values."""
    column_values = {
        "c1": [1, 3],
        "c2": [2, 4],
    }
    frame = pd.DataFrame(
        column_values,
        columns=["c1", "c2"],
        index=["r1", "r2"],
    )
    assert_value_equal(frame, [[1, 2], [3, 4]])