def test():
    """
    Demonstrate ``DataFrame.update``.

    ``update`` overwrites values of the calling DataFrame with values from
    another DataFrame at every position where both the index label and the
    column label match, as long as the incoming value is not ``np.nan``.

    **Only index labels and columns that already exist in the calling
    DataFrame are considered.**
    """
    df1 = pd.DataFrame({"v": [1, 1]}, index=[0, 1])
    # df1:
    #     v
    # 0   1
    # 1   1

    df2 = pd.DataFrame({"v": [2, 2]}, index=[1, 2])
    # df2:
    #     v
    # 1   2
    # 2   2

    df = pd.DataFrame({"v": [0, 0, 0]}, index=[0, 1, 2])

    # df1 covers labels 0 and 1, so label 2 keeps its original value.
    df.update(df1)
    assert_value_equal(df.v, [1, 1, 0])
    # df after the first update:
    #     v
    # 0   1
    # 1   1
    # 2   0

    # df2 covers labels 1 and 2, so label 0 keeps the value taken from df1.
    df.update(df2)
    assert_value_equal(df.v, [1, 2, 2])
    # df after the second update:
    #     v
    # 0   1
    # 1   2
    # 2   2
def test_sort_index():
    """Sort rows by their index labels and check labels and values move together."""
    shuffled = pd.DataFrame({"v": [2, 3, 1]}, index=[2, 3, 1])

    # axis=0 sorts along the row labels; the sorted frame is returned.
    df = shuffled.sort_index(axis=0)

    assert_value_equal(df.index, [1, 2, 3])
    assert_value_equal(df.v, [1, 2, 3])
def test_timeseries():
    """Build a DataFrame whose rows are labelled by a daily ``DatetimeIndex``."""
    data = [[1, 2, 3], [4, 5, 6]]
    df = pd.DataFrame(
        data,
        # One daily timestamp per row, so the index length always matches data.
        index=pd.date_range("2017-01-01", periods=len(data), freq="D"),
        columns=list("ABC"),
    )
    assert_value_equal(df, data)
Example #4
0
def test_multiple_row():
    """
    Select two consecutive rows, once by position and once by label.

    Relies on a ``df`` defined outside this function.
    NOTE(review): the expected values assume that ``df`` has rows
    ``[1, 2, 3], [4, 5, 6], [7, 8, 9]`` labelled 1, 2, 3 -- confirm against
    the fixture.
    """
    # By position: second and third rows via .iloc
    by_position = df.iloc[1:3]
    assert isinstance(by_position, pd.DataFrame)
    assert_value_equal(by_position, [[4, 5, 6], [7, 8, 9]])

    # By label: rows labelled 2 through 3 via .loc
    by_label = df.loc[2:3]
    assert isinstance(by_label, pd.DataFrame)
    assert_value_equal(by_label, [[4, 5, 6], [7, 8, 9]])
Example #5
0
def test_specified_row():
    """
    Select a single row, once by position and once by label.

    Relies on a ``df`` defined outside this function.
    NOTE(review): the expected values assume the second row of ``df`` is
    ``[4, 5, 6]`` and carries the label 2 -- confirm against the fixture.
    """
    # By position: .iloc accepts integer positions only; 1 is the second row.
    by_position = df.iloc[1]
    assert isinstance(by_position, pd.Series)
    assert_value_equal(by_position, [4, 5, 6])

    # By label: .loc looks up the row whose index value is 2.
    by_label = df.loc[2]
    assert isinstance(by_label, pd.Series)
    assert_value_equal(by_label, [4, 5, 6])
Example #6
0
def test_head_tail():
    """Check that ``head(1)`` gives the first row and ``tail(1)`` the last row."""
    df = pd.DataFrame([[1, 2], [3, 4]])

    first_row = df.head(1)
    assert_value_equal(first_row, [[1, 2]])

    last_row = df.tail(1)
    assert_value_equal(last_row, [[3, 4]])
def test_dropna():
    """Drop rows (axis=0) or columns (axis=1) that contain a missing value."""
    data = [[1, 2, 3], [None, 5, 6], [7, 8, 9]]
    df = pd.DataFrame(
        data,
        index=[1, 2, 3],
        columns=list("ABC"),
    )

    # By row: the row labelled 2 holds the missing value and is removed.
    res = df.dropna(axis=0)
    assert_value_equal(res, [[1, 2, 3], [7, 8, 9]])
    # res:
    #     A   B   C
    # 1   1   2   3
    # 3   7   8   9

    # By column: column A holds the missing value and is removed.
    res = df.dropna(axis=1)
    assert_value_equal(res, [[2, 3], [5, 6], [8, 9]])
    # res:
    #     B   C
    # 1   2   3
    # 2   5   6
    # 3   8   9
def test_sort_index():
    """
    Sort rows by the values of one or several columns.

    NOTE: despite its name, this test exercises ``DataFrame.sort_values``
    (ordering by column values), not ``DataFrame.sort_index``.

    The rows are shuffled before the frame is built, so when sorting by a
    single key the relative order of rows sharing that key varies from run to
    run; the tables in the comments below show one possible outcome.
    """
    data = [
        [1, 1, 1],
        [1, 2, 2],
        [1, 3, 3],
        [2, 1, 4],
        [2, 2, 5],
        [2, 3, 6],
        [3, 1, 7],
        [3, 2, 8],
        [3, 3, 9],
    ]
    columns = ["k1", "k2", "v"]
    random.shuffle(data)
    df = pd.DataFrame(data, columns=columns)

    # Single key: only the k1 column is guaranteed to be ordered.
    res = df.sort_values(["k1"])
    assert_value_equal(res.k1, [1, 1, 1, 2, 2, 2, 3, 3, 3])
    # One possible result:
    # k1  k2  v
    # 1   3   3
    # 1   1   1
    # 1   2   2
    # 2   2   5
    # 2   1   4
    # 2   3   6
    # 3   1   7
    # 3   2   8
    # 3   3   9

    # Single key: only the k2 column is guaranteed to be ordered.
    res = df.sort_values(["k2"])
    assert_value_equal(res.k2, [1, 1, 1, 2, 2, 2, 3, 3, 3])
    # One possible result:
    # k1  k2  v
    # 2   1   4
    # 3   1   7
    # 1   1   1
    # 1   2   2
    # 3   2   8
    # 2   2   5
    # 1   3   3
    # 3   3   9
    # 2   3   6

    # Two keys: k1 is the primary key and k2 breaks ties. Every (k1, k2) pair
    # is unique here, so the row order is fully determined.
    res = df.sort_values(["k1", "k2"])
    assert_value_equal(res.k1, [1, 1, 1, 2, 2, 2, 3, 3, 3])
    assert_value_equal(res.k2, [1, 2, 3, 1, 2, 3, 1, 2, 3])
    # Result:
    # k1  k2  v
    # 1   1   1
    # 1   2   2
    # 1   3   3
    # 2   1   4
    # 2   2   5
    # 2   3   6
    # 3   1   7
    # 3   2   8
    # 3   3   9
def test_sort_column():
    """Sort the columns of a DataFrame by their labels (axis=1)."""
    # Columns are deliberately declared out of alphabetical order.
    df = pd.DataFrame({
        "b": [2],
        "c": [3],
        "a": [1],
    })

    # sort_index returns a new DataFrame (inplace defaults to False), so the
    # result must be rebound; calling it for its side effect leaves df as-is.
    df = df.sort_index(axis=1)

    assert_value_equal(df.columns, ["a", "b", "c"])
    assert_value_equal(df.a, [1])
    assert_value_equal(df.b, [2])
    assert_value_equal(df.c, [3])
Example #10
0
def test():
    """Detect missing values element-wise, overall, per column and per row."""
    df = pd.DataFrame([
        [1, 2, None],
        [None, 5, 6],
        [7, 8, 9],
    ])

    # Element-wise mask: True wherever a value is missing.
    assert_value_equal(df.isnull(), [
        [False, False, True],
        [True, False, False],
        [False, False, False],
    ])

    # Any missing value anywhere in the frame. Plain truthiness is used
    # instead of comparing against True.
    assert df.isnull().values.any()

    # For each column: does the column contain a missing value?
    assert_value_equal(df.isnull().any(axis=0), [True, False, True])

    # For each row: does the row contain a missing value?
    assert_value_equal(df.isnull().any(axis=1), [True, True, False])
def test():
    """
    Concatenate two DataFrames that share the same columns and, for index
    labels present in both, choose whether to keep the row from the earlier
    or the later frame.
    """
    df1 = pd.DataFrame({"v": [1, 1]}, index=[0, 1])
    # df1:
    #     v
    # 0   1
    # 1   1

    df2 = pd.DataFrame({"v": [2, 2]}, index=[1, 2])
    # df2:
    #     v
    # 1   2
    # 2   2

    # concat stacks the rows; index label 1 now appears twice.
    df = pd.concat([df1, df2])
    assert_value_equal(df.v, [1, 1, 2, 2])
    # df:
    #    v
    # 0  1
    # 1  1
    # 1  2
    # 2  2

    # Remove duplicated index labels, keeping df1's row (first occurrence).
    res = df[~df.index.duplicated(keep="first")]
    assert_value_equal(res.v, [1, 1, 2])
    # res:
    #    v
    # 0  1
    # 1  1
    # 2  2

    # Remove duplicated index labels, keeping df2's row (last occurrence).
    res = df[~df.index.duplicated(keep="last")]
    assert_value_equal(res.v, [1, 2, 2])
    # res:
    #    v
    # 0  1
    # 1  2
    # 2  2
def test_from_records():
    """Build a DataFrame from a list of row lists with explicit row/column labels."""
    rows = [[1, 2], [3, 4]]
    df = pd.DataFrame(
        rows,
        index=["r1", "r2"],
        columns=["c1", "c2"],
    )
    assert_value_equal(df, rows)
def test_from_row_list():
    """Build a DataFrame from a list of per-row dicts keyed by column name."""
    records = [
        {"c1": 1, "c2": 2},
        {"c1": 3, "c2": 4},
    ]
    df = pd.DataFrame(
        records,
        index=["r1", "r2"],
        columns=["c1", "c2"],
    )
    assert_value_equal(df, [[1, 2], [3, 4]])
def test_from_dict():
    """Build a DataFrame from a dict that maps each column name to its values."""
    by_column = {
        "c1": [1, 3],
        "c2": [2, 4],
    }
    df = pd.DataFrame(
        by_column,
        index=["r1", "r2"],
        columns=["c1", "c2"],
    )
    assert_value_equal(df, [[1, 2], [3, 4]])