def test_checks_fullcols_index_simple():
    """Exercise the ``index`` argument of ``ko.DataFrame``.

    Three invalid indexes are tried first: four labels for three-row
    columns (``ValueError`` expected), a dict (``TypeError`` expected) and
    a list of strings (``TypeError`` expected).  A valid ``[1, 2, 3]``
    index must then be readable back through ``df.index``.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }
    # Copied with list(...) for every call so each constructor receives
    # its own list object.
    headers = ('Integer', 'Float', 'String', 'Boolean')

    # Index with one entry too many.
    try:
        ko.DataFrame(my_dict, columns=list(headers), index=[1, 2, 3, 4])
    except ValueError:
        length_rejected = True
    else:
        length_rejected = False

    # Index given as a dict instead of a list.
    try:
        ko.DataFrame(my_dict, columns=list(headers),
                     index={1: 1, 2: 2, 3: 3})
    except TypeError:
        container_rejected = True
    else:
        container_rejected = False

    # Index whose members are strings.
    try:
        ko.DataFrame(my_dict, columns=list(headers), index=['a', 'b', 'c'])
    except TypeError:
        members_rejected = True
    else:
        members_rejected = False

    # All three attempts run before any verdict is checked.
    assert length_rejected
    assert container_rejected
    assert members_rejected

    frame = ko.DataFrame(my_dict, columns=list(headers), index=[1, 2, 3])

    assert np.array_equal(frame.index, np.array([1, 2, 3]))
def test_checks_column_list_type_simple():
    """Expect ``ko.DataFrame`` to reject a string passed as ``columns``.

    A list of column labels must be accepted, while a single string such
    as ``"One Two Three Four"`` must raise ``TypeError``.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }

    # The valid call stays outside the ``try``: a TypeError raised here
    # must fail the test instead of being mistaken for the rejection of
    # the invalid call below.
    ko.DataFrame(data=my_dict, columns=[1, 2, 3, 4])

    try:
        ko.DataFrame(data=my_dict, columns="One Two Three Four")
    except TypeError:
        error_caught = True
    else:
        error_caught = False

    assert error_caught
def test_aggregation():
    """Check ``sum``, ``min``, ``max``, ``mean``, ``median`` and ``std``.

    The frame is built from six columns while every expected list holds
    five values; presumably the non-numeric ``"str"`` column is skipped by
    the aggregations -- confirm against the DataFrame implementation.
    """
    my_dict = {
        "int": np.array([1, 2, 3, 4, 5]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
    }

    df = ko.DataFrame(data=my_dict)

    expected_sum = [15, 16.5, 3, 15, 40]
    expected_min = [1, 1.1, False, 1, 6]
    expected_max = [5, 5.5, True, 5, 10]
    expected_mean = [3.0, 3.3, 0.6, 3.0, 8.0]
    expected_median = [3.0, 3.3, 1.0, 3.0, 8.0]
    expected_std = [
        1.4142135623730951,
        1.5556349186104046,
        0.48989794855663565,
        1.4142135623730951,
        1.4142135623730951,
    ]

    def assert_close(actual, expected):
        # Results of floating-point arithmetic are compared with
        # np.allclose's default tolerances rather than bit-for-bit, so a
        # last-digit rounding difference does not fail the test.  The
        # length check keeps a missing or extra column from being hidden
        # by broadcasting.
        assert len(actual) == len(expected)
        assert np.allclose(actual, expected)

    assert_close(df.sum(), expected_sum)
    # min/max pick existing values without arithmetic: exact equality.
    assert df.min() == expected_min
    assert df.max() == expected_max
    assert_close(df.mean(), expected_mean)
    assert_close(df.median(), expected_median)
    assert_close(df.std(), expected_std)
def test_column_drop(drop_col, col_result):
    """Drop ``drop_col`` with and without ``inplace`` and compare columns.

    ``drop_col`` and ``col_result`` are supplied from outside this
    function (presumably by a ``parametrize`` decorator that is not part
    of this excerpt).
    """
    source = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    }
    untouched = ['int', 'float', 'str', 'bool', '4']

    frame = ko.DataFrame(source)
    # Sanity check on the constructor: the integer key 4 is expected to
    # show up as the string '4'.
    assert frame.columns == untouched

    # Non-inplace drop: the result is a new frame, the source is intact.
    trimmed = frame.drop(drop_col, inplace=False)
    assert all(type(label) is str for label in trimmed.columns)
    assert list(trimmed.df) == col_result  # keys of the underlying self.df
    assert trimmed.columns == col_result
    assert frame.columns == untouched

    # Inplace drop: the source frame itself loses the column.
    frame.drop(drop_col, inplace=True)
    assert all(type(label) is str for label in frame.columns)
    assert frame.columns == col_result
Exemple #5
0
def test_access_underlying_numpy_array():
    """Slice a matrix-backed frame with a ``[rows, columns]`` tuple key."""
    df = ie.DataFrame(np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]]))

    # Rows 0-1 of column 1 of the matrix above.
    expected_out = np.array([[2], [3]])

    assert (df[0:2, 1:2] == expected_out).all()
Exemple #6
0
def test_checks_data_dict_simple():
    """Expect ``ko.DataFrame`` to raise ``TypeError`` for list ``data``."""
    try:
        ko.DataFrame(data=[1, 2, 3, 4, 5])
    except TypeError:
        rejected = True
    else:
        rejected = False

    assert rejected
def test_df_indexing_col(col, expected_output):
    """Compare ``df.loc(col)`` with ``expected_output``.

    Both arguments come from outside this function (presumably a
    ``parametrize`` decorator that is not part of this excerpt).
    """
    frame = ko.DataFrame(data={
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    assert np.array_equal(frame.loc(col), expected_output)
def test_df_indexing_bracket_col_int_ind(col, row, expected_output):
    """Compare the chained lookup ``df[col][row]`` with ``expected_output``.

    The three arguments come from outside this function (presumably a
    ``parametrize`` decorator that is not part of this excerpt).
    """
    frame = ko.DataFrame(data={
        'int': np.array([1, 2, 3, 4]),
        'float': np.array([1.1, 2.2, 3.3, 4.4]),
        'str': np.array(['one', 'two', 'three', 'four']),
        'bool': np.array([True, False, True, False]),
    })

    # Column lookup first, then the row selection on what it returned.
    selected_column = frame[col]

    assert np.array_equal(selected_column[row], expected_output)
Exemple #9
0
def test_access_col_name_dict():
    """Read column ``"c1"`` by name from a dict-backed frame."""
    df1 = ie.DataFrame({
        "c1": np.array(["a", "b", "c"]),
        "c2": np.array([1, 3, 5]),
        "c3": np.array([2, 7, 9]),
    })

    expected_out1 = np.array(["a", "b", "c"])

    assert (df1["c1"] == expected_out1).all()
def test_simple_dataframe_index_column():
    """Check the ``index`` and ``columns`` of a frame built without them.

    The index is expected to be ``0..2`` and the columns to be the dict
    keys in insertion order.
    """
    frame = ko.DataFrame(data={
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    assert np.array_equal(frame.index, np.array([0, 1, 2]))
    assert np.array_equal(frame.columns, ['int', 'float', 'str', 'bool'])
Exemple #11
0
def test_length_check_simple():
    """Expect ``ValueError`` when the columns have different lengths."""
    # 'int' has four values, every other column has three.
    uneven_columns = {
        'int': np.array([1, 2, 3, 4]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }

    try:
        ko.DataFrame(data=uneven_columns)
    except ValueError:
        rejected = True
    else:
        rejected = False

    assert rejected
Exemple #12
0
def test_checks_loc_errors_simple():
    """Check ``df.loc`` on valid arguments and on float arguments.

    Valid integer lookups are asserted first; every call that passes a
    float as column and/or index is then expected to raise ``TypeError``.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }

    df = ko.DataFrame(my_dict)

    assert np.array_equal(df.loc(0), np.array([1, 2, 3]))
    assert df.loc(0, 0) == 1
    # Presumably the last value of the 'bool' column (True) -- only its
    # truthiness is checked.
    assert df.loc(3, 2)

    invalid_calls = [
        (1.1,),        # bad column parameter
        ('int', 1.1),  # bad index parameter
        (2.23, 1.1),   # bad column with bad index
        (2.2, 1),      # bad column with good index
    ]
    for args in invalid_calls:
        # try/except/else instead of a flag asserted in ``finally``: when
        # an unexpected exception type propagated, the flag was unbound
        # and ``finally`` raised UnboundLocalError on top of the real
        # error.
        try:
            df.loc(*args)
        except TypeError:
            continue
        raise AssertionError(
            'df.loc{!r} did not raise TypeError'.format(args))
Exemple #13
0
def test_len():
    """Expect ``len(df)`` to be 5 for a frame whose columns hold 5 values."""
    frame = ko.DataFrame(data={
        "int": np.array([1, 2, 3, 4, 5]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
    })

    assert len(frame) == 5
Exemple #14
0
def test_set_items():
    """Assign columns with ``df[name] = array`` and compare to a reference.

    One existing column (``'int'``) is overwritten and two new ones are
    added; the result must match a frame built directly from the final
    data.
    """
    initial = {
        "int": np.array([1, 2, 3, 4, 5]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
    }
    # Same data with 'int' replaced and the two extra columns appended.
    final = {
        "int": np.array([20, 21, 22, 23, 24]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
        "newcol": np.array(['a', 'b', 'c', 'd', 'e']),
        "newcol2": np.array([10, 11, 12, 13, 14]),
    }

    frame = ko.DataFrame(data=initial)
    reference = ko.DataFrame(data=final)

    frame['int'] = np.array([20, 21, 22, 23, 24])
    frame['newcol'] = np.array(['a', 'b', 'c', 'd', 'e'])
    frame['newcol2'] = np.array([10, 11, 12, 13, 14])

    # Existing column was overwritten.
    assert (frame['int'] == reference['int']).all()
    # Previously missing column was created.
    assert (frame['newcol'] == reference['newcol']).all()
    # Column list reflects the additions.
    assert frame.columns == reference.columns
    # Every column matches the reference frame.
    for label in frame.columns:
        assert (frame[label] == reference[label]).all()
Exemple #15
0
def test_checks_frame_print_simple():
    """Compare ``df._frame()`` with a hand-written fixed-width rendering.

    The expected text is a header line, a dtype line, an empty line and
    one line per row, each cell padded to 15 characters.

    NOTE(review): ``pd`` is presumably the package under test rather than
    pandas, since ``_frame`` is called on the result -- confirm the import.
    NOTE(review): the ``int32`` in the dtype line looks platform-dependent
    -- confirm on the platforms this suite runs on.
    """
    columns_data = {
        'int': [1, 2, 3],
        'float': [1.1, 2.2, 3.3],
        'string': ['one', 'two', 'three'],
    }

    expected_result = (
        'int            float          string         \n'
        'int32          float64        <U5            \n'
        '\n'
        '1              1.1            one            \n'
        '2              2.2            two            \n'
        '3              3.3            three          \n'
    )

    assert pd.DataFrame(columns_data)._frame() == expected_result
def test_col_rename_errors():
    """Expect ``df.rename`` to reject invalid arguments.

    Non-string old or new names and a non-boolean ``inplace`` must raise
    ``TypeError``; an old name that is not a column must raise
    ``ValueError``.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    }

    df = ko.DataFrame(my_dict)

    # (positional args, inplace value, expected exception); the calls run
    # in this order against the same frame.
    invalid_calls = [
        (('int', 3), True, TypeError),               # new col not a string
        ((3, 'int'), True, TypeError),               # old col not a string
        (('int', 'Integer'), "Seven", TypeError),    # inplace not a Boolean
        (('hello', 'goodbye'), True, ValueError),    # old col not in frame
    ]
    for args, inplace, expected_error in invalid_calls:
        # try/except/else instead of a flag asserted in ``finally``: when
        # an unexpected exception type propagated, the flag was unbound
        # and ``finally`` raised UnboundLocalError on top of the real
        # error.
        try:
            df.rename(*args, inplace=inplace)
        except expected_error:
            continue
        raise AssertionError(
            'df.rename{!r} with inplace={!r} did not raise {}'.format(
                args, inplace, expected_error.__name__))
Exemple #17
0
def test_checks_get_row_errors_simple():
    """Check ``get_row`` on row 0, then expect ``TypeError`` for ``'a'``."""
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    # First value of each column, in column order.
    assert frame.get_row(0) == [1, 1.1, 'one', True]

    try:
        frame.get_row('a')
    except TypeError:
        rejected = True
    else:
        rejected = False

    assert rejected
def test_column_names_types_reassignment(col_index,
                                         col_name,
                                         col_result,
                                         name_type):
    """Overwrite one entry of ``df.columns`` and check its value and type.

    The four arguments come from outside this function (presumably a
    ``parametrize`` decorator that is not part of this excerpt).
    """
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    })

    # Starting point: the integer key 4 is expected to appear as '4'.
    assert frame.columns == ['int', 'float', 'str', 'bool', '4']
    assert frame.columns[col_index] == col_name

    frame.columns[col_index] = col_result

    assert frame.columns[col_index] == col_result
    # Exact type match, subclasses are not accepted.
    assert type(frame.columns[col_index]) is name_type
Exemple #19
0
def test_get_row():
    """Expect ``get_row(i)`` to return the i-th value of every column."""
    frame = ko.DataFrame(data={
        "int": np.array([1, 2, 3, 4, 5]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
    })

    # One expected list per row, stored at the position of that row.
    expected_rows = [
        [1, 1.1, "one", True, 1, 6],
        [2, 2.2, "two", False, 2, 7],
        [3, 3.3, "three", True, 3, 8],
        [4, 4.4, "four", False, 4, 9],
        [5, 5.5, "five", True, 5, 10],
    ]

    for position, expected_row in enumerate(expected_rows):
        assert frame.get_row(position) == expected_row
def test_col_drop_errors():
    """Expect ``df.drop`` to reject invalid arguments.

    A non-string column and a non-boolean ``inplace`` must raise
    ``TypeError``; a column name that is not in the frame must raise
    ``ValueError``.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    }

    df = ko.DataFrame(my_dict)

    # (column argument, inplace value, expected exception); the calls run
    # in this order against the same frame.
    invalid_calls = [
        (3, True, TypeError),          # drop_col is not a string
        ('int', "Seven", TypeError),   # inplace is not a Boolean
        ('hello', True, ValueError),   # drop_col is not in the frame
    ]
    for drop_col, inplace, expected_error in invalid_calls:
        # try/except/else instead of a flag asserted in ``finally``: when
        # an unexpected exception type propagated, the flag was unbound
        # and ``finally`` raised UnboundLocalError on top of the real
        # error.
        try:
            df.drop(drop_col, inplace=inplace)
        except expected_error:
            continue
        raise AssertionError(
            'df.drop({!r}, inplace={!r}) did not raise {}'.format(
                drop_col, inplace, expected_error.__name__))
Exemple #21
0
def test_checks_set_item_errors_simple():
    """Expect ``df[name] = value`` to reject a list and a wrong length.

    Assigning a plain list must raise ``TypeError``; assigning an array
    with five values to a three-row frame must raise ``ValueError``.
    """
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    # Plain list instead of a NumPy array.
    try:
        frame['int'] = [1, 2, 3]
    except TypeError:
        list_rejected = True
    else:
        list_rejected = False

    # Array longer than the existing columns.
    try:
        frame['int'] = np.array([1, 2, 3, 4, 5])
    except ValueError:
        length_rejected = True
    else:
        length_rejected = False

    # Both assignments are attempted before either verdict is checked.
    assert list_rejected
    assert length_rejected
Exemple #22
0
import numpy as np
import ie_pandas as ie
import pytest


@pytest.mark.parametrize(
    "funct_input,expected",
    [
        # Integers only.
        (
            ie.DataFrame([[1, 2, 6], [3, 4, 9], [1, 4, 10], [7, 3, 4]]),
            [7, 4, 10],
        ),
        # Integers mixed with floats.
        (
            ie.DataFrame([[1, 2, 6], [3.0, 4, 9], [1, 4, 10.0], [7, 3, 4]]),
            [7, 4, 10.0],
        ),
        # A string in the first column: only two maxima are expected
        # (presumably non-numeric columns are left out -- confirm).
        (
            ie.DataFrame([[1, 2, 6], ["ABC", 4, 9], [1, 4, 10], [7, 3, 4]]),
            [4, 10],
        ),
    ],
)
def test_max(funct_input, expected):
    """Compare ``funct_input.max()`` with the expected list of maxima."""
    assert expected == funct_input.max()
Exemple #23
0
# Test mean
import numpy as np
import ie_pandas as ie
import pytest


@pytest.mark.parametrize(
    "input, expected_out",
    [
        (
            ie.DataFrame(
                np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]]),
                ["c1", "c2", "c3"],
                ["r1", "r2", "r3"],
            ),
            [5, 4, 6],
        ),
        (
            ie.DataFrame({
                "c1": np.array(["a", "b", "c"]),
                "c2": np.array([1, 3, 5]),
                "c3": np.array([2, 7, 9]),
            }),
            [3, 6],
        ),
        (ie.DataFrame({
            "c1": [1, 2, 3],
            "c2": [6, 3, 6],
            "c3": [7, 7, 1]
        }), [2, 5, 5]),
        (
Exemple #24
0
def test_access_without_col_name():
    """Read column ``"0"`` of a frame built from a nested list.

    No column names are given to the constructor; the first column is
    expected to be reachable under the string ``"0"``.
    """
    frame = ie.DataFrame([[1, 2, 3], [7, 3, 6], [7, 7, 9]])

    # First element of each inner list.
    first_column = np.array([1, 7, 7])

    assert (frame["0"] == first_column).all()
Exemple #25
0
# Test index
import numpy as np
import ie_pandas as ie
import pytest


@pytest.mark.parametrize(
    "input, expected_out",
    [
        # Frame backed by a 3x3 matrix.
        (
            ie.DataFrame(np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]])),
            [0, 1, 2],
        ),
        # Frame backed by a dict of three-element columns.
        (
            ie.DataFrame({
                "c1": np.array(["a", "b", "c"]),
                "c2": np.array([1, 3, 5]),
                "c3": np.array([2, 7, 9]),
            }),
            [0, 1, 2],
        ),
    ],
)
def test_index_created_when_not_specified(input, expected_out):
    """Expect the index ``[0, 1, 2]`` when no ``index`` is given."""
    assert input.index == expected_out


@pytest.mark.parametrize(
    "data,given_index,expected_index",
    [
        ({
            "c0": [1],
Exemple #26
0
def test_correct_row_index_names(data, given_index, expected_index):
    """Compare ``df.index`` with ``expected_index`` for a given ``index``.

    The three arguments come from outside this function (presumably the
    ``parametrize`` decorator that is cut off in this excerpt).
    """
    frame = ie.DataFrame(data, index=given_index)

    assert expected_index == frame.index
Exemple #27
0
# Test median
import numpy as np
import ie_pandas as ie
import pytest


@pytest.mark.parametrize(
    "input, expected_out",
    [
        (
            ie.DataFrame(
                np.array([[1, 2, 3], [2, 3, 6], [3, 7, 9]]),
                ["c1", "c2", "c3"],
                ["r1", "r2", "r3"],
            ),
            [2, 3, 6],
        ),
        (
            ie.DataFrame(
                {
                    "c1": np.array(["a", "b", "c"]),
                    "c2": np.array([1, 3, 5]),
                    "c3": np.array([2, 7, 9]),
                }
            ),
            [3, 7],
        ),
        (
            ie.DataFrame(
                {
                    "c1": np.array(["a", 1, 2]),