def test_checks_fullcols_index_simple():
    """Check ``index`` validation when columns and index are both given.

    Three invalid ``index`` arguments are tried first: one with four labels
    for three rows (``ValueError`` expected), a dict instead of a list
    (``TypeError`` expected) and a list of strings (``TypeError`` expected).
    All three attempts run before any outcome is asserted. Finally a valid
    index is checked to be exposed through ``df.index``.
    """
    data = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }
    # Kept as a tuple so every constructor call receives its own fresh list.
    col_names = ('Integer', 'Float', 'String', 'Boolean')

    # Wrong index length.
    try:
        ko.DataFrame(data, columns=list(col_names), index=[1, 2, 3, 4])
    except ValueError:
        length_rejected = True
    else:
        length_rejected = False

    # Wrong index container type.
    try:
        ko.DataFrame(
            data,
            columns=list(col_names),
            index={1: 1, 2: 2, 3: 3},
        )
    except TypeError:
        container_rejected = True
    else:
        container_rejected = False

    # Wrong index member type.
    try:
        ko.DataFrame(data, columns=list(col_names), index=['a', 'b', 'c'])
    except TypeError:
        member_rejected = True
    else:
        member_rejected = False

    assert length_rejected is True
    assert container_rejected is True
    assert member_rejected is True

    # A well-formed index must come back unchanged.
    frame = ko.DataFrame(data, columns=list(col_names), index=[1, 2, 3])
    assert np.array_equal(frame.index, np.array([1, 2, 3]))
def test_checks_column_list_type_simple():
    """Check that ``columns`` given as a plain string raises ``TypeError``.

    The valid call (a list of labels) is made outside the ``try`` block so
    that a ``TypeError`` raised by it fails the test, instead of being
    mistaken for the rejection of the invalid string argument.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }

    # Accepted form: a list of column labels. This must not raise.
    ko.DataFrame(data=my_dict, columns=[1, 2, 3, 4])

    # Rejected form: a single string instead of a list.
    try:
        ko.DataFrame(data=my_dict, columns="One Two Three Four")
    except TypeError:
        error_caught = True
    else:
        error_caught = False

    assert error_caught
def test_aggregation():
    """Compare every aggregation method against hard-coded expected lists.

    The frame is built from six columns, while each expected list holds five
    values: there is no entry for the ``"str"`` column. Results are compared
    with exact equality, in the order sum, min, max, mean, median, std.
    """
    frame = ko.DataFrame(
        data={
            "int": np.array([1, 2, 3, 4, 5]),
            "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
            "str": np.array(["one", "two", "three", "four", "five"]),
            "bool": np.array([True, False, True, False, True]),
            4: np.array([1, 2, 3, 4, 5]),
            True: np.array([6, 7, 8, 9, 10]),
        }
    )

    # (method name, expected result) pairs, checked in this order.
    expectations = (
        ("sum", [15, 16.5, 3, 15, 40]),
        ("min", [1, 1.1, False, 1, 6]),
        ("max", [5, 5.5, True, 5, 10]),
        ("mean", [3.0, 3.3, 0.6, 3.0, 8.0]),
        ("median", [3.0, 3.3, 1.0, 3.0, 8.0]),
        (
            "std",
            [
                1.4142135623730951,
                1.5556349186104046,
                0.48989794855663565,
                1.4142135623730951,
                1.4142135623730951,
            ],
        ),
    )

    for method_name, expected in expectations:
        assert getattr(frame, method_name)() == expected
def test_column_drop(drop_col, col_result):
    """Check ``drop`` both as a copy (``inplace=False``) and in place.

    Args:
        drop_col: Column passed to ``drop``.
        col_result: Column list expected once ``drop_col`` is removed.
    """
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    })
    initial_cols = ['int', 'float', 'str', 'bool', '4']

    # Sanity check on the constructor: the integer key 4 shows up as '4'.
    assert frame.columns == initial_cols

    # Non-inplace drop: the returned frame changes, the source does not.
    trimmed = frame.drop(drop_col, inplace=False)
    assert all(type(name) is str for name in trimmed.columns)
    assert list(trimmed.df) == col_result  # keys of the underlying mapping
    assert trimmed.columns == col_result
    assert frame.columns == initial_cols

    # In-place drop: the source frame itself is modified.
    frame.drop(drop_col, inplace=True)
    assert all(type(name) is str for name in frame.columns)
    assert frame.columns == col_result
def test_access_underlying_numpy_array():
    """Check 2-D slice access on a frame built from a NumPy matrix."""
    frame = ie.DataFrame(np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]]))

    # Rows 0-1 of column 1 only.
    window = frame[0:2, 1:2]

    assert (window == np.array([[2], [3]])).all()
def test_checks_data_dict_simple():
    """Check that passing a plain list as ``data`` raises ``TypeError``."""
    try:
        ko.DataFrame(data=[1, 2, 3, 4, 5])
    except TypeError:
        rejected = True
    else:
        rejected = False

    assert rejected
def test_df_indexing_col(col, expected_output):
    """Check that ``loc(col)`` returns the expected column values.

    Args:
        col: Column selector handed to ``loc``.
        expected_output: Values the selected column must equal.
    """
    frame = ko.DataFrame(
        data={
            'int': np.array([1, 2, 3]),
            'float': np.array([1.1, 2.2, 3.3]),
            'str': np.array(['one', 'two', 'three']),
            'bool': np.array([True, False, True]),
        }
    )

    assert np.array_equal(frame.loc(col), expected_output)
def test_df_indexing_bracket_col_int_ind(col, row, expected_output):
    """Check chained bracket access ``df[col][row]``.

    Args:
        col: Column selector used in the first bracket.
        row: Row selector applied to the selected column.
        expected_output: Value(s) the chained lookup must equal.
    """
    frame = ko.DataFrame(
        data={
            'int': np.array([1, 2, 3, 4]),
            'float': np.array([1.1, 2.2, 3.3, 4.4]),
            'str': np.array(['one', 'two', 'three', 'four']),
            'bool': np.array([True, False, True, False]),
        }
    )

    column_values = frame[col]
    assert np.array_equal(column_values[row], expected_output)
def test_access_col_name_dict():
    """Check column access by name on a frame built from a dict."""
    frame = ie.DataFrame({
        "c1": np.array(["a", "b", "c"]),
        "c2": np.array([1, 3, 5]),
        "c3": np.array([2, 7, 9]),
    })

    column = frame["c1"]

    assert (column == np.array(["a", "b", "c"])).all()
def test_simple_dataframe_index_column():
    """Check the default index and the column list of a dict-built frame."""
    frame = ko.DataFrame(
        data={
            'int': np.array([1, 2, 3]),
            'float': np.array([1.1, 2.2, 3.3]),
            'str': np.array(['one', 'two', 'three']),
            'bool': np.array([True, False, True]),
        }
    )

    # No index was supplied, so 0..2 is expected for the three rows.
    assert np.array_equal(frame.index, np.array([0, 1, 2]))
    # Columns are expected in the dict's key order.
    assert np.array_equal(frame.columns, ['int', 'float', 'str', 'bool'])
def test_length_check_simple():
    """Check that columns of unequal length raise ``ValueError``."""
    # 'int' has four values while every other column has three.
    ragged = {
        'int': np.array([1, 2, 3, 4]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }

    try:
        ko.DataFrame(data=ragged)
    except ValueError:
        rejected = True
    else:
        rejected = False

    assert rejected
def test_checks_loc_errors_simple():
    """Check ``loc`` on valid selectors and its ``TypeError`` on bad ones.

    Each invalid call is wrapped in ``try``/``except``/``else`` rather than
    asserting a flag inside ``finally``: with ``finally``, an unexpected
    exception type would leave the flag unbound and the resulting
    ``UnboundLocalError`` would mask the real failure.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    }
    df = ko.DataFrame(my_dict)

    # Valid lookups: whole column by position, then single cells.
    assert np.array_equal(df.loc(0), np.array([1, 2, 3]))
    assert df.loc(0, 0) == 1
    # Truthiness check only; presumably column 3 ('bool') at row 2 -- confirm.
    assert df.loc(3, 2)

    # Argument tuples that must each be rejected with TypeError.
    bad_calls = [
        (1.1,),        # bad column parameter
        ('int', 1.1),  # bad index parameter
        (2.23, 1.1),   # bad column together with bad index
        (2.2, 1),      # bad column with a good index
    ]
    for args in bad_calls:
        try:
            df.loc(*args)
        except TypeError:
            caught = True
        else:
            caught = False
        assert caught, "loc%r did not raise TypeError" % (args,)
def test_len():
    """Check that ``len(df)`` is 5 for a frame whose columns hold 5 values."""
    frame = ko.DataFrame(
        data={
            "int": np.array([1, 2, 3, 4, 5]),
            "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
            "str": np.array(["one", "two", "three", "four", "five"]),
            "bool": np.array([True, False, True, False, True]),
            4: np.array([1, 2, 3, 4, 5]),
            True: np.array([6, 7, 8, 9, 10]),
        }
    )

    assert len(frame) == 5
def test_set_items():
    """Check item assignment for an existing column and for new columns.

    A frame is modified through ``df[name] = array`` and then compared,
    column by column, with a reference frame built directly from the
    expected final data.
    """
    starting_data = {
        "int": np.array([1, 2, 3, 4, 5]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
    }
    final_data = {
        "int": np.array([20, 21, 22, 23, 24]),
        "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
        "str": np.array(["one", "two", "three", "four", "five"]),
        "bool": np.array([True, False, True, False, True]),
        4: np.array([1, 2, 3, 4, 5]),
        True: np.array([6, 7, 8, 9, 10]),
        "newcol": np.array(['a', 'b', 'c', 'd', 'e']),
        "newcol2": np.array([10, 11, 12, 13, 14]),
    }
    df = ko.DataFrame(data=starting_data)
    reference = ko.DataFrame(data=final_data)

    # Overwrite one existing column and append two new ones.
    df['int'] = np.array([20, 21, 22, 23, 24])
    df['newcol'] = np.array(['a', 'b', 'c', 'd', 'e'])
    df['newcol2'] = np.array([10, 11, 12, 13, 14])

    # The existing column was replaced.
    assert (df['int'] == reference['int']).all()
    # The previously missing column was added.
    assert (df['newcol'] == reference['newcol']).all()
    # The column list reflects the additions.
    assert df.columns == reference.columns
    # Every column matches the reference frame.
    for name in df.columns:
        assert (df[name] == reference[name]).all()
def test_checks_frame_print_simple():
    """Check the text produced by ``_frame()`` for a small three-column frame.

    The expected text has a header line of column names, a line of dtypes,
    a blank line and then one line per row.
    """
    frame = pd.DataFrame({
        'int': [1, 2, 3],
        'float': [1.1, 2.2, 3.3],
        'string': ['one', 'two', 'three'],
    })

    expected_text = (
        'int float string \n'
        'int32 float64 <U5 \n'
        '\n'
        '1 1.1 one \n'
        '2 2.2 two \n'
        '3 3.3 three \n'
    )

    assert frame._frame() == expected_text
def test_col_rename_errors():
    """Check that ``rename`` rejects invalid arguments.

    Each invalid call is wrapped in ``try``/``except``/``else`` rather than
    asserting a flag inside ``finally``: with ``finally``, an unexpected
    exception type would leave the flag unbound and the resulting
    ``UnboundLocalError`` would mask the real failure.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    }
    df = ko.DataFrame(my_dict)

    # (positional args, inplace value, expected exception), tried in order.
    bad_calls = [
        (('int', 3), True, TypeError),               # new name not a string
        ((3, 'int'), True, TypeError),               # old name not a string
        (('int', 'Integer'), "Seven", TypeError),    # inplace not a Boolean
        (('hello', 'goodbye'), True, ValueError),    # old name not in frame
    ]
    for args, inplace, expected_exc in bad_calls:
        try:
            df.rename(*args, inplace=inplace)
        except expected_exc:
            caught = True
        else:
            caught = False
        assert caught, "rename%r did not raise %s" % (
            args, expected_exc.__name__)
def test_checks_get_row_errors_simple():
    """Check ``get_row`` on a valid position and its ``TypeError`` on a string."""
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    # Row 0 holds the first value of each column, in column order.
    assert frame.get_row(0) == [1, 1.1, 'one', True]

    # A string row selector must be rejected.
    try:
        frame.get_row('a')
    except TypeError:
        rejected = True
    else:
        rejected = False

    assert rejected
def test_column_names_types_reassignment(col_index, col_name, col_result, name_type):
    """Check reassignment of a single entry of ``df.columns``.

    Args:
        col_index: Position in ``df.columns`` to overwrite.
        col_name: Name expected at that position before the assignment.
        col_result: Value assigned, and expected to be read back.
        name_type: Exact type expected for the value read back.
    """
    frame = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    })

    # Starting state: the integer key 4 is listed as the string '4'.
    assert frame.columns == ['int', 'float', 'str', 'bool', '4']
    assert frame.columns[col_index] == col_name

    frame.columns[col_index] = col_result

    assert frame.columns[col_index] == col_result
    assert type(frame.columns[col_index]) == name_type
def test_get_row():
    """Check that ``get_row(i)`` returns row ``i`` as a list, for every row."""
    frame = ko.DataFrame(
        data={
            "int": np.array([1, 2, 3, 4, 5]),
            "float": np.array([1.1, 2.2, 3.3, 4.4, 5.5]),
            "str": np.array(["one", "two", "three", "four", "five"]),
            "bool": np.array([True, False, True, False, True]),
            4: np.array([1, 2, 3, 4, 5]),
            True: np.array([6, 7, 8, 9, 10]),
        }
    )

    # One expected list per row position, values in column order.
    expected_rows = [
        [1, 1.1, "one", True, 1, 6],
        [2, 2.2, "two", False, 2, 7],
        [3, 3.3, "three", True, 3, 8],
        [4, 4.4, "four", False, 4, 9],
        [5, 5.5, "five", True, 5, 10],
    ]

    for position, expected in enumerate(expected_rows):
        assert frame.get_row(position) == expected
def test_col_drop_errors():
    """Check that ``drop`` rejects invalid arguments.

    Each invalid call is wrapped in ``try``/``except``/``else`` rather than
    asserting a flag inside ``finally``: with ``finally``, an unexpected
    exception type would leave the flag unbound and the resulting
    ``UnboundLocalError`` would mask the real failure.
    """
    my_dict = {
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
        4: np.array([1, 2, 3]),
    }
    df = ko.DataFrame(my_dict)

    # (column to drop, inplace value, expected exception), tried in order.
    bad_calls = [
        (3, True, TypeError),         # column name is not a string
        ('int', "Seven", TypeError),  # inplace is not a Boolean
        ('hello', True, ValueError),  # column is not part of the frame
    ]
    for drop_col, inplace, expected_exc in bad_calls:
        try:
            df.drop(drop_col, inplace=inplace)
        except expected_exc:
            caught = True
        else:
            caught = False
        assert caught, "drop(%r, inplace=%r) did not raise %s" % (
            drop_col, inplace, expected_exc.__name__)
def test_checks_set_item_errors_simple():
    """Check that item assignment rejects a plain list and a wrong length.

    Both assignments are attempted before either outcome is asserted.
    """
    df = ko.DataFrame({
        'int': np.array([1, 2, 3]),
        'float': np.array([1.1, 2.2, 3.3]),
        'str': np.array(['one', 'two', 'three']),
        'bool': np.array([True, False, True]),
    })

    # A Python list instead of a NumPy array: TypeError expected.
    try:
        df['int'] = [1, 2, 3]
    except TypeError:
        list_rejected = True
    else:
        list_rejected = False

    # Five values for a three-row frame: ValueError expected.
    try:
        df['int'] = np.array([1, 2, 3, 4, 5])
    except ValueError:
        length_rejected = True
    else:
        length_rejected = False

    assert list_rejected
    assert length_rejected
import numpy as np
import ie_pandas as ie
import pytest


@pytest.mark.parametrize(
    "funct_input,expected",
    [
        # All-integer columns.
        (
            ie.DataFrame([[1, 2, 6], [3, 4, 9], [1, 4, 10], [7, 3, 4]]),
            [7, 4, 10],
        ),
        # Integers mixed with floats.
        (
            ie.DataFrame([[1, 2, 6], [3.0, 4, 9], [1, 4, 10.0], [7, 3, 4]]),
            [7, 4, 10.0],
        ),
        # First column contains a string; only two maxima are expected.
        (
            ie.DataFrame([[1, 2, 6], ["ABC", 4, 9], [1, 4, 10], [7, 3, 4]]),
            [4, 10],
        ),
    ],
)
def test_max(funct_input, expected):
    """Check that ``max()`` returns the expected list of column maxima.

    Args:
        funct_input: Frame under test.
        expected: List the ``max()`` result must equal.
    """
    assert expected == funct_input.max()
# Test mean import numpy as np import ie_pandas as ie import pytest @pytest.mark.parametrize( "input, expected_out", [ ( ie.DataFrame( np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]]), ["c1", "c2", "c3"], ["r1", "r2", "r3"], ), [5, 4, 6], ), ( ie.DataFrame({ "c1": np.array(["a", "b", "c"]), "c2": np.array([1, 3, 5]), "c3": np.array([2, 7, 9]), }), [3, 6], ), (ie.DataFrame({ "c1": [1, 2, 3], "c2": [6, 3, 6], "c3": [7, 7, 1] }), [2, 5, 5]), (
def test_access_without_col_name():
    """Check column access by ``"0"`` when no column names were supplied."""
    frame = ie.DataFrame([[1, 2, 3], [7, 3, 6], [7, 7, 9]])

    # The first column is the first element of every input row.
    first_column = frame["0"]

    assert (first_column == np.array([1, 7, 7])).all()
# Test index import numpy as np import ie_pandas as ie import pytest @pytest.mark.parametrize( "input, expected_out", [ (ie.DataFrame(np.array([[1, 2, 3], [7, 3, 6], [7, 7, 9]])), [0, 1, 2]), ( ie.DataFrame({ "c1": np.array(["a", "b", "c"]), "c2": np.array([1, 3, 5]), "c3": np.array([2, 7, 9]), }), [0, 1, 2], ), ], ) def test_index_created_when_not_specified(input, expected_out): output = input.index assert output == expected_out @pytest.mark.parametrize( "data,given_index,expected_index", [ ({ "c0": [1],
def test_correct_row_index_names(data, given_index, expected_index):
    """Check that an explicitly supplied index is exposed via ``df.index``.

    Args:
        data: Data used to build the frame.
        given_index: Value passed as the ``index`` argument.
        expected_index: Value ``df.index`` must equal.
    """
    frame = ie.DataFrame(data, index=given_index)

    assert expected_index == frame.index
# Test median import numpy as np import ie_pandas as ie import pytest @pytest.mark.parametrize( "input, expected_out", [ ( ie.DataFrame( np.array([[1, 2, 3], [2, 3, 6], [3, 7, 9]]), ["c1", "c2", "c3"], ["r1", "r2", "r3"], ), [2, 3, 6], ), ( ie.DataFrame( { "c1": np.array(["a", "b", "c"]), "c2": np.array([1, 3, 5]), "c3": np.array([2, 7, 9]), } ), [3, 7], ), ( ie.DataFrame( { "c1": np.array(["a", 1, 2]),