def test_sort_index():
    """Check that sort_index() reorders rows in place and rejects a mixed-type index."""
    # plain list index, created unsorted
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=[10, 8, 9],
        sort=False,
    )
    frame.sort_index()
    assert isinstance(frame.index, list)

    expected = rc.DataFrame(
        {'a': [2, 3, 1], 'b': [5, 6, 4]},
        columns=['a', 'b'],
        index=[8, 9, 10],
        sort=False,
    )
    assert_frame_equal(frame, expected)

    # an index mixing int and str values cannot be ordered
    mixed = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=[10, 'a', 9],
    )
    with pytest.raises(TypeError):
        mixed.sort_index()
def test_delete_columns():
    """Exercise delete_columns(), including removing every column and re-populating."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
        columns=['a', 'b', 'c'],
    )

    # an unknown column name in the list is rejected
    with pytest.raises(ValueError):
        frame.delete_columns(['bad', 'a'])

    frame.delete_columns(['a', 'c'])
    only_b = rc.DataFrame({'b': [4, 5, 6]})
    assert_frame_equal(frame, only_b)
    assert frame.index == [0, 1, 2]

    # writing to a deleted column name brings that column back
    frame[1, 'a'] = 77
    assert frame.data == [[4, 5, 6], [None, 77, None]]

    # removing the remaining columns leaves an empty frame
    frame.delete_columns(['b', 'a'])
    empty = rc.DataFrame()
    assert_frame_equal(frame, empty)
    assert frame.columns == []
    assert frame.index == []

    # new data starts over with fresh columns and index
    frame[1, 'e'] = 77
    assert frame.data == [[77]]
def test_index():
    """Check the index getter/setter and the index_name property.

    Covers: the getter returning a list that is a copy, assigning a new
    index, rejecting an index of the wrong length, and reading/writing
    index_name (default and constructor-supplied).
    """
    actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], columns=['b', 'a'])
    result = actual.index
    assert result == ['a', 'b', 'c']
    assert isinstance(result, list)

    # test that a copy is returned
    result.append('bad')
    assert actual.index == ['a', 'b', 'c']

    actual.index = [9, 10, 11]
    assert actual.index == [9, 10, 11]
    # check the freshly assigned index rather than the stale `result` copy taken above
    assert isinstance(actual.index, list)

    # index too long
    with pytest.raises(ValueError):
        actual.index = [1, 3, 4, 5, 6]

    # default name, then a renamed index
    assert actual.index_name == 'index'
    actual.index_name = 'new name'
    assert actual.index_name == 'new name'

    # name supplied at construction time
    actual = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 'c'], index_name='letters')
    assert actual.index_name == 'letters'
def test_get_locations():
    """Check get_locations() for positional row selection with and without a column."""
    frame = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])

    # several rows, every column
    expected = rc.DataFrame({'a': [1, 3], 'b': [5, 7]}, index=[2, 6])
    assert_frame_equal(frame.get_locations([0, 2]), expected)

    # several rows, one column
    expected = rc.DataFrame({'a': [2, 4]}, index=[4, 8])
    assert_frame_equal(frame.get_locations([1, 3], 'a'), expected)
    assert frame.get_locations([0, 2], 'b', as_list=True) == [5, 7]

    # one row, every column
    expected = rc.DataFrame({'a': [3], 'b': [7]}, index=[6])
    assert_frame_equal(frame.get_locations([2]), expected)
def test_get_data_mutability():
    """Show which accessors expose the frame's inner lists to outside mutation."""
    # .data hands back a shallow copy: mutating an inner list corrupts the DataFrame
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [1.0, 2.55, 3.1], 'c': ['first', 'second', None]},
        columns=['a', 'b', 'c'],
    )
    snapshot = deepcopy(frame.data)
    exposed = frame.data
    exposed[0].append(99)
    assert frame.data != snapshot
    assert frame.data[0] == [1, 2, 3, 99]

    # selecting a column with square brackets also yields a shallow copy
    frame = rc.DataFrame({'a': [1, 2, 3], 'b': [[1], [2], [3]]}, columns=['a', 'b'])
    snapshot = deepcopy(frame.data)

    column_a = frame['a']
    column_a[3, 'a'] = 100
    # adding a row to the selection leaves the source frame untouched
    assert frame.data == snapshot

    column_b = frame['b']
    # mutate a nested list through the selection
    column_b[1, 'b'].append(22)
    # the selection sees the change ...
    assert column_b.data == [[[1], [2, 22], [3]]]
    # ... and so does the source frame
    assert column_b.data[0] == frame.data[1]
def test_sort_multi_index():
    """Check sort_index() on an index made of tuples."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=[(10, 'c'), (10, 'a'), (10, 'b')],
        sort=False,
    )
    frame.sort_index()
    assert isinstance(frame.index, list)

    expected = rc.DataFrame(
        {'a': [2, 3, 1], 'b': [5, 6, 4]},
        columns=['a', 'b'],
        index=[(10, 'a'), (10, 'b'), (10, 'c')],
        sort=False,
    )
    assert_frame_equal(frame, expected)

    # an index mixing tuples and strings must raise; only checked when PYTHON3 is set
    mixed = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=[(10, 'c'), 'a', (10, 'b')],
    )
    if PYTHON3:
        with pytest.raises(TypeError):
            mixed.sort_index()
def test_from_dataframe():
    """Compare ViewSeries.from_dataframe() results with directly constructed ViewSeries."""
    # default options
    frame = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9])
    built = rc.ViewSeries.from_dataframe(frame, 'b')
    wanted = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9])
    assert_series_equal(built, wanted)

    # sorted frame with a named index, plus a -1 third argument (expected as offset=-1)
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        index=['a', 'b', 'e'],
        sort=True,
        index_name='date',
    )
    built = rc.ViewSeries.from_dataframe(frame, 'a', -1)
    wanted = rc.ViewSeries(
        [1, 2, 3],
        data_name='a',
        index=['a', 'b', 'e'],
        sort=True,
        offset=-1,
        index_name='date',
    )
    assert_series_equal(built, wanted)

    # frame created with use_blist=True
    frame = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9], use_blist=True)
    built = rc.ViewSeries.from_dataframe(frame, 'b')
    wanted = rc.ViewSeries([4, 5, 6], data_name='b', index=['a', 'b', 9])
    assert_series_equal(built, wanted)
def test_get_location():
    """Check get_location() for positive and negative positions, as frame, dict and scalar."""
    frame = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])

    # forward position, all columns
    expected = rc.DataFrame({'a': [3], 'b': [7]}, index=[6])
    assert_frame_equal(frame.get_location(2), expected)
    assert frame.get_location(2, as_dict=True) == {'index': 6, 'a': 3, 'b': 7}
    assert frame.get_location(2, as_dict=True, index=False) == {'a': 3, 'b': 7}

    # reverse position, all columns
    expected = rc.DataFrame({'a': [4], 'b': [8]}, index=[8])
    assert_frame_equal(frame.get_location(-1), expected)
    assert frame.get_location(-1, as_dict=True) == {'index': 8, 'a': 4, 'b': 8}
    assert frame.get_location(-1, as_dict=True, index=False) == {'a': 4, 'b': 8}

    # forward position, one column given as a list
    expected = rc.DataFrame({'a': [1]}, index=[2])
    assert_frame_equal(frame.get_location(0, ['a']), expected)
    assert frame.get_location(0, ['a'], as_dict=True) == {'index': 2, 'a': 1}
    assert frame.get_location(0, ['a'], as_dict=True, index=False) == {'a': 1}

    # reverse position, one column given as a list
    expected = rc.DataFrame({'b': [7]}, index=[6])
    assert_frame_equal(frame.get_location(-2, ['b']), expected)
    assert frame.get_location(-2, ['b'], as_dict=True) == {'index': 6, 'b': 7}
    assert frame.get_location(-2, ['b'], as_dict=True, index=False) == {'b': 7}

    # a bare column name (not a list) returns just the cell value
    assert frame.get_location(1, 'b') == 6
def test_get_slice_as_dict():
    """Check get_slice(..., as_dict=True) for bounds on, between and outside index values."""
    # get_slice raises on a frame that was not created with sort=True
    unsorted = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])
    with pytest.raises(RuntimeError):
        unsorted.get_slice(2, 4)

    frame = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8], sort=True)

    # bounds at or beyond both ends return every row
    whole = ([2, 4, 6, 8], {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]})
    for start, stop in [(2, 8), (1, 8), (2, 10), (1, 10)]:
        assert frame.get_slice(start, stop, as_dict=True) == whole

    # bounds that enclose only index value 4
    only_four = ([4], {'b': [6]})
    for start, stop in [(4, 4), (3, 4), (4, 5), (3, 5)]:
        assert frame.get_slice(start, stop, ['b'], as_dict=True) == only_four

    # bounds that enclose index values 4 and 6
    four_and_six = ([4, 6], {'a': [2, 3]})
    for start, stop in [(4, 6), (3, 6), (4, 7), (3, 7)]:
        assert frame.get_slice(start, stop, ['a'], as_dict=True) == four_and_six

    # open-ended bounds
    assert frame.get_slice(None, 5, ['a'], as_dict=True) == ([2, 4], {'a': [1, 2]})
    assert frame.get_slice(5, None, ['a'], as_dict=True) == ([6, 8], {'a': [3, 4]})

    # bounds that enclose no index value give empty results
    nothing = ([], {'a': [], 'b': []})
    for bound in (3, 0, 10):
        assert frame.get_slice(bound, bound, as_dict=True) == nothing
def test_input_data_mutability():
    """Check how far a DataFrame is decoupled from the dict used to build it."""
    source = {'a': [1, 2, 3], 'b': [4, 5, 6]}

    # column order left undefined
    frame = rc.DataFrame(source)
    snapshot = deepcopy(frame.data)

    # adding a key to the input dict does not reach the frame
    source['c'] = [6, 7, 8]
    assert frame.to_dict(index=False) != source
    assert frame.data == snapshot

    # growing one of the input lists does not reach the frame either
    source['a'].append(99)
    assert frame.data == snapshot

    # mutable cell values, however, stay shared with the input
    source = {'a': [[1], [2], [3]], 'b': [4, 5, 6]}
    frame = rc.DataFrame(source)
    snapshot = deepcopy(frame.data)

    # mutating a nested input list shows up inside the frame
    source['a'][0].append(11)
    assert frame.data != snapshot
    assert frame.get(0, 'a') == [1, 11]

    # assigning through the frame does not affect the input data
    frame[1, 'a'] = [2, 22]
    assert source['a'] == [[1, 11], [2], [3]]

    frame.set(columns='b', values=[44, 55, 66])
    assert source['b'] == [4, 5, 6]
def test_get_columns_sorted():
    """Check get() with a single index value and a list of columns on a sorted frame."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]},
        index=[10, 11, 12, 99],
        columns=['a', 'b', 'c'],
        index_name='start_10',
        sort=True,
    )

    row_99_a_c = rc.DataFrame(
        {'a': [4], 'c': [None]}, index=[99], columns=['a', 'c'], index_name='start_10', sort=True)
    # columns chosen by name
    assert_frame_equal(frame.get(99, ['a', 'c']), row_99_a_c)
    # the same columns chosen by a boolean list
    assert_frame_equal(frame.get(99, [True, False, True]), row_99_a_c)

    # columns requested out of order
    row_11_c_b = rc.DataFrame(
        {'c': [8], 'b': [5]}, index=[11], columns=['c', 'b'], index_name='start_10', sort=True)
    assert_frame_equal(frame.get(11, ['c', 'b']), row_11_c_b)

    # boolean list shorter than the column list
    with pytest.raises(ValueError):
        frame.get(99, [True, False])

    # index value that is not in the frame
    with pytest.raises(ValueError):
        frame.get(88, ['a', 'c'])
def test_sort_column():
    """Check sort_columns() ascending and reversed on a frame built with dropin=blist."""
    frame = rc.DataFrame(
        {'a': [2, 1, 3], 'b': ['a', 'c', 'b']},
        columns=['a', 'b'],
        index=[10, 8, 9],
        dropin=blist,
    )

    # ascending by column 'a'
    frame.sort_columns('a')
    assert isinstance(frame.index, blist)
    ascending = rc.DataFrame(
        {'a': [1, 2, 3], 'b': ['c', 'a', 'b']},
        columns=['a', 'b'],
        index=[8, 10, 9],
        dropin=blist,
    )
    assert_frame_equal(frame, ascending)

    # reversed order by column 'a'
    frame.sort_columns('a', reverse=True)
    assert isinstance(frame.index, blist)
    descending = rc.DataFrame(
        {'a': [3, 2, 1], 'b': ['b', 'a', 'c']},
        columns=['a', 'b'],
        index=[9, 10, 8],
        dropin=blist,
    )
    assert_frame_equal(frame, descending)
def test_get_rows_sorted():
    """Check get() with a list of index values and a single column on a sorted frame."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7], 'c': [7, 8, 9, None]},
        index=[10, 11, 12, 99],
        columns=['a', 'b', 'c'],
        index_name='start_10',
        sort=True,
    )

    middle_rows = rc.DataFrame({'c': [8, 9]}, index=[11, 12], index_name='start_10', sort=True)
    # rows chosen by index value
    assert_frame_equal(frame.get([11, 12], 'c'), middle_rows)

    # same selection returned as a plain list
    assert frame.get([11, 12], 'c', as_list=True) == [8, 9]

    # rows chosen by a boolean list
    assert_frame_equal(frame.get([False, True, True, False], 'c'), middle_rows)

    # index values requested out of order
    outer_rows = rc.DataFrame({'c': [7, None]}, index=[10, 99], index_name='start_10', sort=True)
    assert_frame_equal(frame.get([99, 10], 'c'), outer_rows)

    # boolean selection returned as a plain list
    assert frame.get([False, True, True, False], 'c', as_list=True) == [8, 9]

    # whole column
    assert frame.get(columns='b', as_list=True) == [4, 5, 6, 7]

    # an index value that is not present raises
    with pytest.raises(ValueError):
        frame.get([11, 88], 'c', as_list=True)

    # boolean list shorter than the index raises
    with pytest.raises(ValueError):
        frame.get([True, True], 'c')
def test_data_function():
    """Check that assert_frame_equal() accepts a custom data comparison function and kwargs."""

    def assert_approx_equal(left_data, right_data, precision=0.00001):
        # example comparison: every cell must agree within `precision`
        for i, left_column in enumerate(left_data):
            for j, left_value in enumerate(left_column):
                assert abs(left_value - right_data[i][j]) <= precision

    exact = rc.DataFrame({'a': [1.0, 3.0], 'b': [4.0, 6.0]}, columns=['a', 'b'], index=[1, 3])
    nearby = rc.DataFrame({'a': [1.0, 3.001], 'b': [4.0, 6.001]}, columns=['a', 'b'], index=[1, 3])

    # the standard comparison rejects the small differences
    with pytest.raises(AssertionError):
        assert_frame_equal(exact, nearby)

    # a loose enough precision passes with the custom function
    assert_frame_equal(exact, nearby, assert_approx_equal, {'precision': 0.01})

    # a precision that is too low fails with the custom function
    with pytest.raises(AssertionError):
        assert_frame_equal(exact, nearby, assert_approx_equal, {'precision': 0.00001})
def test_set_square_brackets():
    """Check frame[index, column] = value assignment with sorted=False and sorted=True."""
    # sorted = False
    frame = rc.DataFrame(sorted=False)

    frame[1, 'a'] = 2
    assert frame.data == [[2]]

    # list of index values [0, 3], column 'b'
    frame[[0, 3], 'b'] = 4
    assert frame.data == [[2, None, None], [None, 4, 4]]

    # index slice 1:3, column 'b'
    frame[1:3, 'b'] = 5
    assert frame.data == [[2, None, None], [5, 5, 5]]
    assert frame.sorted is False

    # sorted = True
    frame = rc.DataFrame(sorted=True)

    frame[1, 'a'] = 2
    assert frame.data == [[2]]

    # list of index values [0, 3], column 'b'
    frame[[0, 3], 'b'] = 4
    assert frame.data == [[None, 2, None], [4, None, 4]]

    # index slice 1:3, column 'b'
    frame[1:3, 'b'] = 5
    assert frame.data == [[None, 2, None], [4, 5, 5]]
    assert frame.sorted is True
def test_append_row():
    """Check append_row() with new_cols=False and with its default handling of new columns."""
    frame = rc.DataFrame(
        {'a': [1, 3], 'b': [4, 6], 'c': [7, 9]},
        index=[10, 12],
        columns=['a', 'b', 'c'],
    )

    # new_cols=False: the unknown column 'd' is ignored, missing 'b' becomes None
    frame.append_row(14, {'a': 10, 'c': 13, 'd': 99}, new_cols=False)
    without_d = rc.DataFrame(
        {'a': [1, 3, 10], 'b': [4, 6, None], 'c': [7, 9, 13]},
        index=[10, 12, 14],
        columns=['a', 'b', 'c'],
    )
    assert_frame_equal(frame, without_d)

    # default: the unknown column 'd' is added, earlier rows get None for it
    frame.append_row(16, {'a': 14, 'b': 15, 'd': 100})
    with_d = rc.DataFrame(
        {
            'a': [1, 3, 10, 14],
            'b': [4, 6, None, 15],
            'c': [7, 9, 13, None],
            'd': [None, None, None, 100],
        },
        index=[10, 12, 14, 16],
        columns=['a', 'b', 'c', 'd'],
    )
    assert_frame_equal(frame, with_d)
def test_iterrows():
    """Check that iterrows() yields one dict per row, with and without the 'index' key."""
    df = rc.DataFrame({'first': [1, 2, 3, 4, 5], 'second': ['a', 2, 'b', None, 5]})

    expected = [
        {'index': 0, 'first': 1, 'second': 'a'},
        {'index': 1, 'first': 2, 'second': 2},
        {'index': 2, 'first': 3, 'second': 'b'},
        {'index': 3, 'first': 4, 'second': None},
        {'index': 4, 'first': 5, 'second': 5},
    ]
    # list() drains the iterator directly; no manual append loop is needed
    actual = list(df.iterrows())
    assert actual == expected

    # index = False
    df = rc.DataFrame({'first': [1, 2, 3, 4, 5], 'second': ['a', 2, 'b', None, 5]})

    expected = [
        {'first': 1, 'second': 'a'},
        {'first': 2, 'second': 2},
        {'first': 3, 'second': 'b'},
        {'first': 4, 'second': None},
        {'first': 5, 'second': 5},
    ]
    actual = list(df.iterrows(index=False))
    assert actual == expected
def test_get_slicer():
    """Check square-bracket slicing by index value on an unsorted frame."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9], 'd': [10, 11, 12]},
        columns=['a', 'b', 'c', 'd'],
        sort=False,
    )

    # frame[1:2] -- index 1 through 2, all columns
    expected = rc.DataFrame(
        {'a': [2, 3], 'b': [5, 6], 'c': [8, 9], 'd': [11, 12]},
        columns=['a', 'b', 'c', 'd'],
        index=[1, 2],
        sort=False,
    )
    assert_frame_equal(frame[1:2], expected)

    # frame[0:1, ['c', 'd']] -- index 0 through 1, columns ['c', 'd']
    expected = rc.DataFrame({'c': [7, 8], 'd': [10, 11]}, columns=['c', 'd'], index=[0, 1], sort=False)
    assert_frame_equal(frame[0:1, ['c', 'd']], expected)

    # same slice with the columns requested in the opposite order
    expected = rc.DataFrame({'d': [10, 11], 'c': [7, 8]}, columns=['d', 'c'], index=[0, 1], sort=False)
    assert_frame_equal(frame[0:1, ['d', 'c']], expected)

    # frame[1:1, 'c'] -- index 1 through 1, column 'c'
    expected = rc.DataFrame({'c': [8]}, index=[1], sort=False)
    assert_frame_equal(frame[1:1, 'c'], expected)

    # slices whose bounds are missing from the index, or reversed, raise
    for bad_rows in (slice(4, 5), slice(0, 8), slice(2, 1)):
        with pytest.raises(IndexError):
            _ = frame[bad_rows, 'c']
def test_json():
    """Check that to_json() followed by rc.from_json() reproduces the original frame.

    The JSON text is held in `json_str`; the previous local name `str`
    shadowed the builtin.
    """
    # explicit index and column order
    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[4, 5, 6], columns=['b', 'a', 'c'])

    json_str = df.to_json()
    actual = rc.from_json(json_str)
    assert_frame_equal(df, actual)

    # use_blist=True, sorted=False
    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, use_blist=True, sorted=False)

    json_str = df.to_json()
    actual = rc.from_json(json_str)
    assert_frame_equal(df, actual)

    # empty DataFrame: columns but no rows
    df = rc.DataFrame({'a': [], 'b': [], 'c': []})
    json_str = df.to_json()
    actual = rc.from_json(json_str)
    assert_frame_equal(df, actual)

    # empty DataFrame: no columns at all
    df = rc.DataFrame()
    json_str = df.to_json()
    actual = rc.from_json(json_str)
    assert_frame_equal(df, actual)
def test_get_matrix_sorted():
    """Check get() with lists for both index and columns on a sorted frame."""
    frame = rc.DataFrame(
        {'a': [2, 1, 3], 'b': [5, 4, 6], 'c': [8, 7, 9], 'd': [11, 10, 12]},
        index=['y', 'x', 'z'],
        columns=['a', 'b', 'c', 'd'],
        index_name='letters',
        sort=True,
    )

    x_z_b_d = rc.DataFrame(
        {'b': [4, 6], 'd': [10, 12]},
        index=['x', 'z'],
        columns=['b', 'd'],
        index_name='letters',
        sort=True,
    )
    # rows and columns chosen by value
    assert_frame_equal(frame.get(['x', 'z'], ['b', 'd']), x_z_b_d)
    # the same selection with boolean lists
    assert_frame_equal(frame.get([True, False, True], [False, True, False, True]), x_z_b_d)

    # columns requested out of order
    x_z_d_c = rc.DataFrame(
        {'d': [10, 12], 'c': [7, 9]},
        index=['x', 'z'],
        columns=['d', 'c'],
        index_name='letters',
        sort=True,
    )
    assert_frame_equal(frame.get(['x', 'z'], ['d', 'c']), x_z_d_c)

    # no arguments returns everything
    whole = frame.get()
    assert_frame_equal(whole, frame)

    # boolean list does not match the index length
    with pytest.raises(ValueError):
        frame.get([True, False], [False, True, False, True])

    # boolean list does not match the columns length
    with pytest.raises(ValueError):
        frame.get([True, False, True], [False, True])

    # index value that is not present
    with pytest.raises(ValueError):
        frame.get_matrix(['BAD', 'x'], ['a', 'b'])

    # column that is not present
    with pytest.raises(ValueError):
        frame.get_matrix(['x', 'y'], ['a', 'b', 'BAD'])
def test_set_col_index_subset():
    """Check set() on one column for a subset of indexes, by index value and by boolean list."""
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]},
        index=[10, 11, 12],
        columns=['a', 'b', 'c'],
        sorted=False,
    )

    # existing rows addressed by index value, in any order
    frame.set(columns='b', indexes=[12, 11, 10], values=[66, 55, 44])
    assert frame.data == [[1, 2, 3], [44, 55, 66], [7, 8, 9]]

    frame.set(columns='a', indexes=[12, 10], values=[33, 11])
    assert frame.data == [[11, 2, 33], [44, 55, 66], [7, 8, 9]]

    # index values 13 and 14 are new rows
    frame.set(columns='c', indexes=[12, 13, 14], values=[120, 130, 140])
    assert frame.data == [
        [11, 2, 33, None, None],
        [44, 55, 66, None, None],
        [7, 8, 120, 130, 140],
    ]
    assert frame.index == [10, 11, 12, 13, 14]

    # new rows together with a new column
    frame.set(columns='z', indexes=[14, 15, 16], values=['zoo', 'boo', 'hoo'])
    assert frame.data == [
        [11, 2, 33, None, None, None, None],
        [44, 55, 66, None, None, None, None],
        [7, 8, 120, 130, 140, None, None],
        [None, None, None, None, 'zoo', 'boo', 'hoo'],
    ]
    assert frame.index == [10, 11, 12, 13, 14, 15, 16]
    column_is_list = [isinstance(frame.data[pos], list) for pos in range(len(frame.columns))]
    assert all(column_is_list)

    # fewer values than indexes is an error
    with pytest.raises(ValueError):
        frame.set(indexes=[10, 11], columns='a', values=[1])

    # rows addressed by a boolean list
    frame = rc.DataFrame(
        {'c': [1, 2], 'a': [4, 5], 'b': [7, 8]},
        index=['first', 'second'],
        columns=['a', 'b', 'c'],
        sorted=False,
    )
    frame.set(columns='c', indexes=[False, True], values=[99])
    assert frame.data == [[4, 5], [7, 8], [1, 99]]

    # boolean list not the size of the existing index
    with pytest.raises(ValueError):
        frame.set(indexes=[True, False, True], columns='a', values=[1, 2])

    # number of True entries differs from the number of values
    with pytest.raises(ValueError):
        frame.set(indexes=[True, True, False], columns='b', values=[4, 5, 6])

    with pytest.raises(ValueError):
        frame.set(indexes=[True, True, False], columns='b', values=[4])
def test_from_df_view():
    """Check that a ViewSeries built from a DataFrame tracks later changes to that frame."""
    # sort = False
    source = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['a', 'b', 9], sort=False)
    view = rc.ViewSeries.from_dataframe(source, 'b')

    assert view.sort is False
    # the view holds the very same index and column objects as the frame
    assert view.index is source.index
    assert view.data is source.get_entire_column('b', True)

    # overwrite an existing cell
    source['a', 'b'] = 22
    assert view.data == [22, 5, 6]
    assert view.index == ['a', 'b', 9]

    # assign to a new index value
    source[11, 'b'] = -88
    assert view.data == [22, 5, 6, -88]
    assert view.index == ['a', 'b', 9, 11]

    # append a row
    source.append_row(12, {'a': 55, 'b': 77})
    assert view.data == [22, 5, 6, -88, 77]
    assert view.index == ['a', 'b', 9, 11, 12]

    # sort = True
    source = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 5], sort=True)
    view = rc.ViewSeries.from_dataframe(source, 'a')

    assert view.sort is True
    assert view.index is source.index
    assert view.data is source.get_entire_column('a', True)

    # overwrite an existing cell
    source[1, 'a'] = 22
    assert view.data == [1, 22, 3]
    assert view.index == [0, 1, 5]

    # new index value past the end
    source[6, 'a'] = 4
    assert view.data == [1, 22, 3, 4]
    assert view.index == [0, 1, 5, 6]

    # new index value that lands in the middle
    source[2, 'a'] = 12
    assert view.data == [1, 22, 12, 3, 4]
    assert view.index == [0, 1, 2, 5, 6]

    # append a row
    source.append_row(7, {'a': 55, 'b': 77})
    assert view.data == [1, 22, 12, 3, 4, 55]
    assert view.index == [0, 1, 2, 5, 6, 7]
def test_len():
    """Check len() of an empty frame, a populated frame, and after an assignment adds a row."""
    assert len(rc.DataFrame()) == 0

    frame = rc.DataFrame({'a': [1, 2, 3], 'b': [1.0, 2.55, 3.1]}, columns=['a', 'b'], sort=False)
    assert len(frame) == 3

    # NOTE(review): the subscript reads as index 'a', column 3 -- possibly meant [3, 'a'];
    # the assertion below only depends on the length growing by one
    frame['a', 3] = 99
    assert len(frame) == 4
def test_validate_data():
    """Check that validate_integrity() passes on sound frames and raises on a short column."""
    frame = rc.DataFrame(
        {'a': [2, 1, 3], 'b': ['a', 'c', 'b']},
        columns=['a', 'b'],
        index=[10, 8, 9],
    )
    # a freshly built frame is valid
    frame.validate_integrity()

    # overwrite the second column's data directly with a list that is one item short
    frame._data[1] = ['a', 'c']
    assert frame.data == [[2, 1, 3], ['a', 'c']]
    with pytest.raises(ValueError):
        frame.validate_integrity()

    # an empty frame is valid too
    empty = rc.DataFrame()
    empty.validate_integrity()
def test_json_multi_index():
    """Check the to_json() / DataFrame.from_json() round trip with a tuple index.

    The JSON text is held in `json_str`; the previous local name `str`
    shadowed the builtin.
    """
    # tuple index with the default index name
    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[('a', 4), ('b', 5), ('c', 6)])

    json_str = df.to_json()
    actual = rc.DataFrame.from_json(json_str)
    assert_frame_equal(df, actual)

    # tuple index with a tuple index name
    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, index=[('a', 4), ('b', 5), ('c', 6)],
                      index_name=('first', 'second'))

    json_str = df.to_json()
    actual = rc.DataFrame.from_json(json_str)
    assert_frame_equal(df, actual)
def test_default_empty_init():
    """Check the defaults of frames built with no data, only columns, or index plus columns."""

    def check_container_types(frame):
        # index, columns, data and every per-column data entry must be plain lists
        assert isinstance(frame.index, list)
        assert isinstance(frame.columns, list)
        assert isinstance(frame.data, list)
        assert all([isinstance(frame.data[pos], list) for pos in range(len(frame.columns))])

    # no arguments at all
    empty = rc.DataFrame()
    assert isinstance(empty, rc.DataFrame)
    assert empty.data == []
    assert empty.columns == []
    assert empty.index == []
    assert empty.sort is True
    check_container_types(empty)

    # explicitly unsorted
    unsorted = rc.DataFrame(sort=False)
    assert unsorted.sort is False
    check_container_types(unsorted)

    # columns only
    columns_only = rc.DataFrame(columns=['a', 'b', 'c'])
    assert columns_only.data == [[], [], []]
    assert columns_only.columns == ['a', 'b', 'c']
    assert columns_only.index == []
    assert columns_only.sort is True
    check_container_types(columns_only)

    # index and columns without data: cells are None and sort is False
    shaped = rc.DataFrame(index=[1, 2, 3], columns=['a', 'b'])
    assert shaped.data == [[None, None, None], [None, None, None]]
    assert shaped.columns == ['a', 'b']
    assert shaped.index == [1, 2, 3]
    assert shaped.sort is False
    check_container_types(shaped)

    # index and columns with sort requested explicitly
    shaped_sorted = rc.DataFrame(index=[1, 2, 3], columns=['a', 'b'], sort=True)
    assert shaped_sorted.sort is True
    check_container_types(shaped_sorted)
def test_reset_index():
    """Check reset_index() for a default, named and tuple index, and with drop=True."""
    # default index: it becomes a column named 'index_0'
    frame = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])
    frame.reset_index()
    expected = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'index_0': [0, 1, 2]},
        columns=['a', 'b', 'index_0'],
    )
    assert_frame_equal(frame, expected)

    # named index: the new column takes the index name
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=['x', 'y', 'z'],
        index_name='jelo',
    )
    frame.reset_index()
    expected = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6], 'jelo': ['x', 'y', 'z']},
        columns=['a', 'b', 'jelo'],
        sort=False,
    )
    assert_frame_equal(frame, expected)

    # tuple index with a tuple name: one new column per tuple position
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')],
        index_name=('melo', 'helo', 'gelo'),
    )
    frame.reset_index()
    expected = rc.DataFrame(
        {
            'a': [1, 2, 3],
            'b': [4, 5, 6],
            'melo': ['a', 'b', 'c'],
            'helo': [10, 11, 12],
            'gelo': ['x', 'y', 'z'],
        },
        columns=['a', 'b', 'melo', 'helo', 'gelo'],
        sort=False,
    )
    assert_frame_equal(frame, expected)

    # drop=True: the old index is discarded instead of becoming a column
    frame = rc.DataFrame(
        {'a': [1, 2, 3], 'b': [4, 5, 6]},
        columns=['a', 'b'],
        index=['x', 'y', 'z'],
        index_name='jelo',
    )
    frame.reset_index(drop=True)
    expected = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], sort=False)
    assert_frame_equal(frame, expected)
def test_bad_initialization():
    """Check that invalid constructor arguments raise ValueError or TypeError."""
    two_columns = {'a': [1, 2, 3], 'b': [4, 5, 6]}

    # an index without columns
    with pytest.raises(ValueError):
        rc.DataFrame(index=[1, 2, 3])

    # index length differs from the data length
    with pytest.raises(ValueError):
        rc.DataFrame({'a': [1, 2, 3]}, index=[1])

    # too few columns listed
    with pytest.raises(ValueError):
        rc.DataFrame(dict(two_columns), columns=['a'])

    # too many columns listed
    with pytest.raises(ValueError):
        rc.DataFrame(dict(two_columns), columns=['a', 'b', 'c', 'TOO', 'MANY'])

    # columns do not match the dict keys
    with pytest.raises(ValueError):
        rc.DataFrame(dict(two_columns), columns=['BAD', 'VALUE'])

    # index is not a list
    with pytest.raises(TypeError):
        rc.DataFrame({'a': [1]}, index=1)

    # columns is not a list
    with pytest.raises(TypeError):
        rc.DataFrame({'a': [1]}, columns='a')

    # data is not a dict
    with pytest.raises(TypeError):
        rc.DataFrame(data=[1, 2, 3])
def test_append_rows():
    """Check append_rows() with and without new columns, plus its error cases."""
    frame = rc.DataFrame(
        {'a': [1, 3], 'b': [4, 6], 'c': [7, 9]},
        index=[10, 12],
        columns=['a', 'b', 'c'],
    )

    # new_cols=False: the unknown column 'd' is ignored, missing 'b' becomes None
    frame.append_rows(
        [14, 15],
        {'a': [10, 11], 'c': [13, 14], 'd': [99, 100]},
        new_cols=False,
    )
    without_d = rc.DataFrame(
        {'a': [1, 3, 10, 11], 'b': [4, 6, None, None], 'c': [7, 9, 13, 14]},
        index=[10, 12, 14, 15],
        columns=['a', 'b', 'c'],
    )
    assert_frame_equal(frame, without_d)

    # default: the unknown column 'd' is added, earlier rows get None for it
    frame.append_rows([16, 17], {'a': [14, 15], 'b': [15, 16], 'd': [100, 101]})
    with_d = rc.DataFrame(
        {
            'a': [1, 3, 10, 11, 14, 15],
            'b': [4, 6, None, None, 15, 16],
            'c': [7, 9, 13, 14, None, None],
            'd': [None, None, None, None, 100, 101],
        },
        index=[10, 12, 14, 15, 16, 17],
        columns=['a', 'b', 'c', 'd'],
    )
    assert_frame_equal(frame, with_d)

    # appending an index value that already exists is an error
    with pytest.raises(IndexError):
        frame.append_rows([10, 11], {'a': [8, 9]})

    # this call must raise ValueError (existing indexes, three values for two indexes)
    with pytest.raises(ValueError):
        frame.append_rows([16, 17], {'a': [14, 15, 999]})
def test_get_locations():
    """Check get_locations() for several positions, as a frame and as a list."""
    source = rc.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, index=[2, 4, 6, 8])

    # positions 0 and 2, every column
    both_columns = rc.DataFrame({'a': [1, 3], 'b': [5, 7]}, index=[2, 6])
    assert_frame_equal(source.get_locations([0, 2]), both_columns)

    # positions 1 and 3, column 'a' only
    column_a = rc.DataFrame({'a': [2, 4]}, index=[4, 8])
    assert_frame_equal(source.get_locations([1, 3], 'a'), column_a)

    # positions 0 and 2 of column 'b' as a plain list
    assert source.get_locations([0, 2], 'b', as_list=True) == [5, 7]