def test_options_context():
    """
    The options context manager changes an option only temporarily

    Inside the ``with`` block the new value is in effect, and once
    the block is left (normally or through an exception) the option
    reads as it did before.
    """
    # Start from a known state
    set_option('modify_input_data', False)
    assert not get_option('modify_input_data')

    with options(modify_input_data=True):
        assert get_option('modify_input_data')

    assert not get_option('modify_input_data')

    # The same, observed through a verb acting on a dataframe
    original = pd.DataFrame({'x': [0, 1, 2, 3]})

    with_y = original >> define(y='2*x')
    assert not original.equals(with_y)

    with options(modify_input_data=True):
        with_z = original >> define(z='3*x')

    # Under the option, the verb handed back the input object itself
    assert original.equals(with_z)
    assert original is with_z

    with_w = original >> define(w='4*x')
    assert not original.equals(with_w)

    # An exception raised inside the context manager must
    # propagate out of it.
    with pytest.raises(ValueError), options(modify_input_data=True):
        raise ValueError()

    # ... and the option must not be left modified afterwards
    assert not get_option('modify_input_data')

    # Asking for an option that does not exist is an error
    with pytest.raises(ValueError):
        assert not get_option('time_travel')
def test_data_mutability():
    """
    Check which verbs modify the input dataframe

    The first section runs with the option as it is on entry and
    expects the input to be left untouched. The second section turns
    ``modify_input_data`` on and checks, verb by verb, whether the
    input dataframe is altered.
    """
    # These tests affirm that we know the consequences of the verbs.
    # A test in the Mutable section should not fail without a change
    # in implementation. That change should be triggered when Pandas
    # implements a consistent copy-on-write policy.
    #
    # When a test in the mutable section fails, it is bad news. There
    # should be no memory usage gains by reusing the original data,
    # except for the case of `rename`.
    df = pd.DataFrame({'x': [0, 1, 2, 3, 4, 5],
                       'y': [0, 0, 1, 1, 2, 3]})

    # Default to not mutable
    df >> define(z='x**2')
    assert 'z' not in df

    df >> group_by(z='x**2')
    assert 'z' not in df

    arr = df >> pull('x')
    arr[0] = 99
    assert df.loc[0, 'x'] != 99

    df2 = df >> slice_rows(3)
    df2.loc[0, 'x'] = 999
    assert df.loc[0, 'x'] != 999

    set_option('modify_input_data', True)
    try:
        # Mutable
        df2 = df.copy()
        df2 >> define(z='x**2')
        assert 'z' in df2

        df2 = df.copy()
        df2 >> group_by(z='x**2')
        assert 'z' in df2

        df2 = df.copy()
        arr = df2 >> pull('x')
        arr[0] = 99
        assert df2.loc[0, 'x'] == 99

        # Not mutable
        df2 = df.copy()
        df2 >> create(z='x**2')
        assert 'z' not in df2

        df2 >> sample_n(3) >> define(z='x**2')
        assert 'z' not in df2

        df2 >> sample_frac(.5) >> define(z='x**2')
        assert 'z' not in df2

        df2 >> select('x') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> select('x', 'y') >> define(z='x**2')
        assert 'z' not in df2

        # dataframe.rename has copy-on-write (if copy=False) that
        # affects only the new frame. This creates possibility for
        # "action at a distance" effects on the new frame when the
        # original is modified
        result = df2 >> rename(x='z')
        df2['y'] = 3
        result['x'] = 4
        assert 'z' not in df2
        assert df2.loc[0, 'y'] != 4
        assert result.loc[0, 'x'] != 3
        assert result is df2

        df2 >> arrange('x') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> query('x%2') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> group_indices(z='x%2')
        assert 'z' not in df2
    finally:
        # Reset the global option even when an assertion above fails,
        # so that the setting does not leak into tests that run later.
        set_option('modify_input_data', False)