def test_add_columns_same_aliases_param_values_fail():
    """
    (?) Using one identical alias for both inputs is accepted.

    With aliases '_col,_col' the test expects every column of both inputs
    to carry the '_col' suffix, which yields duplicated column names in
    the output.
    """
    first_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    second_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'sepallength', 'sepalwidth'],
        size=10)

    operation = AddColumnsOperation(
        parameters={'aliases': '_col,_col'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    generated_code = operation.generate_code()
    result = util.execute(generated_code,
                          {'df1': first_input, 'df2': second_input})

    # Both sides receive the same suffix, so the names repeat.
    expected.columns = [
        'sepallength_col',
        'sepalwidth_col',
        'sepallength_col',
        'sepalwidth_col',
    ]
    assert result['out'].equals(expected)
def test_add_columns_missing_input_implies_no_code_success():
    """Without any named input, generate_code() is expected to be None."""
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={},
        named_outputs={'output data': 'out'},
    )
    generated_code = operation.generate_code()
    assert generated_code is None
def test_add_columns_no_output_implies_no_code_success():
    """Without any named output, generate_code() is expected to be None."""
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={},
    )
    generated_code = operation.generate_code()
    assert generated_code is None
def test_add_columns_invalid_aliases_param_value_fail():
    """
    An 'aliases' value of 'invalid' is expected to end in an IndexError
    whose message contains 'list index out of range'.
    """
    first_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    second_input = util.iris(['sepallength', 'sepalwidth'], size=10)

    operation = AddColumnsOperation(
        parameters={'aliases': 'invalid'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    frames = {'df1': first_input, 'df2': second_input}

    # Code generation stays inside the context manager, as the error may be
    # raised either while generating or while executing the code.
    with pytest.raises(IndexError) as idx_err:
        util.execute(operation.generate_code(), frames)

    message = str(idx_err.value)
    assert 'list index out of range' in message
def test_add_columns_suffixes_params_success():
    """
    Check the code generated when the aliases '_l,_r' are supplied.

    The expected snippet is a pd.merge on both indexes using the suffixes
    ('_l', '_r'). It is compared with the generated code at AST level, so
    formatting differences do not matter.
    """
    params = {AddColumnsOperation.ALIASES_PARAM: '_l,_r'}
    n_in = {'input data 1': 'df1', 'input data 2': 'df2'}
    n_out = {'output data': 'out'}

    instance = AddColumnsOperation(parameters=params, named_inputs=n_in,
                                   named_outputs=n_out)
    code = instance.generate_code()

    # The template now really uses the placeholders; previously the variable
    # names were hard-coded and the format() arguments were silently ignored.
    expected_code = dedent("""
        {out} = pd.merge({in0}, {in1}, left_index=True,
            right_index=True, suffixes=('_l', '_r'))
        """.format(out=n_out['output data'],
                   in0=n_in['input data 1'],
                   in1=n_in['input data 2']))

    result, msg = compare_ast(ast.parse(code), ast.parse(expected_code))
    assert result, msg + format_code_comparison(code, expected_code)
def test_add_columns_explicit_column_list_success():
    """
    Joining two inputs with distinct column names keeps every column.

    The expected frame is requested from util.iris with the five column
    names listed explicitly.

    NOTE: this test used to be called ``test_add_columns_success``, the same
    name as another test defined later in this module. The later definition
    replaced this one at import time, so this test was never collected.
    The unique name makes it run.
    """
    left_df = util.iris(['sepallength', 'sepalwidth'], size=10)
    right_df = util.iris(['petallength', 'petalwidth', 'class'], size=10)

    arguments = {
        'parameters': {},
        'named_inputs': {
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = AddColumnsOperation(**arguments)
    result = util.execute(instance.generate_code(),
                          {'df1': left_df,
                           'df2': right_df})

    expected = util.iris(
        ['sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'class'],
        size=10)
    assert result['out'].equals(expected)
def test_add_columns_aliases_param_success():
    """
    With aliases '_value0,_value1' the columns of the first input are
    expected to end in '_value0' and those of the second in '_value1'.
    """
    first_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    second_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'sepallength', 'sepalwidth'],
        size=10)

    operation = AddColumnsOperation(
        parameters={'aliases': '_value0,_value1'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    generated_code = operation.generate_code()
    result = util.execute(generated_code,
                          {'df1': first_input, 'df2': second_input})

    expected.columns = [
        'sepallength_value0',
        'sepalwidth_value0',
        'sepallength_value1',
        'sepalwidth_value1',
    ]
    assert result['out'].equals(expected)
def test_add_columns_success():
    """
    Joining the two halves of the iris columns is expected to give back
    the frame returned by util.iris with the same size.
    """
    slice_size = 10
    left_name = 'df1'
    left_frame = util.iris(['sepallength', 'sepalwidth'], slice_size)
    right_name = 'df2'
    right_frame = util.iris(['petallength', 'petalwidth', 'class'],
                            slice_size)

    operation = AddColumnsOperation(
        parameters={},
        named_inputs={
            'input data 1': left_name,
            'input data 2': right_name,
        },
        named_outputs={'output data': 'out'},
    )
    namespace = {left_name: left_frame, right_name: right_frame}
    result = util.execute(operation.generate_code(), namespace)

    assert result['out'].equals(util.iris(size=slice_size))
def test_add_columns_different_size_dataframes_success():
    """
    When the inputs differ in size, the output of AddColumnsOperation()
    is expected to have the size of the smallest input.
    """
    bigger_input = util.iris(['sepallength', 'sepalwidth'], size=10)
    smaller_input = util.iris(['petallength', 'petalwidth', 'class'], size=5)

    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    generated_code = operation.generate_code()
    result = util.execute(generated_code,
                          {'df1': bigger_input, 'df2': smaller_input})

    expected = util.iris(
        ['sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'class'],
        size=5)
    assert result['out'].equals(expected)
def test_add_columns_repetead_column_names_success():
    """
    AddColumnsOperation() automatically adds aliases to repeated column
    names: with no 'aliases' parameter the expected suffixes are '_ds0'
    for the first input and '_ds1' for the second.
    """
    first_input = util.iris(['sepallength', 'class'], size=10)
    second_input = util.iris(['sepallength', 'class'], size=10)
    expected = util.iris(
        ['sepallength', 'class', 'sepallength', 'class'], size=10)

    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    generated_code = operation.generate_code()
    result = util.execute(generated_code,
                          {'df1': first_input, 'df2': second_input})

    expected.columns = [
        'sepallength_ds0',
        'class_ds0',
        'sepallength_ds1',
        'class_ds1',
    ]
    assert result['out'].equals(expected)