コード例 #1
0
def test_add_columns_same_aliases_param_values_fail():
    """
    Check that the same alias may be given for both inputs.

    Despite the ``_fail`` suffix in the name, this test asserts that the
    generated code runs and that every output column carries the shared
    ``_col`` suffix (which yields duplicated column names).
    """
    columns = ['sepallength', 'sepalwidth']
    frames = {
        'df1': util.iris(columns, size=10),
        'df2': util.iris(columns, size=10),
    }

    # Expected frame: the two columns twice over, all renamed with '_col'.
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'sepallength', 'sepalwidth'], size=10)
    expected.columns = ['sepallength_col', 'sepalwidth_col',
                        'sepallength_col', 'sepalwidth_col']

    operation = AddColumnsOperation(
        parameters={'aliases': '_col,_col'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()
    outcome = util.execute(code, frames)

    assert outcome['out'].equals(expected)
コード例 #2
0
def test_add_columns_missing_input_implies_no_code_success():
    """With no named inputs, generate_code() is expected to return None."""
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={},
        named_outputs={'output data': 'out'},
    )
    generated = operation.generate_code()
    assert generated is None
コード例 #3
0
def test_add_columns_no_output_implies_no_code_success():
    """With no named outputs, generate_code() is expected to return None."""
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={},
    )
    generated = operation.generate_code()
    assert generated is None
コード例 #4
0
def test_add_columns_invalid_aliases_param_value_fail():
    """
    Check that an 'aliases' value without a comma makes execution fail.

    The test expects an IndexError whose message contains
    'list index out of range' (presumably because only one alias can be
    extracted from the value -- confirm against the operation's code).
    """
    columns = ['sepallength', 'sepalwidth']
    frames = {
        'df1': util.iris(columns, size=10),
        'df2': util.iris(columns, size=10),
    }
    operation = AddColumnsOperation(
        parameters={'aliases': 'invalid'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    with pytest.raises(IndexError) as idx_err:
        util.execute(operation.generate_code(), frames)

    message = str(idx_err.value)
    assert 'list index out of range' in message
コード例 #5
0
def test_add_columns_suffixes_params_success():
    """
    Check the code generated when the aliases parameter is '_l,_r'.

    The generated source is compared, at AST level, with a ``pd.merge``
    call on both indexes that uses ``('_l', '_r')`` as suffixes.
    """
    params = {AddColumnsOperation.ALIASES_PARAM: '_l,_r'}
    n_in = {'input data 1': 'df1', 'input data 2': 'df2'}
    n_out = {'output data': 'out'}

    instance = AddColumnsOperation(parameters=params,
                                   named_inputs=n_in,
                                   named_outputs=n_out)

    code = instance.generate_code()
    # The placeholders are filled from the port mappings above, so the
    # expectation follows n_in/n_out instead of hard-coding variable names
    # (the previous template had no placeholders, making .format() a no-op).
    expected_code = dedent("""
    {out} = pd.merge({in0}, {in1}, left_index=True,
        right_index=True, suffixes=('_l', '_r'))
    """.format(out=n_out['output data'],
               in0=n_in['input data 1'],
               in1=n_in['input data 2']))
    result, msg = compare_ast(ast.parse(code), ast.parse(expected_code))
    assert result, msg + format_code_comparison(code, expected_code)
コード例 #6
0
def test_add_columns_success():
    """
    Check that two frames with distinct columns are combined side by side.

    The output is expected to equal a frame holding the left columns
    followed by the right columns.
    """
    frames = {
        'df1': util.iris(['sepallength', 'sepalwidth'], size=10),
        'df2': util.iris(['petallength', 'petalwidth', 'class'], size=10),
    }
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()
    outcome = util.execute(code, frames)

    actual = outcome['out']
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'class'],
        size=10)
    assert actual.equals(expected)
コード例 #7
0
def test_add_columns_aliases_param_success():
    """
    Check that distinct aliases are appended to the column names.

    With 'aliases' set to '_value0,_value1', the left columns are expected
    to end in '_value0' and the right columns in '_value1'.
    """
    columns = ['sepallength', 'sepalwidth']
    frames = {
        'df1': util.iris(columns, size=10),
        'df2': util.iris(columns, size=10),
    }

    # Expected frame: the two columns twice over, renamed per input alias.
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'sepallength', 'sepalwidth'], size=10)
    expected.columns = ['sepallength_value0', 'sepalwidth_value0',
                        'sepallength_value1', 'sepalwidth_value1']

    operation = AddColumnsOperation(
        parameters={'aliases': '_value0,_value1'},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()
    outcome = util.execute(code, frames)

    assert outcome['out'].equals(expected)
コード例 #8
0
def test_add_columns_success():
    """
    Check that two column subsets are combined back into the full frame.

    The output is expected to equal ``util.iris(size=slice_size)``, i.e. the
    frame obtained without selecting specific columns.
    """
    slice_size = 10
    left_name, right_name = 'df1', 'df2'
    left_frame = util.iris(['sepallength', 'sepalwidth'], slice_size)
    right_frame = util.iris(
        ['petallength', 'petalwidth', 'class'], slice_size)

    operation = AddColumnsOperation(
        parameters={},
        named_inputs={
            'input data 1': left_name,
            'input data 2': right_name,
        },
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()

    # Variable name -> frame mapping made available to the generated code.
    namespace = {left_name: left_frame, right_name: right_frame}
    outcome = util.execute(code, namespace)

    actual = outcome['out']
    assert actual.equals(util.iris(size=slice_size))
コード例 #9
0
def test_add_columns_different_size_dataframes_success():
    """
    Check the result when the inputs have different sizes.

    The left frame is built with size 10 and the right one with size 5; the
    output is expected to match the combined columns at the smaller size.
    """
    frames = {
        'df1': util.iris(['sepallength', 'sepalwidth'], size=10),
        'df2': util.iris(['petallength', 'petalwidth', 'class'], size=5),
    }
    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()
    outcome = util.execute(code, frames)

    actual = outcome['out']
    expected = util.iris(
        ['sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'class'],
        size=5)
    assert actual.equals(expected)
コード例 #10
0
def test_add_columns_repetead_column_names_success():
    """
    Check that repeated column names receive aliases automatically.

    No 'aliases' parameter is given; the left columns are expected to end
    in '_ds0' and the right columns in '_ds1'.
    """
    columns = ['sepallength', 'class']
    frames = {
        'df1': util.iris(columns, size=10),
        'df2': util.iris(columns, size=10),
    }

    # Expected frame: the two columns twice over, with the default suffixes.
    expected = util.iris(
        ['sepallength', 'class', 'sepallength', 'class'], size=10)
    expected.columns = ['sepallength_ds0', 'class_ds0',
                        'sepallength_ds1', 'class_ds1']

    operation = AddColumnsOperation(
        parameters={},
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    code = operation.generate_code()
    outcome = util.execute(code, frames)

    assert outcome['out'].equals(expected)