Exemple #1
0
def test_join_remove_right_with_case_columns_success():
    """Generated code must match the expected snippet for this configuration.

    The operation is built with ``MATCH_CASE_PARAM`` set to True,
    ``KEEP_RIGHT_KEYS_PARAM`` set to False and different key names on each
    side. Generated and expected sources are parsed and compared with
    ``compare_ast``.
    """
    join_params = {
        'left_attributes': ['id', 'cod'],
        'right_attributes': ['id2', 'cod2'],
        JoinOperation.KEEP_RIGHT_KEYS_PARAM: False,
        JoinOperation.MATCH_CASE_PARAM: True,
        'aliases': '_left,_right'
    }
    inputs = {'input data 1': 'df1', 'input data 2': 'df2'}
    outputs = {'output data': 'out'}

    # The snippet the operation is expected to emit.
    expected_code = dedent("""
        cols_to_remove = [c+'_right' for c in df2.columns if c in df1.columns]

        data1_tmp = df1[['id', 'cod']].applymap(lambda col: str(col).lower())
        data1_tmp.columns = [c+"_lower" for c in data1_tmp.columns]
        data1_tmp = pd.concat([df1, data1_tmp], axis=1, sort=False)

        data2_tmp = df2[['id2', 'cod2']].applymap(lambda col: str(col).lower())
        data2_tmp.columns = [c+"_lower" for c in data2_tmp.columns]
        data2_tmp = pd.concat([df2, data2_tmp], axis=1, sort=False)

        out = pd.merge(data1_tmp, data2_tmp, left_on=col1, right_on=col2,
            copy=False, suffixes=['_left', '_right'], how='inner')
        out.drop(col1+col2, axis=1, inplace=True)

        out.drop(cols_to_remove, axis=1, inplace=True)""")

    operation = JoinOperation(join_params, named_inputs=inputs,
                              named_outputs=outputs)
    generated_code = operation.generate_code()

    matches, diff_msg = compare_ast(ast.parse(generated_code),
                                    ast.parse(expected_code))
    assert matches, diff_msg + format_code_comparison(generated_code,
                                                      expected_code)
Exemple #2
0
def test_join_krk_param_success():
    """Inner join on ``name`` with ``keep_right_keys`` set to a truthy value.

    The output is expected to carry the left columns suffixed with ``_l`` and
    the right columns suffixed with ``_r``, including the right-hand key
    (``name_r``).
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    expected = util.titanic(['name', 'homedest', 'embarked', 'name'], size=10)

    operation = JoinOperation(
        parameters={
            'keep_right_keys': 1,
            'match_case': '1',
            'join_type': 'inner',
            'left_attributes': ['name'],
            'right_attributes': ['name']
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })
    outcome = util.execute(operation.generate_code(),
                           {'df1': left, 'df2': right})

    # Rename the reference frame to the suffixed column names.
    expected.columns = ['name_l', 'homedest_l', 'embarked_r', 'name_r']
    assert outcome['out'].equals(expected)
Exemple #3
0
def test_join_left_join_keep_columns_minimal_params_success():
    """Left join keeping right keys must generate a single ``pd.merge`` call.

    The expected snippet is built from a template filled with the configured
    input/output names and join type, then compared to the generated code
    with ``compare_ast``.
    """
    join_params = {
        'left_attributes': ['id', 'cod'],
        'right_attributes': ['id', 'cod'],
        JoinOperation.JOIN_TYPE_PARAM: 'left',
        JoinOperation.KEEP_RIGHT_KEYS_PARAM: True,
        'aliases': '_left,_right'
    }
    inputs = {'input data 1': 'df1', 'input data 2': 'df2'}
    outputs = {'output data': 'out'}
    operation = JoinOperation(join_params, named_inputs=inputs,
                              named_outputs=outputs)

    generated_code = operation.generate_code()

    # Template of the expected merge; placeholders are filled in below.
    template = """
        {out} = pd.merge({in0}, {in1}, how='{type}', 
            suffixes=['_left', '_right'], left_on=['id', 'cod'],
            right_on=['id', 'cod'])
        """
    substitutions = {
        'out': outputs['output data'],
        'in0': inputs['input data 1'],
        'in1': inputs['input data 2'],
        'type': join_params[JoinOperation.JOIN_TYPE_PARAM],
    }
    expected_code = dedent(template.format(**substitutions))

    matches, diff_msg = compare_ast(ast.parse(generated_code),
                                    ast.parse(expected_code))
    assert matches, diff_msg + format_code_comparison(generated_code,
                                                      expected_code)
Exemple #4
0
def test_join_match_case_param_success():
    """
    Inner join on ``name`` without keeping the right keys.

    Original author's note: match case converts a column to lower case, adds
    a ``_lower`` suffix to the column name and finally drops the column
    (which seems redundant).

    NOTE(review): this test passes ``match_case=False``, so it may not
    actually exercise the behaviour described above -- confirm the intent.
    """
    left = util.titanic(['name', 'embarked'], size=10)
    right = util.titanic(['homedest', 'name'], size=10)
    expected = util.titanic(['name', 'embarked', 'homedest'], size=10)

    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': False,
            'join_type': 'inner',
            'left_attributes': ['name'],
            'right_attributes': ['name']
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })
    outcome = util.execute(operation.generate_code(),
                           {'df1': left, 'df2': right})

    # Right-hand key is expected to be absent from the output.
    expected.columns = ['name_l', 'embarked_l', 'homedest_r']
    assert outcome['out'].equals(expected)
Exemple #5
0
def test_join_outer_replace_success():
    """
    Executing the generated code with ``join_type='_outer'`` must raise
    ``KeyError``.

    Original author's note: this only happens when you pass '_outer'.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)

    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': True,
            'join_type': '_outer',
            'left_attributes': ['homedest'],
            'right_attributes': ['embarked']
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })

    with pytest.raises(KeyError) as raised:
        util.execute(operation.generate_code(), {'df1': left, 'df2': right})
    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail. Presumably the intent was to check that the
    # missing key is '' -- confirm and tighten.
    assert '' in str(raised.value)
Exemple #6
0
def test_join_custom_suffixes_success():
    """Custom ``aliases`` must be used as the output column suffixes.

    With ``aliases='_esquerdo,_direito'`` the left columns are expected to
    end in ``_esquerdo`` and the remaining right column in ``_direito``.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    expected = util.titanic(['name', 'homedest', 'embarked'], size=10)

    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': True,
            'join_type': 'inner',
            'left_attributes': ['name'],
            'right_attributes': ['name'],
            'aliases': '_esquerdo,_direito'
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })
    outcome = util.execute(operation.generate_code(),
                           {'df1': left, 'df2': right})

    expected_columns = ['name_esquerdo', 'homedest_esquerdo',
                        'embarked_direito']
    expected.columns = expected_columns
    assert outcome['out'].equals(expected)
Exemple #7
0
def test_join_missing_left_or_right_param_failure():
    """Constructing a JoinOperation with only one attribute list must fail.

    Two cases are checked: only ``right_attributes`` given, and only
    ``left_attributes`` given. Each must raise ``ValueError``.
    """
    # Shared setup lives outside the ``pytest.raises`` blocks so that each
    # block contains only the call expected to raise, and so the second case
    # does not depend on names bound inside the first block.
    n_in = {'input data 1': 'df1', 'input data 2': 'df2'}
    n_out = {'output data': 'out'}

    params = {'right_attributes': ['id', 'cod']}
    with pytest.raises(ValueError):
        JoinOperation(params, named_inputs=n_in, named_outputs=n_out)

    params = {'left_attributes': ['id', 'cod']}
    with pytest.raises(ValueError):
        JoinOperation(params, named_inputs=n_in, named_outputs=n_out)
Exemple #8
0
def test_join_missing_output_implies_no_code_success():
    """``generate_code`` must return None when ``named_inputs`` is empty.

    NOTE(review): despite the test name, it is the inputs mapping that is
    empty here; the output is provided.
    """
    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': False,
            'join_type': 'inner',
            'left_attributes': ['homedest'],
            'right_attributes': ['embarked']
        },
        named_inputs={},
        named_outputs={
            'output data': 'out'
        })
    generated_code = operation.generate_code()
    assert generated_code is None
Exemple #9
0
def test_join_merge_outer_parameter_success():
    """
    Outer join on differently named keys must match a hand-built pandas merge.

    The reference result is computed by suffixing every column with ``_l`` /
    ``_r``, merging with ``how='outer'`` and dropping the right-hand key.

    Original author's note: there's a line of code that replaces '_outer'
    with ''.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    expected_left = left.copy()
    expected_right = right.copy()

    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': True,
            'join_type': 'outer',
            'left_attributes': ['homedest'],
            'right_attributes': ['embarked']
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })
    outcome = util.execute(operation.generate_code(),
                           {'df1': left, 'df2': right})

    # Build the reference frames with the default suffixes applied.
    expected_left.columns = [name + '_l' for name in expected_left.columns]
    expected_right.columns = [name + '_r' for name in expected_right.columns]

    left_keys = [key + '_l' for key in ['homedest']]
    right_keys = [key + '_r' for key in ['embarked']]

    merged = pd.merge(expected_left, expected_right, how='outer',
                      suffixes=['_l', '_r'],
                      left_on=left_keys, right_on=right_keys)

    # Only the right-hand key columns are removed from the reference result.
    expected = merged.drop(right_keys, axis=1)
    assert outcome['out'].equals(expected)
Exemple #10
0
def test_join_success():
    """Run the operation on a single iris frame and expect it back unchanged.

    NOTE(review): this passes empty ``parameters`` and a single
    ``'input data'`` port; confirm this is a valid JoinOperation setup, since
    test_join_missing_attributes_param_fail expects a ValueError when the
    attribute lists are missing.
    """
    rows = 10
    frame_name = 'df'
    frame = util.iris(
        ['sepallength', 'sepalwidth', 'petalwidth', 'petallength'], rows)

    operation = JoinOperation(
        parameters={},
        named_inputs={
            'input data': frame_name,
        },
        named_outputs={
            'output data': 'out'
        })
    outcome = util.execute(operation.generate_code(), {frame_name: frame})
    assert outcome['out'].equals(util.iris(size=rows))
Exemple #11
0
def test_join_inner_join_minimal_with_remove_right_columns_success():
    """Minimal parameters must generate an inner merge that drops right keys.

    Only the attribute lists and ``aliases`` are supplied. Generated and
    expected sources are parsed and compared with ``compare_ast``.
    """
    join_params = {
        'left_attributes': ['id', 'cod'],
        'right_attributes': ['id', 'cod'],
        'aliases': '_left,_right'
    }
    inputs = {'input data 1': 'df1', 'input data 2': 'df2'}
    outputs = {'output data': 'out'}

    # The snippet the operation is expected to emit.
    expected_code = dedent("""
        cols_to_remove = [c+'_right' for c in df2.columns if c in df1.columns]

        out = pd.merge(df1, df2, how='inner', suffixes=['_left', '_right'],
                left_on=['id', 'cod'], right_on=['id', 'cod'])

        out.drop(cols_to_remove, axis=1, inplace=True)""")

    operation = JoinOperation(join_params, named_inputs=inputs,
                              named_outputs=outputs)
    generated_code = operation.generate_code()

    matches, diff_msg = compare_ast(ast.parse(generated_code),
                                    ast.parse(expected_code))
    assert matches, diff_msg + format_code_comparison(generated_code,
                                                      expected_code)
Exemple #12
0
def test_join_invalid_right_attributes_param_fail():
    """A string passed as ``right_attributes`` must fail at execution time.

    Executing the generated code is expected to raise ``NameError`` whose
    message mentions ``invalid``.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)

    operation = JoinOperation(
        parameters={
            'keep_right_keys': False,
            'match_case': False,
            'join_type': 'inner',
            'left_attributes': ['homedest'],
            'right_attributes': 'invalid'
        },
        named_inputs={
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        named_outputs={
            'output data': 'out'
        })

    with pytest.raises(NameError) as raised:
        util.execute(operation.generate_code(), {'df1': left, 'df2': right})
    error_text = str(raised.value)
    assert "invalid" in error_text
Exemple #13
0
def test_join_missing_attributes_param_fail():
    """Omitting both attribute lists must raise ValueError at construction.

    The error message is expected to state that 'left_attributes' and
    'right_attributes' must be informed.
    """
    expected_fragment = ("Parameters 'left_attributes' and 'right_attributes'"
                         " must be informed for task")

    with pytest.raises(ValueError) as raised:
        JoinOperation(
            parameters={
                'keep_right_keys': False,
                'match_case': False,
                'join_type': 'inner'
            },
            named_inputs={
                'input data 1': 'df1',
                'input data 2': 'df2'
            },
            named_outputs={
                'output data': 'out'
            })
    assert expected_fragment in str(raised.value)