Ejemplo n.º 1
0
def test_distinct_missing_input_implies_no_code_success():
    """Check that no code is generated when no named input is supplied.

    The operation is built with empty parameters and empty named inputs,
    and only an output port; ``generate_code()`` is expected to be ``None``.
    """
    operation = DistinctOperation(
        parameters={},
        named_inputs={},
        named_outputs={'output data': 'out'},
    )
    generated = operation.generate_code()
    assert generated is None
Ejemplo n.º 2
0
def test_remove_duplicated_by_attributes_success():
    """Compare the generated code against a ``drop_duplicates`` call by AST.

    The expected statement uses the configured ``attributes`` list as the
    ``subset`` argument and ``keep='first'``.
    """
    parameters = {'attributes': ['name']}
    inputs = {'input data': 'df1'}
    outputs = {'output data': 'out'}

    operation = DistinctOperation(
        parameters, named_inputs=inputs, named_outputs=outputs)
    generated = operation.generate_code()

    # Build the reference statement from the same port/parameter values.
    template = "{} = {}.drop_duplicates(subset={}, keep='first')"
    reference = template.format(
        outputs['output data'],
        inputs['input data'],
        parameters['attributes'],
    )

    same, details = compare_ast(ast.parse(generated), ast.parse(reference))
    assert same, details + format_code_comparison(generated, reference)
Ejemplo n.º 3
0
def test_remove_duplicated_minimal_params_success():
    """Compare the code generated with empty parameters against a reference.

    With no ``attributes`` configured, the expected statement calls
    ``drop_duplicates`` with ``subset=None`` and ``keep='first'``.
    """
    parameters = {}
    inputs = {'input data': 'df1'}
    outputs = {'output data': 'out'}

    operation = DistinctOperation(
        parameters, named_inputs=inputs, named_outputs=outputs)
    generated = operation.generate_code()

    # Build the reference statement from the same port names.
    template = "{out} = {in1}.drop_duplicates(subset=None, keep='first')"
    reference = template.format(
        out=outputs['output data'], in1=inputs['input data'])

    same, details = compare_ast(ast.parse(generated), ast.parse(reference))
    assert same, details + format_code_comparison(generated, reference)
Ejemplo n.º 4
0
def test_distinct_missing_attributes_param_success():
    """Run the generated code without an ``attributes`` parameter.

    Rows 0-3 and 6-9 of the single column are overwritten with repeated
    values; the output is expected to equal the input with the repeated
    rows 1, 2, 3, 7, 8 and 9 dropped.
    """
    column = 'sepallength'
    frame = util.iris([column], size=10)
    frame.loc[0:3, column] = 'test'
    frame.loc[6:9, column] = 'distinct'
    # Snapshot taken before execution so the expectation is independent
    # of anything the executed code does to ``frame``.
    snapshot = frame.copy()

    operation = DistinctOperation(
        parameters={},
        named_inputs={'input data': 'df'},
        named_outputs={'output data': 'out'},
    )
    outcome = util.execute(operation.generate_code(), {'df': frame})

    expected = snapshot.drop(index=[1, 2, 3, 7, 8, 9])
    assert outcome['out'].equals(expected)
Ejemplo n.º 5
0
def test_distinct_invalid_attribute_param_fail():
    """Expect a ``NameError`` when ``attributes`` is the string 'invalid'.

    The error message must mention that the name 'invalid' is not defined.
    """
    column = 'petallength'
    frame = util.iris([column], 10)
    frame.loc[0:3, column] = 10
    frame.loc[6:9, column] = 10

    operation = DistinctOperation(
        parameters={'attributes': 'invalid'},
        named_inputs={'input data': 'df'},
        named_outputs={'output data': 'out'},
    )
    # Code generation and execution both happen inside the context, so an
    # error raised by either step is captured.
    with pytest.raises(NameError) as raised:
        util.execute(operation.generate_code(), {'df': frame})
    assert "name 'invalid' is not defined" in str(raised.value)
Ejemplo n.º 6
0
def test_distinct_success():
    """Run the generated code on a ten-row iris slice with four columns.

    The output is expected to equal ``util.iris(size=10)``.
    """
    rows = 10
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    frame = util.iris(columns, rows)

    operation = DistinctOperation(
        parameters={},
        named_inputs={'input data': 'df'},
        named_outputs={'output data': 'out'},
    )
    outcome = util.execute(operation.generate_code(), {'df': frame})

    assert outcome['out'].equals(util.iris(size=rows))