def test_sample_or_partition_seed_success():
    """
    Seeds of 4294967296 or higher (integer limit) are expected to be set to 0;
    seeds lower than 0 are expected to be set to 0 as well.

    This test exercises the upper bound only: sampling 10 rows with seed
    4294967296 must equal a pandas sample drawn with ``random_state=0``.
    """
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    df = util.iris(columns, size=10)
    # Copy taken before the generated code runs, so the expectation is built
    # from a frame the operation never received.
    reference = df.copy()

    instance = SampleOrPartitionOperation(
        parameters={'type': 'value', 'seed': 4294967296, 'value': 10},
        named_inputs={'input data': 'df'},
        named_outputs={'sampled data': 'out'})
    result = util.execute(instance.generate_code(), {'df': df})

    expected = reference.sample(n=10, random_state=0)
    assert result['out'].equals(expected)
def test_sample_or_partition_fraction_percentage_success():
    """
    Check the code generated when only ``fraction`` is supplied.

    The expected statement is a ``sample`` call whose ``frac`` is the given
    fraction multiplied by 0.01 and whose ``random_state`` is ``None``.
    Generated and expected code are compared as ASTs via ``compare_ast``.
    """
    params = {'fraction': 45}
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    # The fraction is given as a percentage; the expectation uses it scaled.
    scaled_fraction = params['fraction'] * 0.01
    template = "output_1 = input_1.sample(frac={}, random_state={})"
    expected = template.format(scaled_fraction, 'None')

    matches, message = compare_ast(ast.parse(generated), ast.parse(expected))
    assert matches, message
def test_sample_or_partition_minimal_params_success():
    """
    Check the code generated for string-valued ``fraction`` and ``seed``.

    With ``fraction='3'`` and ``seed='0'`` the expected statement is a
    ``sample`` call with ``frac=0.03`` and ``random_state`` equal to the
    seed. Generated and expected code are compared as ASTs.
    """
    params = {'fraction': '3', 'seed': '0'}
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.sample(frac={}, random_state={})"
    expected = template.format('0.03', params['seed'])

    matches, message = compare_ast(ast.parse(generated), ast.parse(expected))
    assert matches, message
def test_sample_or_partition_no_output_implies_no_code_success():
    """With empty ``named_outputs``, ``generate_code`` must return ``None``."""
    instance = SampleOrPartitionOperation(
        parameters={'type': 'percent', 'fraction': 60},
        named_inputs={'input data': 'df'},
        named_outputs={})

    assert instance.generate_code() is None
def test_sample_or_partition_type_head_success():
    """
    Check the code generated for the ``TYPE_HEAD`` sampling type.

    The expected statement is a ``head`` call taking the configured
    ``value``; the expectation contains no seed even though one is supplied.
    Generated and expected code are compared as ASTs.
    """
    params = {
        'value': 365,
        'seed': 0,
        'type': SampleOrPartitionOperation.TYPE_HEAD,
    }
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    expected = "output_1 = input_1.head({})".format(params['value'])

    matches, message = compare_ast(ast.parse(generated), ast.parse(expected))
    assert matches, message
def test_sample_or_partition_type_value_success():
    """
    Check the code generated for the ``TYPE_VALUE`` sampling type.

    With string-valued ``value`` and ``seed`` the expected statement is a
    ``sample`` call with ``n`` set to the value and ``random_state=0``.
    Generated and expected code are compared as ASTs.
    """
    params = {
        'value': '400',
        'seed': '0',
        'type': SampleOrPartitionOperation.TYPE_VALUE,
    }
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.sample(n={}, random_state=0)"
    expected = template.format(params['value'])

    matches, message = compare_ast(ast.parse(generated), ast.parse(expected))
    assert matches, message
def test_sample_or_partition_head_success():
    """
    Execute the generated code for ``type='head'`` with ``value=2``.

    The output must contain exactly two rows and equal the first two rows
    of a copy of the input taken before execution.
    """
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    df = util.iris(columns, size=10)
    # Copy taken before the generated code runs, so the expectation is built
    # from a frame the operation never received.
    reference = df.copy()

    instance = SampleOrPartitionOperation(
        parameters={'type': 'head', 'value': 2},
        named_inputs={'input data': 'df'},
        named_outputs={'sampled data': 'out'})
    result = util.execute(instance.generate_code(), {'df': df})

    output = result['out']
    assert len(output) == 2
    assert output.equals(reference.iloc[:2, :])
def test_sample_or_partition_success():
    """
    Execute the generated code with empty ``parameters``.

    The ``out`` entry of the execution result must equal
    ``util.iris(size=slice_size)``.
    """
    slice_size = 10
    frame_name = 'df'
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    frame = util.iris(columns, slice_size)

    # NOTE(review): this test wires the output under 'output data', while the
    # sibling tests in this file use 'sampled data' -- confirm this is intended.
    instance = SampleOrPartitionOperation(
        parameters={},
        named_inputs={'input data': frame_name},
        named_outputs={'output data': 'out'})
    result = util.execute(instance.generate_code(), {frame_name: frame})

    assert result['out'].equals(util.iris(size=slice_size))