def test_defaults():
    dummy_set = App('dummy_set', '', '1.0')

    @dummy_set.metric(description='',
                      input_fields=[IN1, [IN2, IN3]],
                      output_fields=[OUT1])
    def test_defaults(val, df=None):
        sums = val.sum()
        if df is not None:
            sums += df.sum().sum()
        return pd.Series([sums])

    data_map = {'input': {}, 'output': pd.DataFrame()}
    data_map['input']['data'] = DUMMY_DATAFRAME[[IN1.key[-1], IN2.key[-1]]]
    report = dummy_set.run(data_map)
    assert len(report['existing_results_skipped']) == 0
    assert len(report['unneeded_metrics']) == 0
    assert len(report['metrics_missing_input']) == 0
    assert len(report['run_results']) == 1
    assert report['run_results']['test_defaults']['result'] == "Success"
    assert OUT1.get_by_path(data_map)[0] == 6

    data_map['input']['data'] = DUMMY_DATAFRAME
    report = dummy_set.run(data_map)
    assert len(report['existing_results_skipped']) == 0
    assert len(report['unneeded_metrics']) == 0
    assert len(report['metrics_missing_input']) == 0
    assert len(report['run_results']) == 1
    assert report['run_results']['test_defaults']['result'] == "Success"
    assert OUT1.get_by_path(data_map)[0] == 45
def test_subfield_missmatch():
    with pytest.raises(AssertionError):
        dummy_set = App('dummy_set', '', '1.0')

        @dummy_set.metric(description='',
                          input_fields=[[IN1, IN_CONF]],
                          output_fields=[OUT1])
        def test_subfield_missmatch1(df):  # pylint: disable=unused-variable
            return df

    dummy_set = App('dummy_set', '', '1.0')

    @dummy_set.metric(description='',
                      input_fields=[[IN1, IN2]],
                      output_fields=[OUT1])
    def test_subfield_missmatch(df):  # pylint: disable=unused-variable
        return df

    data_map = {'input': {}, 'output': pd.DataFrame()}
    data_map['input']['data'] = {'in1': [1, 2, 3], 'in2': [4, 5, 6]}
    report = dummy_set.run(data_map)
    assert len(report['existing_results_skipped']) == 0
    assert len(report['unneeded_metrics']) == 0
    assert len(report['metrics_missing_input']) == 0
    assert report['run_results']['test_subfield_missmatch']['result'].strip(
    ).endswith("is not a DataFrame and can't return a column set")
class dummy_factory(object):
    def __init__(self):
        self.count = 0
        self.app = App('dummy_cascade_set', '', '1.0')
        self.fail_node_list = []

    def add_node(self, inputs, outputs, has_fallback=False):
        assert len(inputs) <= 10, \
            'Only supports generating nodes with at most 10 inputs'
        self.count += 1
        name = 'node' + str(self.count)

        def func_base(df, valid_input):
            if name in self.fail_node_list:
                raise ValueError
            if not valid_input:
                val = 1000
            else:
                val = df.sum().sum()
            return tuple(pd.Series(val * (i + 1)) for i in range(len(outputs)))

        def func_no_default(df):
            return func_base(df, True)

        func = func_base
        if not has_fallback:
            func = func_no_default
        setattr(func, '__name__', name)
        self.app.add_metric(func, '', inputs, outputs)
class dummy_factory(object):
    def __init__(self):
        self.count = 0
        self.app = App('dummy_cascade_set', '', '1.0')
        self.fail_node_list = []

    def add_node(self, inputs, outputs, has_fallback=False):
        assert len(inputs) <= 10, \
            'Only supports generating nodes with at most 10 inputs'
        self.count += 1
        name = 'node' + str(self.count)

        def func_base(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9,
                      arg10, valid_input):
            if name in self.fail_node_list:
                raise ValueError
            if not valid_input:
                val = name + ' default'
            else:
                arg_list = [arg1, arg2, arg3, arg4, arg5,
                            arg6, arg7, arg8, arg9, arg10]
                val = name + str([arg for arg in arg_list
                                  if arg is not None]).replace("'", "")
            return tuple(val + str(i) for i in range(len(outputs)))

        def func_no_default(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                            arg9, arg10):
            return func_base(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                             arg9, arg10, True)

        func = func_base
        if not has_fallback:
            func = func_no_default

        # Bind None to the unused leading args so the wrapped function exposes
        # one free positional parameter per input field (plus valid_input when
        # a fallback is supported).
        args = [None] * (10 - len(inputs))
        partial_func = partial(func, *args)
        setattr(partial_func, '__name__', name)
        self.app.add_metric(partial_func, '', inputs, outputs)
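
# The factory above keeps the metric signature fixed at 10 argument slots and
# relies on functools.partial so the framework sees one parameter per input
# field. Below is a hypothetical usage sketch (not part of the original test
# suite): the helper name and Field definitions are invented for illustration,
# while the data_map layout mirrors the other tests in this module.
def _example_cascade_usage():
    from okcompute import Field  # assumed available, as in the example app

    field_a = Field(['input', 'a'], 'raw input')
    field_mid = Field(['internal', 'mid'], 'intermediate value')
    field_out = Field(['output', 'out'], 'final value')

    factory = dummy_factory()
    # node1 turns the raw input into an intermediate value; node2 consumes it
    # and, because has_fallback=True, still runs with a default if it is missing.
    factory.add_node(inputs=[field_a], outputs=[field_mid])
    factory.add_node(inputs=[field_mid], outputs=[field_out], has_fallback=True)

    data_map = {'input': {'a': 'x'}, 'internal': {}, 'output': {}}
    return factory.app.run(data_map)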
def test_input_mismatch():
    dummy_set2 = App('dummy_set', '', '1.0')

    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[FIELD_RESULT_DUMMY_BAD_OUT]
        )
        def dummy_good_no_default1(dummy, foo):  # pylint: disable=unused-argument
            return

    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[FIELD_RESULT_DUMMY_BAD_OUT]
        )
        def dummy_good_no_default2():
            return

    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[FIELD_RESULT_DUMMY_BAD_OUT]
        )
        def dummy_good_no_default3(valid_input=True):  # pylint: disable=unused-argument
            return
def test_duplicate_metric():
    dummy_set2 = App('dummy_set', '', '1.0')

    @dummy_set2.metric(
        description='',
        input_fields=[FIELD_INPUT_DUMMY1],
        output_fields=[FIELD_RESULT_DUMMY_GOOD_NO_DEFAULT]
    )
    def dummy_good_no_default(dummy):
        return dummy

    # Registering a second metric under an already-used name must fail.
    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[FIELD_RESULT_DUMMY_BAD_OUT]
        )
        def dummy_good_no_default(dummy):
            return dummy

    # Registering a metric that produces an already-claimed output field must fail.
    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[FIELD_RESULT_DUMMY_GOOD_NO_DEFAULT]
        )
        def dummy_good_no_default2(dummy):
            return dummy
def test_output_mismatch():
    def base_dummy(dummy):
        ret = 'foo'
        return tuple(ret for i in range(dummy))

    dummy_set2 = App('dummy_set', '', '1.0')

    with pytest.raises(AssertionError):
        @dummy_set2.metric(
            description='',
            input_fields=[FIELD_INPUT_DUMMY1],
            output_fields=[]
        )
        def dummy_out0(dummy):
            return base_dummy(dummy)

    @dummy_set2.metric(
        description='',
        input_fields=[FIELD_INPUT_DUMMY1],
        output_fields=[FIELD_RESULT_DUMMY_BAD_OUT]
    )
    def dummy_out1(dummy):
        return base_dummy(dummy)

    @dummy_set2.metric(
        description='',
        input_fields=[FIELD_INPUT_DUMMY1],
        output_fields=[FIELD_RESULT_DUMMY_BAD_OUT2, FIELD_RESULT_DUMMY_BAD_OUT3]
    )
    def dummy_out2(dummy):
        return base_dummy(dummy)

    data_map = {'input': {}, 'output': {}}
    for out_count in range(4):
        FIELD_INPUT_DUMMY1.set_by_path(data_map, out_count)
        report = dummy_set2.run(data_map)
        assert len(report['existing_results_skipped']) == 0
        assert len(report['unneeded_metrics']) == 0
        assert len(report['metrics_missing_input']) == 0
        assert len(report['run_results']) == 2
        assert report['run_results']['dummy_out1']['result'] == "Success"
        if out_count == 2:
            assert report['run_results']['dummy_out2']['result'] == "Success"
        else:
            assert report['run_results']['dummy_out2']['result'].strip().endswith(
                "Metric didn't produce expected number of outputs")
def test_mix():
    dummy_set = App('dummy_set', '', '1.0')
    val = 'foo'

    @dummy_set.metric(description='',
                      input_fields=[[IN1, IN2], IN_CONF],
                      output_fields=[OUT1])
    def test_mix(df, conf):
        return pd.Series([conf + str(df.sum().sum())])

    data_map = {'input': {}, 'output': pd.DataFrame()}
    data_map['input']['data'] = DUMMY_DATAFRAME
    data_map['input']['config'] = 'foo'
    report = dummy_set.run(data_map)
    assert len(report['existing_results_skipped']) == 0
    assert len(report['unneeded_metrics']) == 0
    assert len(report['metrics_missing_input']) == 0
    assert report['run_results']['test_mix']['result'] == "Success"
    test_sum = DUMMY_DATAFRAME[[IN1.key[-1], IN2.key[-1]]].sum().sum()
    assert OUT1.get_by_path(data_map)[0] == val + str(test_sum)
def test_missing_one():
    dummy_set = App('dummy_set', '', '1.0')

    @dummy_set.metric(description='',
                      input_fields=[[IN1, IN5]],
                      output_fields=[OUT1])
    def test_missing_one(df):
        return pd.Series([df.sum().sum()])

    data_map = {'input': {}, 'output': pd.DataFrame()}
    data_map['input']['data'] = DUMMY_DATAFRAME
    report = dummy_set.run(data_map)
    assert len(report['existing_results_skipped']) == 0
    assert len(report['unneeded_metrics']) == \
        len(dummy_set.graph.get_metrics()) - 1
    assert report['metrics_missing_input'] == {
        'test_missing_one': {
            'bad_field': 'input/data/in5',
            'reason': 'Missing input',
            'has_default': False
        }
    }
    assert len(report['run_results']) == 0
# WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
# -*- coding: utf-8 -*-
"""OKCompute Example Application

Provides a command line interface to demonstrate basic framework usage
"""

import argparse
import json

from okcompute import Field, App

#: The okcompute app object. This is the handle for running analysis
example_app = App(name='example_app',
                  description='an example okcompute application',
                  version='1.0')

#: Fields used to reference data for analysis. The first parameter is the key.
# This key is a hierarchical path into a dict-like object passed to the app to
# track state. The specific names (input/internal/output) are not special, and
# are just used here for clarity.

#: Input Fields. These map to data passed into the analysis
FIELD_IN1 = Field(key=['input', 'in1'], description='dummy input 1')
FIELD_IN2 = Field(['input', 'in2'], 'dummy input 2')

#: Fields used to store intermediary values. These are values produced by
# one analysis node, to be used by others. These can also be passed in as
# input, and if skip_existing_results is used, the nodes that produce these
# outputs can be skipped
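
# To illustrate how a Field key maps into the nested data map, here is a small
# sketch based on the get_by_path/set_by_path helpers used in the tests (e.g.
# OUT1.get_by_path(data_map)); the literal values are for illustration only:
#
#     data_map = {'input': {}, 'internal': {}, 'output': {}}
#     FIELD_IN1.set_by_path(data_map, 42)        # stores at data_map['input']['in1']
#     assert FIELD_IN1.get_by_path(data_map) == 42
#     assert data_map['input']['in1'] == 42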
FIELD_RESULT_DUMMY_GOOD_FALLBACK = Field(
    ['output', 'dummy_good_fallback'], 'Dummy analysis result')
FIELD_RESULT_DUMMY_GOOD_DEFAULT = Field(
    ['output', 'dummy_good_default'], 'Dummy analysis result')
FIELD_RESULT_DUMMY_VALIDATION = Field(
    ['output', 'dummy_validation'], 'Dummy analysis result')
FIELD_RESULT_DUMMY_BAD_OUT = Field(
    ['output', 'dummy_bad_out'], 'Dummy analysis result')
FIELD_RESULT_DUMMY_BAD_OUT2 = Field(
    ['output', 'dummy_bad_out2'], 'Dummy analysis result')
FIELD_RESULT_DUMMY_BAD_OUT3 = Field(
    ['output', 'dummy_bad_out3'], 'Dummy analysis result')

FIELDS_DUMMY_OUTPUT = [item for item in globals().keys()
                       if item.startswith("FIELD_RESULT_")]

dummy_set = App('dummy_set', '', '1.0')


@dummy_set.metric(
    description='',
    input_fields=[FIELD_INPUT_DUMMY1],
    output_fields=[FIELD_RESULT_DUMMY_GOOD_NO_DEFAULT]
)
def dummy_good_no_default(dummy):
    return dummy


@dummy_set.metric(
    description='',
    input_fields=[FIELD_INPUT_DUMMY_VALIDATION],
    output_fields=[FIELD_RESULT_DUMMY_VALIDATION]
)