def test_get_outputs_str_named(self):
    """``get_outputs`` called with a named output returns that output's spec."""
    default_spec = [{'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'}]
    debug_spec = [{'name': 'another_name', 'variable': 'another_variable'}]
    pipeline = MLPipeline(
        ['a_primitive', 'another_primitive'],
        outputs={'default': default_spec, 'debug': debug_spec},
    )

    result = pipeline.get_outputs('debug')

    # Compared against a fresh literal rather than the object handed to the pipeline.
    assert result == [{'name': 'another_name', 'variable': 'another_variable'}]
def test_fit_predict_args_in_init():
    """An argument given through ``init_params`` is combined with the ``predict`` input."""

    def add(a, b):
        return a + b

    float_args = [{'name': arg_name, 'type': 'float'} for arg_name in ('a', 'b')]
    primitive = {
        'name': 'add',
        'primitive': add,
        'produce': {
            'args': float_args,
            'output': [{'type': 'float', 'name': 'out'}],
        },
    }
    pipeline = MLPipeline([primitive], init_params={'add': {'b': 10}})

    # ``b`` comes from init_params (10) and ``a`` from the call (3).
    assert pipeline.predict(a=3) == 13
def test_fit_debug_str(self):
    """``fit(debug='tm')`` returns a dict whose key structure matches the expected one."""
    mlpipeline = MLPipeline(['a_primitive'])
    mlpipeline.blocks['a_primitive#1'].fit_args = [{'name': 'fit_input', 'type': 'whatever'}]

    expected_return = {
        'debug': 'tm',
        'fit': {
            'a_primitive#1': {'time': 0, 'memory': 0},
        },
    }

    returned = mlpipeline.fit(debug='tm')

    assert isinstance(returned, dict)
    # Only the key sets are compared at each level, never the values.
    assert set(returned.keys()) == set(expected_return)  # fit / produce
    assert set(returned['fit'].keys()) == set(expected_return['fit'])  # block name
    for block_name, expected_entry in expected_return['fit'].items():
        assert set(returned['fit'][block_name].keys()) == set(expected_entry)
def test__get_block_args(self):
    """``_get_block_args`` resolves block arguments from the context, honouring ``input_names``."""
    pipeline = MLPipeline(list(), input_names={'a_block': {'arg_3': 'arg_3_alt'}})
    block_args = [
        {'name': 'arg_1'},
        {'name': 'arg_2', 'default': 'arg_2_value'},
        {'name': 'arg_3'},
        {'name': 'arg_4', 'required': False},
    ]
    context = {
        'arg_1': 'arg_1_value',
        'arg_3_alt': 'arg_3_value',
    }

    resolved = pipeline._get_block_args('a_block', block_args, context)

    # arg_2 and arg_4 are absent from the context and absent from the expected result.
    assert resolved == {'arg_1': 'arg_1_value', 'arg_3': 'arg_3_value'}
def test_get_diagram_fit(self):
    """``get_diagram`` renders the text stored in ``diagram_fit.txt`` for a block with fit args."""
    # A context manager closes the file even if reading raises.
    with open('tests/data/diagrams/diagram_fit.txt', 'r') as diagram_file:
        # Drop the final character of the stored file before comparing.
        expected = diagram_file.read()[:-1]

    output = [
        {
            'name': 'output_variable',
            'type': 'another_whatever',
            'variable': 'a_primitive#1.output_variable'
        }
    ]

    pipeline = MLPipeline(['a_primitive'], outputs={'default': output})
    pipeline.blocks['a_primitive#1'].produce_args = [
        {
            'name': 'input_variable',
            'type': 'whatever'
        }
    ]
    pipeline.blocks['a_primitive#1'].fit_args = [
        {
            'name': 'input_variable',
            'type': 'whatever'
        }
    ]
    # The same list object is used both as the pipeline output and the block output.
    pipeline.blocks['a_primitive#1'].produce_output = output

    assert str(pipeline.get_diagram()) == expected
def test_get_hyperparameters(self):
    """``get_hyperparameters`` gathers each block's hyperparameters keyed by block name."""
    first_block = Mock()
    first_block.get_hyperparameters.return_value = {'a': 'a'}
    second_block = Mock()
    second_block.get_hyperparameters.return_value = {'b': 'b', 'c': 'c'}

    mlpipeline = MLPipeline(list())
    # Replace the pipeline blocks with the mocks prepared above.
    mlpipeline.blocks = OrderedDict((
        ('a.primitive.Name#1', first_block),
        ('a.primitive.Name#2', second_block),
    ))

    result = mlpipeline.get_hyperparameters()

    expected = {
        'a.primitive.Name#1': {'a': 'a'},
        'a.primitive.Name#2': {'b': 'b', 'c': 'c'},
    }
    assert result == expected
    first_block.get_hyperparameters.assert_called_once_with()
    second_block.get_hyperparameters.assert_called_once_with()
def test_get_hyperparameters_flat(self):
    """``get_hyperparameters(flat=True)`` keys the result by ``(block name, hyperparameter)``."""
    first_block = Mock()
    first_block.get_hyperparameters.return_value = {'a': 'a'}
    second_block = Mock()
    second_block.get_hyperparameters.return_value = {'b': 'b', 'c': 'c'}

    mlpipeline = MLPipeline(['a_primitive'])
    # Replace the pipeline blocks with the mocks prepared above.
    mlpipeline.blocks = OrderedDict((
        ('a.primitive.Name#1', first_block),
        ('a.primitive.Name#2', second_block),
    ))

    result = mlpipeline.get_hyperparameters(flat=True)

    expected = {
        ('a.primitive.Name#1', 'a'): 'a',
        ('a.primitive.Name#2', 'b'): 'b',
        ('a.primitive.Name#2', 'c'): 'c',
    }
    assert result == expected
    first_block.get_hyperparameters.assert_called_once_with()
    second_block.get_hyperparameters.assert_called_once_with()
def test__get_block_variables_is_str(self):
    """When ``produce_outputs`` is a string, the instance method of that name supplies the variables."""
    pipeline = MLPipeline(['a_primitive'])
    block = pipeline.blocks['a_primitive#1']
    block.produce_outputs = 'get_produce_outputs'
    block.instance.get_produce_outputs.return_value = [
        {'name': 'output_from_function', 'type': 'test'},
    ]

    variables = pipeline._get_block_variables(
        'a_primitive#1',
        'produce_outputs',
        {'output': 'name_output'},
    )

    assert variables == {
        'output_from_function': {'name': 'output_from_function', 'type': 'test'},
    }
    pipeline.blocks['a_primitive#1'].instance.get_produce_outputs.assert_called_once_with()
def test_fit_start(self):
    """Fitting with ``start_`` set to the second block never fits the first block."""
    pipeline = MLPipeline([
        'sklearn.preprocessing.StandardScaler',
        'sklearn.linear_model.LogisticRegression',
    ])

    # Swap the first block for a mock so any call to it can be detected.
    scaler_mock = Mock()
    pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = scaler_mock

    context = {'X': self.X, 'y': self.y}

    # Start from the second block, addressed first by index and then by name.
    for start in (1, 'sklearn.linear_model.LogisticRegression#1'):
        pipeline.fit(start_=start, **context)

    scaler_mock.fit.assert_not_called()
def test_get_diagram_multiple_blocks(self):
    """``get_diagram`` renders the text stored in ``diagram_multiple_blocks.txt`` for two chained blocks."""
    # A context manager closes the file even if reading raises.
    with open('tests/data/diagrams/diagram_multiple_blocks.txt', 'r') as diagram_file:
        # Drop the final character of the stored file before comparing.
        expected = diagram_file.read()[:-1]

    first_output = [
        {
            'name': 'output_variable_a',
            'type': 'another_whatever',
            'variable': 'a_primitive#1.output_variable_a'
        }
    ]
    second_output = [
        {
            'name': 'output_variable_b',
            'type': 'another_whatever',
            'variable': 'b_primitive#1.output_variable_b'
        }
    ]

    pipeline = MLPipeline(['a_primitive', 'b_primitive'], outputs={'default': second_output})
    pipeline.blocks['a_primitive#1'].produce_args = [
        {
            'name': 'input_variable',
            'type': 'whatever'
        }
    ]
    pipeline.blocks['a_primitive#1'].produce_output = first_output
    # The first block's output list is reused as the second block's input.
    pipeline.blocks['b_primitive#1'].produce_args = first_output
    pipeline.blocks['b_primitive#1'].produce_output = second_output

    assert str(pipeline.get_diagram()) == expected
def test_predict_debug(self):
    """``predict(debug=True)`` returns the outputs plus a per-block debug dict."""
    outputs = {
        'default': [{
            'name': 'a_name',
            'variable': 'a_primitive#1.a_variable',
            'type': 'a_type',
        }]
    }
    mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
    mlpipeline.blocks['a_primitive#1'].produce_args = [{
        'name': 'input',
        'type': 'whatever'
    }]
    mlpipeline.blocks['a_primitive#1'].produce_output = [{
        'name': 'a_name',
        'type': 'a_type'
    }]

    # Only the keys of this structure are compared below; the values are placeholders.
    expected_return = {
        "a_primitive#1": {
            "elapsed": 0,
            "input": {"whatever"},
            "output": {"whatever"}
        }
    }

    returned, debug_returned = mlpipeline.predict(debug=True)

    # ``returned`` is wrapped in a list so its length can be compared with the
    # single declared default output.
    assert len([returned]) == len(outputs["default"])
    assert isinstance(debug_returned, dict)
    assert set(debug_returned.keys()) == set(expected_return.keys())

    for block_name, dictionary in expected_return.items():
        assert set(debug_returned[block_name].keys()) == set(dictionary.keys())
def test_predict_no_debug(self):
    """``predict(debug=False)`` returns one value per declared default output."""
    default_outputs = [
        {'name': 'a_name', 'variable': 'a_primitive#1.a_variable', 'type': 'a_type'},
        {'name': 'b_name', 'variable': 'a_primitive#1.b_variable', 'type': 'b_type'},
    ]
    outputs = {'default': default_outputs}
    mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)

    block = mlpipeline.blocks['a_primitive#1']
    block.produce_args = [{'name': 'input', 'type': 'whatever'}]
    block.produce_output = [
        {'name': 'a_name', 'type': 'a_type'},
        {'name': 'b_name', 'type': 'b_type'},
    ]

    returned = mlpipeline.predict(debug=False)

    assert len(returned) == len(outputs["default"])
    # Each returned value is compared with the ``variable`` entry of its output spec.
    for actual, spec in zip(returned, outputs["default"]):
        assert actual == spec["variable"]
def test_fit_debug(self):
    """``fit(debug=True)`` returns a dict whose key structure matches the expected one."""
    mlpipeline = MLPipeline(['a_primitive'])
    mlpipeline.blocks['a_primitive#1'].fit_args = [{
        'name': 'fit_input',
        'type': 'whatever'
    }]

    # Only the keys of this structure are compared below; the values are placeholders.
    expected_return = dict()
    expected_return["fit"] = {
        "a_primitive#1": {
            "elapsed": 0,
            "input": {"whatever"}
        }
    }

    returned = mlpipeline.fit(debug=True)

    assert isinstance(returned, dict)
    assert set(returned.keys()) == set(expected_return.keys())  # fit / produce
    assert set(returned["fit"].keys()) == set(expected_return["fit"].keys())  # block name

    for block_name, dictionary in expected_return["fit"].items():
        assert set(returned["fit"][block_name].keys()) == set(dictionary.keys())
def test_get_predict_args(self):
    """``get_predict_args`` reports the block's produce arguments only."""
    pipeline = MLPipeline(['a_primitive'])
    block = pipeline.blocks['a_primitive#1']
    block.produce_args = [{'name': 'input', 'type': 'whatever'}]
    block.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
    block.produce_output = [{'name': 'output', 'type': 'another_whatever'}]

    predict_args = pipeline.get_predict_args()

    # The fit-only argument and the block output are not part of the expected result.
    assert predict_args == [{'name': 'input', 'type': 'whatever'}]
def test_fit_produce_debug_str(self):
    """``fit(output_='default', debug='tm')`` returns outputs plus fit/produce debug info."""
    outputs = {
        'default': [
            {'name': 'a_name', 'variable': 'a_primitive#1.a_variable', 'type': 'a_type'},
        ]
    }
    mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
    mlpipeline.blocks['a_primitive#1'].fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
    mlpipeline.blocks['a_primitive#1'].produce_args = [{'name': 'input', 'type': 'whatever'}]
    mlpipeline.blocks['a_primitive#1'].produce_output = [{'name': 'a_name', 'type': 'a_type'}]

    # Only the keys of this structure are compared below; the values are placeholders.
    expected_return = {
        'debug': 'tm',
        'fit': {'a_primitive#1': {'time': 0, 'memory': 0}},
        'produce': {'a_primitive#1': {'time': 0, 'memory': 0}},
    }

    returned, debug_returned = mlpipeline.fit(output_='default', debug='tm')

    assert len([returned]) == len(outputs['default'])
    assert isinstance(debug_returned, dict)
    assert set(debug_returned.keys()) == set(expected_return)  # fit / produce
    assert set(debug_returned['fit'].keys()) == set(expected_return['fit'])
    assert set(debug_returned['produce'].keys()) == set(expected_return['produce'])

    for block_name, expected_entry in expected_return['fit'].items():
        assert set(debug_returned['fit'][block_name].keys()) == set(expected_entry)

    for block_name, expected_entry in expected_return['produce'].items():
        assert set(debug_returned['produce'][block_name].keys()) == set(expected_entry)
def test_get_tunable_hyperparameters(self):
    """``get_tunable_hyperparameters`` returns an equal but distinct object."""
    stored = {}
    mlpipeline = MLPipeline(['a_primitive'])
    mlpipeline._tunable_hyperparameters = stored

    result = mlpipeline.get_tunable_hyperparameters()

    assert result == stored
    # The internal object itself must not be handed back.
    assert result is not stored
def test_get_outputs_invalid(self):
    """``get_outputs`` raises ``ValueError`` for a variable the block does not declare."""
    pipeline = MLPipeline(['a_primitive'])
    block = pipeline.blocks['a_primitive#1']
    block.produce_output = [{'name': 'output', 'type': 'whatever'}]

    # 'invalid' is not among the block's declared produce outputs.
    with pytest.raises(ValueError):
        pipeline.get_outputs('a_primitive#1.invalid')
def test_fit_no_debug(self):
    """``fit(debug=False)`` returns ``None``."""
    mlpipeline = MLPipeline(['a_primitive'])
    block = mlpipeline.blocks['a_primitive#1']
    block.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]

    result = mlpipeline.fit(debug=False)

    assert result is None
def test_get_outputs_int(self):
    """``get_outputs(-1)`` describes the last block of the pipeline."""
    pipeline = MLPipeline(['a_primitive', 'another_primitive'])

    result = pipeline.get_outputs(-1)

    last_block_name = 'another_primitive#1'
    assert result == [{'name': last_block_name, 'variable': last_block_name}]
def test_get_outputs_invalid(self, mlblock_mock):
    """``get_outputs`` given a list mixing named outputs, an index and a variable.

    NOTE(review): despite its name, this test asserts a successful combined
    lookup rather than an error; it also shares its name with another test in
    this file -- confirm whether it should be renamed.
    """
    # ``mlblock_mock`` is presumably injected by a patch decorator not visible
    # here -- TODO confirm. One fresh mock is supplied per primitive.
    mlblock_mock.side_effect = [MagicMock(), MagicMock()]

    default_spec = [{'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'}]
    debug_spec = [{'name': 'another_name', 'variable': 'another_variable'}]
    pipeline = MLPipeline(
        ['a_primitive', 'another_primitive'],
        outputs={'default': default_spec, 'debug': debug_spec},
    )
    pipeline.blocks['a_primitive#1'].produce_output = [{'name': 'output', 'type': 'whatever'}]
    pipeline.blocks['another_primitive#1'].produce_output = [{'name': 'something'}]

    requested = ['default', 'debug', -1, 'a_primitive#1.output']
    result = pipeline.get_outputs(requested)

    assert result == [
        {'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'},
        {'name': 'another_name', 'variable': 'another_variable'},
        {'name': 'something', 'variable': 'another_primitive#1.something'},
        {'name': 'output', 'type': 'whatever', 'variable': 'a_primitive#1.output'},
    ]
def test_get_outputs_combination(self):
    """``get_outputs`` given a list mixing named outputs, an index and a variable."""
    default_spec = [{'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'}]
    debug_spec = [{'name': 'another_name', 'variable': 'another_variable'}]
    pipeline = MLPipeline(
        ['a_primitive', 'another_primitive'],
        outputs={'default': default_spec, 'debug': debug_spec},
    )
    pipeline.blocks['a_primitive#1'].produce_output = [{'name': 'output', 'type': 'whatever'}]
    pipeline.blocks['another_primitive#1'].produce_output = [{'name': 'something'}]

    requested = ['default', 'debug', -1, 'a_primitive#1.output']
    result = pipeline.get_outputs(requested)

    # One expected entry per requested item, in request order.
    assert result == [
        {'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'},
        {'name': 'another_name', 'variable': 'another_variable'},
        {'name': 'another_primitive#1', 'variable': 'another_primitive#1'},
        {'name': 'output', 'type': 'whatever', 'variable': 'a_primitive#1.output'},
    ]
def test_get_tunable_hyperparameters_flat(self):
    """``get_tunable_hyperparameters(flat=True)`` keys specs by ``(block, hyperparameter)``."""
    mlpipeline = MLPipeline(['a_primitive'])
    mlpipeline._tunable_hyperparameters = {
        'block_1': {
            'hp_1': {'type': 'int', 'range': [1, 10]},
        },
        'block_2': {
            'hp_1': {'type': 'str', 'default': 'a', 'values': ['a', 'b', 'c']},
            'hp_2': {'type': 'bool', 'default': True},
        },
    }

    flattened = mlpipeline.get_tunable_hyperparameters(flat=True)

    # Built from fresh literals, independent of the nested dict stored above.
    assert flattened == {
        ('block_1', 'hp_1'): {'type': 'int', 'range': [1, 10]},
        ('block_2', 'hp_1'): {'type': 'str', 'default': 'a', 'values': ['a', 'b', 'c']},
        ('block_2', 'hp_2'): {'type': 'bool', 'default': True},
    }
def test_get_inputs_fit(self):
    """``get_inputs`` lists the fit and produce inputs not supplied by an earlier block."""
    pipeline = MLPipeline(['a_primitive', 'another_primitive'])

    first = pipeline.blocks['a_primitive#1']
    first.produce_args = [{'name': 'input', 'type': 'whatever'}]
    first.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
    first.produce_output = [{'name': 'output', 'type': 'another_whatever'}]

    # The second block consumes the first block's 'output' plus one new input.
    pipeline.blocks['another_primitive#1'].produce_args = [
        {'name': 'output', 'type': 'another_whatever'},
        {'name': 'another_input', 'type': 'another_whatever'},
    ]

    inputs = pipeline.get_inputs()

    # 'output' is absent from the expected inputs.
    assert inputs == {
        'input': {'name': 'input', 'type': 'whatever'},
        'fit_input': {'name': 'fit_input', 'type': 'whatever'},
        'another_input': {'name': 'another_input', 'type': 'another_whatever'},
    }
def test_get_output_variables(self):
    """``get_output_variables`` returns the ``variable`` of each default output."""
    default_spec = [{'name': 'a_name', 'variable': 'a_variable', 'type': 'a_type'}]
    pipeline = MLPipeline(['a_primitive'], outputs={'default': default_spec})

    variables = pipeline.get_output_variables()

    assert variables == ['a_variable']
def test_get_outputs_str_variable(self):
    """``get_outputs`` given ``block.variable`` returns that variable's spec plus its full name."""
    pipeline = MLPipeline(['a_primitive', 'another_primitive'])
    block = pipeline.blocks['a_primitive#1']
    block.produce_output = [{'name': 'output', 'type': 'whatever'}]

    result = pipeline.get_outputs('a_primitive#1.output')

    # The declared spec gains a 'variable' entry holding the qualified name.
    assert result == [
        {'name': 'output', 'type': 'whatever', 'variable': 'a_primitive#1.output'},
    ]
def test_set_hyperparameters(self):
    """``set_hyperparameters`` with a nested dict only touches the blocks it names."""
    untouched_block = get_mlblock_mock()
    target_block = get_mlblock_mock()

    mlpipeline = MLPipeline(['a_primitive'])
    # Replace the pipeline blocks with the mocks prepared above.
    mlpipeline.blocks = OrderedDict((
        ('a.primitive.Name#1', untouched_block),
        ('a.primitive.Name#2', target_block),
    ))

    mlpipeline.set_hyperparameters({'a.primitive.Name#2': {'some': 'arg'}})

    untouched_block.set_hyperparameters.assert_not_called()
    target_block.set_hyperparameters.assert_called_once_with({'some': 'arg'})
def test_set_hyperparameters_flat(self):
    """``set_hyperparameters`` accepts ``(block, hyperparameter)`` tuple keys."""
    untouched_block = Mock()
    target_block = Mock()

    mlpipeline = MLPipeline(list())
    # Replace the pipeline blocks with the mocks prepared above.
    mlpipeline.blocks = OrderedDict((
        ('a.primitive.Name#1', untouched_block),
        ('a.primitive.Name#2', target_block),
    ))

    mlpipeline.set_hyperparameters({('a.primitive.Name#2', 'some'): 'arg'})

    untouched_block.set_hyperparameters.assert_not_called()
    # The flat key is expected to reach the block as a plain {name: value} dict.
    target_block.set_hyperparameters.assert_called_once_with({'some': 'arg'})
def test__get_block_variables_is_dict(self):
    """``_get_block_variables`` renames declared variables using the given name mapping."""
    pipeline = MLPipeline(['a_primitive'])
    block = pipeline.blocks['a_primitive#1']
    block.produce_outputs = [{'name': 'output', 'type': 'whatever'}]

    variables = pipeline._get_block_variables(
        'a_primitive#1',
        'produce_outputs',
        {'output': 'name_output'},
    )

    # The result is keyed by the mapped name; the spec keeps its original 'name'.
    assert variables == {'name_output': {'name': 'output', 'type': 'whatever'}}
def test__get_outputs_defaults(self):
    """``_get_outputs`` returns explicitly given outputs without querying block outputs."""
    pipeline_mock = MagicMock(autospec=MLPipeline)
    given_outputs = {'default': ['some', 'outputs']}

    # Called unbound so the mock stands in for ``self``; the pipeline spec is empty.
    result = MLPipeline._get_outputs(pipeline_mock, dict(), given_outputs)

    assert result == {'default': ['some', 'outputs']}
    pipeline_mock._get_block_outputs.assert_not_called()
def test_fit_pending_one_primitive(self):
    """With ``_last_fit_block`` set to the first block, ``fit`` fits only that block.

    Checks that ``_fit_block`` is invoked exactly once, that the invocation
    refers to the first block's name, and that ``_produce_block`` is never
    called.
    """
    block_1 = get_mlblock_mock()
    block_2 = get_mlblock_mock()
    blocks = OrderedDict((
        ('a.primitive.Name#1', block_1),
        ('a.primitive.Name#2', block_2),
    ))

    self_ = MagicMock(autospec=MLPipeline)
    self_.blocks = blocks
    self_._last_fit_block = 'a.primitive.Name#1'

    # Called unbound so the mock stands in for ``self``.
    MLPipeline.fit(self_)

    # The previous version *assigned* the expected list to ``call_args_list``,
    # which overwrote the recorded calls and verified nothing.
    # The full ``_fit_block`` signature is not visible here, so the check is
    # limited to the call count and the presence of the block name.
    assert self_._fit_block.call_count == 1
    fit_args, fit_kwargs = self_._fit_block.call_args
    assert 'a.primitive.Name#1' in list(fit_args) + list(fit_kwargs.values())

    assert not self_._produce_block.called