def test__load_pipeline_value_error(load_mock, gpp_mock):
    """Loading an unresolvable pipeline name raises ValueError."""
    gpp_mock.return_value = ['a', 'b']
    load_mock.return_value = None

    with pytest.raises(ValueError):
        discovery.load_pipeline('invalid.pipeline')

    load_mock.assert_called_once_with('invalid.pipeline', ['a', 'b'])
def test_detect_anomalies_fit_pipeine_dict(self):
    """detect_anomalies accepts a pipeline dict together with training data."""
    pipeline_dict = load_pipeline('dummy')

    detected = functional.detect_anomalies(
        data=self.anomalous,
        pipeline=pipeline_dict,
        train_data=self.clean,
    )

    pd.testing.assert_frame_equal(self.events, detected)
def test__load_pipeline_success(load_mock, gpp_mock):
    """A resolvable pipeline name is loaded through the discovered pipeline paths."""
    gpp_mock.return_value = ['a', 'b']

    result = discovery.load_pipeline('valid.pipeline')

    load_mock.assert_called_once_with('valid.pipeline', ['a', 'b'])
    assert result == load_mock.return_value
def test_load_orion_dict(self):
    """_load_orion wraps a pipeline dict in an unfitted Orion instance."""
    pipeline_dict = load_pipeline('dummy')

    result = functional._load_orion(pipeline_dict)

    assert isinstance(result, Orion)
    assert result._pipeline == pipeline_dict
    assert result._hyperparameters is None
    assert not result._fitted
def test_fit_pipeline_dict(self):
    """fit_pipeline returns a fitted Orion built from a pipeline dict."""
    pipeline_dict = load_pipeline('dummy')

    fitted = functional.fit_pipeline(self.data, pipeline_dict)

    assert isinstance(fitted, Orion)
    assert fitted._pipeline == pipeline_dict
    assert fitted._hyperparameters is None
    assert fitted._fitted
def test_load_orion_json_path(self, tmpdir):
    """_load_orion accepts the path to a JSON file containing a pipeline dict."""
    json_path = os.path.join(tmpdir, 'pipeline.json')
    with open(json_path, 'w') as fp:
        json.dump(load_pipeline('dummy'), fp)

    result = functional._load_orion(json_path)

    assert isinstance(result, Orion)
    assert result._pipeline == json_path
    assert result._hyperparameters is None
    assert not result._fitted
def _get_templates(self, templates): template_dicts = dict() template_names = list() for template in templates: if isinstance(template, str): template_name = template template = load_pipeline(template_name) else: template_name = md5(json.dumps(template)).digest() template_dicts[template_name] = template template_names.append(template_name) return template_names, template_dicts
def __init__(self, template, metric='accuracy', cost=False, init_params=None,
             stratify=True, cv_splits=5, shuffle=True, random_state=0,
             preprocessing=0):
    """Build a tunable pipeline wrapper around an MLPipeline template.

    Args:
        template: pipeline template name (str, loaded via ``load_pipeline``)
            or an already-built template dict.
        metric: metric name (resolved through the ``METRICS`` registry, which
            also supplies the matching ``cost`` flag) or a metric callable.
        cost: whether lower metric values are better; ignored when ``metric``
            is a string, since the registry value overrides it.
        init_params: initial primitive parameters to apply via
            ``set_init_params``; when falsy the pipeline is built as-is.
        stratify, cv_splits, shuffle, random_state: cross-validation
            configuration, forwarded to ``self._get_cv``.
        preprocessing: number of leading steps treated as preprocessing.

    Raises:
        ValueError: if ``preprocessing`` exceeds the number of static steps.
    """
    self._cv = self._get_cv(stratify, cv_splits, shuffle, random_state)

    # A string metric is a registry key; the registry provides both the
    # callable and its cost orientation, replacing the passed-in values.
    if isinstance(metric, str):
        metric, cost = METRICS[metric]

    self._metric = metric
    self._cost = cost

    # NOTE(review): when a dict is given, ``self.template_name`` is never
    # set — confirm nothing reads it in that case.
    if isinstance(template, str):
        self.template_name = template
        self.template = load_pipeline(template)
    else:
        self.template = template

    # Make sure to have block number in all init_params names
    template_params = self.template.setdefault('init_params', dict())
    for name, params in list(template_params.items()):
        if '#' not in name:
            template_params[name + '#1'] = template_params.pop(name)

    self._hyperparameters = dict()
    # Either path is expected to leave ``self._pipeline`` populated
    # (presumably built inside these helpers — verify in their definitions).
    if init_params:
        self.set_init_params(init_params)
    else:
        self._build_pipeline()

    self._static = self._count_static_steps()
    self._preprocessing = preprocessing

    # Partition the primitives into preprocessing / static / tunable spans.
    self.steps = self._pipeline.primitives.copy()
    self.preprocessing = self.steps[:self._preprocessing]
    self.static = self.steps[self._preprocessing:self._static]
    self.tunable = self.steps[self._static:]

    if self._preprocessing and (self._preprocessing > self._static):
        raise ValueError('Preprocessing cannot be bigger than static')
def test_fit(self, pipeline_class_mock):
    """Fitting trains the underlying MLPipeline and marks the instance as fitted."""
    instance = GreenGuardPipeline(self.PIPELINE_NAME, 'accuracy')

    instance.fit('an_X', 'a_y', 'readings')

    mocked_pipeline = pipeline_class_mock.return_value
    pipeline_class_mock.assert_called_once_with(
        load_pipeline(self.PIPELINE_NAME))
    mocked_pipeline.fit.assert_called_once_with('an_X', 'a_y', readings='readings')
    assert instance._pipeline == mocked_pipeline
    assert instance.fitted
def _get_templates(self, templates): template_dicts = dict() template_names = list() for template in templates: if isinstance(template, str): template_name = template if os.path.isfile(template): with open(template, 'r') as json_file: template = json.load(json_file) else: template = deepcopy(load_pipeline(template_name)) else: template_name = md5(json.dumps(template)).digest() template_dicts[template_name] = template template_names.append(template_name) return template_names, template_dicts
def _get_pipeline_dict(pipeline, primitives): if isinstance(pipeline, dict): return pipeline elif isinstance(pipeline, str): return load_pipeline(pipeline) elif isinstance(pipeline, MLPipeline): return pipeline.to_dict() elif isinstance(pipeline, list): if primitives is not None: raise ValueError('if `pipeline` is a `list`, `primitives` must be `None`') return {'primitives': pipeline} elif pipeline is None: if primitives is None: raise ValueError('Either `pipeline` or `primitives` must be not `None`.') return dict()
def test_detect_visualization(self):
    """detect with visualization=True returns events plus the visualization dict."""
    viz_output = {
        'name': 'y_hat',
        'variable': 'orion.primitives.estimators.MeanEstimator#1.y'
    }
    pipeline = load_pipeline('dummy')
    pipeline['outputs'] = {'visualization': [viz_output]}

    instance = Orion(pipeline)
    instance.fit(self.clean)
    events, visualization = instance.detect(self.anomalous, visualization=True)

    pd.testing.assert_frame_equal(self.events, events)
    assert isinstance(visualization, dict)
    assert 'y_hat' in visualization
    np.testing.assert_array_equal(visualization['y_hat'], np.ones(len(self.anomalous)))