예제 #1
0
def _load_orion(pipeline, hyperparameters=None):
    """Resolve a flexible ``pipeline`` argument into an ``Orion`` instance.

    Args:
        pipeline:
            ``None``, an existing ``Orion`` instance, or any value accepted
            either by the ``Orion`` constructor or by ``Orion.load``.
        hyperparameters:
            Value passed through ``_load_dict`` and then handed to the
            ``Orion`` constructor. It is only used when ``pipeline`` is
            neither ``None`` nor an ``Orion`` instance.

    Returns:
        Orion:
            A default ``Orion()`` if ``pipeline`` is ``None``, the given
            object itself if it already is an ``Orion``, or otherwise a
            newly built or loaded instance.

    Raises:
        ValueError:
            If the ``Orion`` constructor rejects ``pipeline`` with a
            ``ValueError`` and ``Orion.load`` then fails with
            ``FileNotFoundError`` or ``UnpicklingError``.
    """
    if pipeline is None:
        return Orion()

    if isinstance(pipeline, Orion):
        return pipeline

    hyperparameters = _load_dict(hyperparameters)
    try:
        return Orion(pipeline, hyperparameters)
    except ValueError:
        # The constructor rejected the value; fall back to Orion.load
        # (presumably a path to a previously saved instance).
        try:
            return Orion.load(pipeline)
        except (FileNotFoundError, UnpicklingError) as error:
            # Chain explicitly so the load failure is reported as the cause.
            raise ValueError('Invalid pipeline: {}'.format(pipeline)) from error
예제 #2
0
 def setup_class(cls):
     """Fit a 'dummy' Orion on a constant 100-row signal and store it on the class."""
     n_rows = 100
     training_frame = pd.DataFrame(dict(
         timestamp=list(range(n_rows)),
         value=[1] * n_rows,
     ))
     cls.orion = Orion('dummy')
     cls.orion.fit(training_frame)
예제 #3
0
    def test_detect_visualization(self):
        """Check that a declared 'visualization' output is returned by detect."""
        # Extend the 'dummy' pipeline spec with a single named output.
        spec = load_pipeline('dummy')
        y_hat_output = {
            'name': 'y_hat',
            'variable': 'orion.primitives.estimators.MeanEstimator#1.y',
        }
        spec['outputs'] = {'visualization': [y_hat_output]}

        detector = Orion(spec)
        detector.fit(self.clean)

        detected, extras = detector.detect(self.anomalous, visualization=True)

        pd.testing.assert_frame_equal(self.events, detected)

        assert isinstance(extras, dict)
        assert 'y_hat' in extras
        predicted = extras['y_hat']
        expected = np.ones(len(self.anomalous))
        np.testing.assert_array_equal(predicted, expected)
예제 #4
0
def fit_pipeline(data: Union[str, pd.DataFrame],
                 pipeline: Union[str, MLPipeline, dict, None] = None,
                 hyperparameters: Union[str, dict, None] = None,
                 save_path: Union[str, None] = None) -> Union[Orion, None]:
    """Fit an Orion pipeline to the data.

    The pipeline can be passed as:
        * An ``str`` with a path to a JSON file.
        * An ``str`` with the name of a registered Orion pipeline.
        * An ``MLPipeline`` instance.
        * A ``dict`` with an ``MLPipeline`` specification.

    If no pipeline is passed, the default Orion pipeline
    (``Orion.DEFAULT_PIPELINE``) is used.

    Args:
        data (str or DataFrame):
            Data to which the pipeline should be fitted.
            It can be passed as a path to a CSV file or as a DataFrame.
        pipeline (str, MLPipeline or dict):
            Pipeline to use. It can be passed as:
                * An ``str`` with a path to a JSON file.
                * An ``str`` with the name of a registered pipeline.
                * An ``MLPipeline`` instance.
                * A ``dict`` with an ``MLPipeline`` specification.
            Defaults to ``None``.
        hyperparameters (str or dict):
            Hyperparameters to set to the pipeline. It can be passed as a
            hyperparameters ``dict`` in the ``mlblocks`` format or as a
            path to the corresponding JSON file. Defaults to
            ``None``.
        save_path (str):
            Path to the file where the fitted Orion instance will be stored
            using ``pickle``. If not given, the Orion instance is returned.
            Defaults to ``None``.

    Returns:
        Orion or None:
            The fitted Orion instance if no ``save_path`` is provided;
            ``None`` if the instance was saved to ``save_path`` instead.
    """
    data = _load_data(data)
    hyperparameters = _load_dict(hyperparameters)

    if pipeline is None:
        pipeline = Orion.DEFAULT_PIPELINE

    orion = Orion(pipeline, hyperparameters)
    orion.fit(data)

    if save_path:
        # When persisting, the instance is intentionally not returned.
        orion.save(save_path)
        return None

    return orion
예제 #5
0
class TestOrion:
    """Tests for ``Orion`` using the 'dummy' pipeline on small synthetic signals."""

    @classmethod
    def setup_class(cls):
        """Build the shared input frames and the expected event frames."""
        # Constant signal used for fitting.
        cls.clean = pd.DataFrame({
            'timestamp': list(range(100)),
            'value': [1] * 100,
        })
        # Same signal with a block of ten 10s at timestamps 145-154.
        cls.anomalous = pd.DataFrame({
            'timestamp': list(range(100, 200)),
            'value': [1] * 45 + [10] * 10 + [1] * 45
        })
        # Expected detection when fitting on ``clean`` only.
        cls.events = pd.DataFrame(
            [{
                'start': 145,
                'end': 155,
                'severity': 9.0
            }],
            columns=['start', 'end', 'severity'],
        )

        # Expected detection when fitting and detecting on both frames.
        cls.all_data = pd.concat((cls.clean, cls.anomalous))
        cls.all_events = pd.DataFrame(
            [{
                'start': 145,
                'end': 155,
                'severity': 4.275
            }],
            columns=['start', 'end', 'severity'],
        )

    def setup_method(self):
        """Create a fresh, unfitted 'dummy' Orion before every test.

        Named ``setup_method`` (pytest's native per-test hook) rather than
        the nose-style ``setup``, which pytest no longer calls since 8.0.
        """
        self.orion = Orion('dummy')

    def test_fit(self):
        """Fitting on the clean signal should not raise."""
        self.orion.fit(self.clean)

    def test_detect(self):
        """Detect returns the expected events after fitting on clean data."""
        self.orion.fit(self.clean)

        events = self.orion.detect(self.anomalous)

        pd.testing.assert_frame_equal(self.events, events)

    def test_detect_no_visualization(self):
        """With no visualization outputs declared, an empty dict is returned."""
        self.orion.fit(self.clean)

        events, visualization = self.orion.detect(self.anomalous,
                                                  visualization=True)

        pd.testing.assert_frame_equal(self.events, events)

        assert visualization == {}

    def test_detect_visualization(self):
        """A declared 'visualization' output is returned alongside the events."""
        pipeline = load_pipeline('dummy')
        pipeline['outputs'] = {
            'visualization': [{
                'name': 'y_hat',
                'variable': 'orion.primitives.estimators.MeanEstimator#1.y'
            }]
        }
        orion = Orion(pipeline)
        orion.fit(self.clean)

        events, visualization = orion.detect(self.anomalous,
                                             visualization=True)

        pd.testing.assert_frame_equal(self.events, events)

        assert isinstance(visualization, dict)
        assert 'y_hat' in visualization
        y_hat = visualization['y_hat']
        np.testing.assert_array_equal(y_hat, np.ones(len(self.anomalous)))

    def test_fit_detect(self):
        """fit_detect on the concatenated data yields the expected events."""
        events = self.orion.fit_detect(self.all_data)

        pd.testing.assert_frame_equal(self.all_events, events)

    def test_save_load(self, tmpdir):
        """An instance saved to a nested path loads back equal to the original."""
        path = os.path.join(tmpdir, 'some/path.pkl')
        self.orion.save(path)

        new_orion = Orion.load(path)
        assert new_orion == self.orion

    def test_evaluate(self):
        """Evaluating a pre-fitted instance yields perfect scores."""
        self.orion.fit(self.clean)
        scores = self.orion.evaluate(data=self.anomalous,
                                     ground_truth=self.events)

        expected = pd.Series({
            'accuracy': 1.0,
            'f1': 1.0,
            'recall': 1.0,
            'precision': 1.0,
        })
        pd.testing.assert_series_equal(expected, scores)

    def test_evaluate_fit(self):
        """Evaluating with ``fit=True`` on the full data yields perfect scores."""
        scores = self.orion.evaluate(
            data=self.all_data,
            ground_truth=self.all_events,
            fit=True,
        )

        expected = pd.Series({
            'accuracy': 1.0,
            'f1': 1.0,
            'recall': 1.0,
            'precision': 1.0,
        })
        pd.testing.assert_series_equal(expected, scores)

    def test_evaluate_train_data(self):
        """Evaluating with ``fit=True`` and separate ``train_data`` yields perfect scores."""
        scores = self.orion.evaluate(data=self.anomalous,
                                     ground_truth=self.events,
                                     fit=True,
                                     train_data=self.clean)

        expected = pd.Series({
            'accuracy': 1.0,
            'f1': 1.0,
            'recall': 1.0,
            'precision': 1.0,
        })
        pd.testing.assert_series_equal(expected, scores)
예제 #6
0
    def test_save_load(self, tmpdir):
        """Save the instance under ``tmpdir`` and check the reloaded copy is equal."""
        pickle_path = os.path.join(tmpdir, 'some/path.pkl')
        self.orion.save(pickle_path)

        restored = Orion.load(pickle_path)
        assert restored == self.orion
예제 #7
0
 def setup(self):
     """Attach a newly constructed 'dummy' Orion to the instance."""
     fresh_orion = Orion('dummy')
     self.orion = fresh_orion