Example 1
    def test_predict(self, load_pipeline_mock, mlpipeline_mock):
        load_pipeline_mock.return_value = dict()

        # Run
        instance = GreenGuardPipeline('a_pipeline', 'accuracy')
        instance.fitted = True
        target_times, readings = self._get_data()
        instance.predict(target_times, readings)
Example 2
    def test_predict(self, pipeline_mock):
        """predict produces results using the pipeline."""
        # Run
        instance = GreenGuardPipeline(self.PIPELINE_NAME, 'accuracy')
        instance.fitted = True
        instance.predict('an_X', 'readings')

        # Asserts
        pipeline_mock.return_value.predict.assert_called_once_with(
            'an_X', readings='readings')
Example 3
    def test_predict(self, from_dict_mock):
        """predict produces results using the pipeline."""
        # Setup
        pipeline_mock = Mock()
        from_dict_mock.return_value = pipeline_mock

        # Run
        instance = GreenGuardPipeline(dict(), 'accuracy')
        instance.fitted = True
        instance.predict('an_X', {'some': 'tables'})

        # Asserts
        pipeline_mock.predict.assert_called_once_with('an_X',
                                                      entityset=None,
                                                      some='tables')
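
These snippets omit the @patch decorators that inject pipeline_mock, from_dict_mock and the other mock arguments. Below is a minimal sketch of how such a test is typically wired up with unittest.mock; the patch targets ('greenguard.pipeline.MLPipeline' and 'greenguard.pipeline.load_pipeline') and the asserted call signature mirror Examples 1 and 2 but are assumptions about the library internals, not taken from the snippets above.

from unittest.mock import patch

from greenguard.pipeline import GreenGuardPipeline


# NOTE: both patch targets are assumptions; point them at wherever
# MLPipeline and load_pipeline are imported inside greenguard.pipeline.
@patch('greenguard.pipeline.MLPipeline')
@patch('greenguard.pipeline.load_pipeline')
def test_predict_sketch(load_pipeline_mock, mlpipeline_mock):
    # load_pipeline returns the template dict that GreenGuardPipeline loads
    load_pipeline_mock.return_value = dict()

    # Run
    instance = GreenGuardPipeline('a_pipeline', 'accuracy')
    instance.fitted = True
    instance.predict('an_X', 'readings')

    # Assert: predict is delegated to the MLPipeline instance built by the
    # constructor (the call pattern asserted in Example 2; Example 3 shows a
    # library version that forwards the readings tables as keyword arguments).
    mlpipeline_mock.return_value.predict.assert_called_once_with(
        'an_X', readings='readings')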
Example 4
    def test_fit(self, pipeline_class_mock):
        """fit prepare the pipeline to make predictions based on the given data."""
        # Run
        instance = GreenGuardPipeline(self.PIPELINE_NAME, 'accuracy')
        instance.fit('an_X', 'a_y', 'readings')

        # Asserts
        pipeline_mock = pipeline_class_mock.return_value
        pipeline_class_mock.assert_called_once_with(
            load_pipeline(self.PIPELINE_NAME))
        assert instance._pipeline == pipeline_mock

        pipeline_mock.fit.assert_called_once_with('an_X',
                                                  'a_y',
                                                  readings='readings')

        assert instance.fitted
Example 5
    def test_fit(self, from_dict_mock):
        """fit prepare the pipeline to make predictions based on the given data."""

        # Setup
        pipeline_mock = Mock()
        from_dict_mock.return_value = pipeline_mock

        # Run
        instance = GreenGuardPipeline(dict(), 'accuracy')
        instance.fit('an_X', 'a_y', {'some': 'tables'})

        # Asserts
        from_dict_mock.assert_called_once_with(dict())
        assert instance._pipeline == pipeline_mock

        pipeline_mock.fit.assert_called_once_with('an_X',
                                                  'a_y',
                                                  entityset=None,
                                                  some='tables')

        assert instance.fitted
Example 6
def evaluate_template(template,
                      target_times,
                      readings,
                      tuning_iterations=50,
                      init_params=None,
                      preprocessing=0,
                      metrics=None,
                      threshold=None,
                      tpr=None,
                      tuning_metric='roc_auc_score',
                      tuning_metric_kwargs=DEFAULT_TUNING_METRIC_KWARGS,
                      cost=False,
                      cv_splits=3,
                      test_size=0.25,
                      random_state=0,
                      cache_path=None,
                      scores=None):
    """Returns the scores for a given template.

    Args:
        template (str):
            Given template to evaluate.
        target_times (DataFrame):
            Contains the specification of the problem we are solving, which has three columns:

                * turbine_id: Unique identifier of the turbine which this label corresponds to.
                * cutoff_time: Time associated with this target.
                * target: The value that we want to predict. This can either be a numerical value
                          or a categorical label. This column can also be skipped when preparing
                          data that will be used only to make predictions and not to fit any
                          pipeline.

        readings (DataFrame):
            Contains the signal data from different sensors, with the following columns:

                * turbine_id: Unique identifier of the turbine which this reading comes from.
                * signal_id: Unique identifier of the signal which this reading comes from.
                * timestamp (datetime): Time where the reading took place, as a datetime.
                * value (float): Numeric value of this reading.

        tuning_metric (function or str):
            Metric to use for tuning. If a ``str`` is given, it must be one of the
            metrics defined in the ``greenguard.metrics.METRICS`` dictionary.
        tuning_iterations (int):
            Number of tuning iterations to perform.
        preprocessing (int, list or dict):
            Number of preprocessing steps to be used.
        init_params (list):
            Initialization parameters for the pipeline.
        cost (bool):
            Whether the metric is a cost function (the lower the better) or not.
        test_size (float):
            Percentage of the data set to be used for the test.
        cv_splits (int):
            Amount of splits to create.
        random_state (int):
            Random state to use for the train_test split.
        cache_path (str):
            If given, cache the generated cross validation splits in this folder.
            Defaults to ``None``.

    Returns:
        scores (dict):
            Stores the computed scores and timing information.
    """
    start_time = datetime.utcnow()
    scores = dict() if scores is None else scores
    scores['tuning_metric'] = str(tuning_metric)
    scores['tuning_metric_kwargs'] = tuning_metric_kwargs
    tuning_metric = _scorer(tuning_metric, tuning_metric_kwargs)

    train, test = train_test_split(target_times,
                                   test_size=test_size,
                                   random_state=random_state)

    pipeline = GreenGuardPipeline(template,
                                  metric=tuning_metric,
                                  cost=cost,
                                  cv_splits=cv_splits,
                                  init_params=init_params,
                                  preprocessing=preprocessing,
                                  cache_path=cache_path)

    # Computing the default test score
    fit_predict_time = datetime.utcnow()
    pipeline.fit(train, readings)
    predictions = pipeline.predict(test, readings)
    fit_predict_time = datetime.utcnow() - fit_predict_time

    scores['default_test'] = tuning_metric(test['target'], predictions)

    # Computing the default cross validation score
    default_cv_time = datetime.utcnow()
    session = pipeline.tune(train, readings)
    session.run(1)
    default_cv_time = datetime.utcnow() - default_cv_time

    scores['default_cv'] = pipeline.cv_score

    # Computing the cross validation score with tuned hyperparameters
    average_cv_time = datetime.utcnow()
    session.run(tuning_iterations)
    average_cv_time = (datetime.utcnow() - average_cv_time) / tuning_iterations

    scores['tuned_cv'] = pipeline.cv_score

    # Computing the test score with tuned hyperparameters
    pipeline.fit(train, readings)
    predictions = pipeline.predict(test, readings)
    ground_truth = test['target']

    # compute different metrics
    if tpr:
        tpr = tpr if isinstance(tpr, list) else [tpr]
        for value in tpr:
            threshold = threshold_score(ground_truth, predictions, value)
            scores[f'fpr_tpr/{value}'] = fpr_score(ground_truth,
                                                   predictions,
                                                   tpr=value)
            predictions_classes = predictions >= threshold
            scores[f'accuracy_tpr/{value}'] = accuracy_score(
                ground_truth, predictions_classes)
            scores[f'f1_tpr/{value}'] = f1_score(ground_truth,
                                                 predictions_classes)
            scores[f'threshold_tpr/{value}'] = threshold_score(
                ground_truth, predictions, value)

            if f'accuracy_tpr/{value}' not in LEADERBOARD_COLUMNS:
                LEADERBOARD_COLUMNS.extend([
                    f'accuracy_tpr/{value}',
                    f'f1_tpr/{value}',
                    f'fpr_tpr/{value}',
                    f'threshold_tpr/{value}',
                ])

    else:
        threshold = 0.5 if threshold is None else threshold
        threshold = threshold if isinstance(threshold, list) else [threshold]

        for value in threshold:
            scores[f'fpr_threshold/{value}'] = fpr_score(ground_truth,
                                                         predictions,
                                                         threshold=value)

            predictions_classes = predictions >= value
            scores[f'accuracy_threshold/{value}'] = accuracy_score(
                ground_truth, predictions_classes)

            scores[f'f1_threshold/{value}'] = f1_score(ground_truth,
                                                       predictions_classes)
            scores[f'tpr_threshold/{value}'] = tpr_score(
                ground_truth, predictions, value)

            if f'accuracy_threshold/{value}' not in LEADERBOARD_COLUMNS:
                LEADERBOARD_COLUMNS.extend([
                    f'accuracy_threshold/{value}',
                    f'f1_threshold/{value}',
                    f'fpr_threshold/{value}',
                    f'tpr_threshold/{value}',
                ])

    scores['tuned_test'] = tuning_metric(test['target'], predictions)
    scores['fit_predict_time'] = fit_predict_time
    scores['default_cv_time'] = default_cv_time
    scores['average_cv_time'] = average_cv_time
    scores['total_time'] = datetime.utcnow() - start_time

    return scores
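
A hedged usage sketch for the signature above, assuming the demo dataset shipped with the library and a placeholder template name (both the load_demo helper and the template string are assumptions, not taken from the snippet): passing tpr switches the scoring branch, so the returned dict gains one set of fpr_tpr/<value>, accuracy_tpr/<value>, f1_tpr/<value> and threshold_tpr/<value> keys per value, on top of the default_test, default_cv, tuned_cv, tuned_test and timing entries.

from greenguard.demo import load_demo   # assumed demo data loader

# evaluate_template as defined above
target_times, readings = load_demo()

scores = evaluate_template(
    'a_template_name',       # placeholder; use a template shipped with the library
    target_times,
    readings,
    tuning_iterations=5,     # keep the benchmark short for a smoke test
    tpr=[0.5, 0.75],         # one set of *_tpr/<value> score keys per entry
)

print(scores['default_test'], scores['tuned_test'])
print(scores['fpr_tpr/0.5'], scores['threshold_tpr/0.75'])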
Example 7
def evaluate_template(template, target_times, readings, metric='f1', tuning_iterations=50,
                      preprocessing=0, init_params=None, cost=False, test_size=0.25,
                      cv_splits=3, random_state=0, cache_path=None):
    """Returns the scores for a given template.

    Args:
        template (str):
            Given template to evaluate.
        target_times (DataFrame):
            Contains the specification of the problem we are solving, which has three columns:

                * turbine_id: Unique identifier of the turbine which this label corresponds to.
                * cutoff_time: Time associated with this target.
                * target: The value that we want to predict. This can either be a numerical value
                          or a categorical label. This column can also be skipped when preparing
                          data that will be used only to make predictions and not to fit any
                          pipeline.

        readings (DataFrame):
            Contains the signal data from different sensors, with the following columns:

                * turbine_id: Unique identifier of the turbine which this reading comes from.
                * signal_id: Unique identifier of the signal which this reading comes from.
                * timestamp (datetime): Time where the reading took place, as a datetime.
                * value (float): Numeric value of this reading.

        metric (function or str):
            Metric to use. If a ``str`` is given, it must be one of the metrics
            defined in the ``greenguard.metrics.METRICS`` dictionary.
        tuning_iterations (int):
            Number of iterations to be used.
        preprocessing (int, list or dict):
            Number of preprocessing steps to be used.
        init_params (list):
            Initialization parameters for the pipeline.
        cost (bool):
            Whether the metric is a cost function (the lower the better) or not.
        test_size (float):
            Percentage of the data set to be used for the test.
        cv_splits (int):
            Amount of splits to create.
        random_state (int):
            Random state to use for the train_test split.
        cache_path (str):
            If given, cache the generated cross validation splits in this folder.
            Defaults to ``None``.

    Returns:
        scores (dict):
            Stores the four types of scores that are being evaluated.
    """
    scores = dict()

    train, test = train_test_split(target_times, test_size=test_size, random_state=random_state)

    if isinstance(metric, str):
        metric, cost = METRICS[metric]

    pipeline = GreenGuardPipeline(
        template,
        metric,
        cost=cost,
        cv_splits=cv_splits,
        init_params=init_params,
        preprocessing=preprocessing,
        cache_path=cache_path
    )

    # Computing the default test score
    pipeline.fit(train, readings)
    predictions = pipeline.predict(test, readings)

    scores['default_test'] = metric(test['target'], predictions)

    # Computing the default cross validation score
    session = pipeline.tune(train, readings)
    session.run(1)

    scores['default_cv'] = pipeline.cv_score

    # Computing the cross validation score with tuned hyperparameters
    session.run(tuning_iterations)

    scores['tuned_cv'] = pipeline.cv_score

    # Computing the test score with tuned hyperparameters
    pipeline.fit(train, readings)
    predictions = pipeline.predict(test, readings)

    scores['tuned_test'] = metric(test['target'], predictions)

    return scores
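
Both variants consume the same two tables described in their docstrings. The sketch below builds toy versions of them by hand with pandas purely to make the expected schema concrete; three rows are far too few to actually train a pipeline, so the evaluate_template call is left commented as a shape reference only.

import pandas as pd

# target_times: one row per label, with the three documented columns.
target_times = pd.DataFrame({
    'turbine_id': ['T001', 'T001', 'T002'],
    'cutoff_time': pd.to_datetime(['2013-01-12', '2013-01-13', '2013-01-12']),
    'target': [0, 1, 0],
})

# readings: raw sensor values, one row per (turbine, signal, timestamp).
readings = pd.DataFrame({
    'turbine_id': ['T001', 'T001', 'T002'],
    'signal_id': ['S01', 'S02', 'S01'],
    'timestamp': pd.to_datetime(['2013-01-11 12:00',
                                 '2013-01-11 12:10',
                                 '2013-01-11 12:00']),
    'value': [323.0, 320.5, 291.0],
})

# With a realistically sized dataset in this shape, the call mirrors the
# signature above ('a_template_name' is a placeholder, not a real template):
# scores = evaluate_template('a_template_name', target_times, readings,
#                            metric='f1', tuning_iterations=5)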