Example #1
0
def test_unknown_metric_exception(classification_pred):
    """Scoring with an unsupported metric name must raise.

    ``'precision_recall'`` is used as the unsupported name. Constructing the
    ``Metric`` happens outside the ``pytest.raises`` block, so the test
    expects ``NotSupportedMetricException`` to come from ``Metric.score``.
    """
    metric = Metric('precision_recall')

    y_true, y_pred = classification_pred

    with pytest.raises(NotSupportedMetricException):
        # The return value is irrelevant: the call itself is expected to raise.
        metric.score(y_true, y_pred)
Example #2
0
def test_mse_metric(regression_pred):
    """The ``'mse'`` metric scores the regression fixture at 0.00671625."""
    metric = Metric('mse')

    y_true, y_pred = regression_pred

    # Compare with a tolerance: the score is a computed float, and exact
    # equality would break on harmless rounding differences.
    assert metric.score(y_true, y_pred) == pytest.approx(0.00671625)
Example #3
0
def test_mae_metric(regression_pred):
    """The ``'mae'`` metric scores the regression fixture at 0.06925."""
    metric = Metric('mae')

    y_true, y_pred = regression_pred

    # Compare with a tolerance: the score is a computed float, and exact
    # equality would break on harmless rounding differences.
    assert metric.score(y_true, y_pred) == pytest.approx(0.06925)
Example #4
0
def test_accuracy_metric(classification_pred):
    """The ``'accuracy'`` metric scores the classification fixture at 0.75."""
    scorer = Metric('accuracy')
    expected_labels, predicted_labels = classification_pred

    accuracy = scorer.score(expected_labels, predicted_labels)

    assert accuracy == 0.75
Example #5
0
def test_multi_f1_metric(multi_classification_pred):
    """The ``'f1'`` metric with ``average='macro'`` on the multi-class fixture.

    The expected score for the fixture is 0.26666666666666666.
    """
    metric = Metric('f1', {'average': 'macro'})

    y_true, y_pred = multi_classification_pred

    # Compare with a tolerance: the expected value is a repeating decimal,
    # so exact float equality depends on the precise order of operations.
    assert metric.score(y_true, y_pred) == pytest.approx(0.26666666666666666)
Example #6
0
def test_f1_metric(classification_pred):
    """The ``'recall'`` metric scores the classification fixture at 0.5.

    NOTE(review): the test name says "f1" but the metric under test is
    ``'recall'`` -- confirm which one was intended.
    """
    scorer = Metric('recall')
    expected_labels, predicted_labels = classification_pred

    recall = scorer.score(expected_labels, predicted_labels)

    assert recall == 0.5
Example #7
0
def test_multi_recall_metric(multi_classification_pred):
    """The ``'recall'`` metric with ``average='macro'`` on the multi-class fixture.

    The expected score for the fixture is 0.3333333333333333.
    """
    metric = Metric('recall', {'average': 'macro'})

    y_true, y_pred = multi_classification_pred

    # Compare with a tolerance: the expected value is a repeating decimal,
    # so exact float equality depends on the precise order of operations.
    assert metric.score(y_true, y_pred) == pytest.approx(0.3333333333333333)
Example #8
0
class MLModel:
    """Train a list of models on one dataset and record a metric for each.

    Models are identified by name (the keys of ``self._model_mapper``) and
    paired position-by-position with a list of keyword-argument dicts. Each
    model is instantiated, fitted on the training data (optionally on a
    bootstrap sample of it), used to predict on the test data, and scored
    with the configured ``Metric``. Every score is stored in ``results`` and
    appended as a CSV row to ``result_file``.

    Subclasses must implement ``_init_models`` to provide the default model
    names used when ``models`` is not passed.
    """

    def __init__(self,
                 X_train,
                 y_train,
                 X_test,
                 y_test,
                 metric_name,
                 metric_params,
                 use_bootstrap=False,
                 n_samples=1000,
                 models=None,
                 model_params=None,
                 result_file='results.csv'):
        """Store the data, build the metric and resolve models and params.

        Args:
            X_train: Training features. Must expose ``.shape`` when
                ``use_bootstrap`` is true.
            y_train: Training targets.
            X_test: Test features passed to ``model.predict``.
            y_test: Test targets passed to the metric.
            metric_name: Name forwarded to ``Metric``.
            metric_params: Parameters forwarded to ``Metric``.
            use_bootstrap: If true, each model is fitted on a bootstrap
                sample instead of the full training set.
            n_samples: Bootstrap sample size.
            models: List of model names; defaults to ``_init_models()``.
            model_params: List of keyword-argument dicts, one per model;
                defaults to ``_init_model_params()``.
            result_file: Path of the CSV file that result rows are
                appended to.

        Raises:
            InvalidBootstrapSampleSizeException: If ``use_bootstrap`` is true
                and the training set has fewer rows than ``n_samples``.
            ValueError: If ``models`` and ``model_params`` differ in length.
        """
        self._X_train = X_train
        self._y_train = y_train
        self._X_test = X_test
        self._y_test = y_test
        self._metric = Metric(metric_name, metric_params)
        self._use_bootstrap = use_bootstrap
        self._n_samples = n_samples
        self._result_file = result_file

        if use_bootstrap and self._X_train.shape[0] < n_samples:
            raise InvalidBootstrapSampleSizeException(
                f'Cannot use bootstrap sampling: {n_samples} > {self._X_train.shape[0]}'
            )

        # Models must be resolved first: the default params are sized from
        # the resolved model list.
        self._models = self._init_models() if models is None else models
        self._model_params = self._init_model_params(
        ) if model_params is None else model_params

        if len(self._models) != len(self._model_params):
            raise ValueError(
                'models list and model params list must have the same length')

        # Maps (model name, str(model params)) -> metric score.
        self._results = {}

        # Maps the accepted model names to the classes they instantiate.
        self._model_mapper = {
            # Classifiers
            'LogisticRegression': LogisticRegression,
            'RandomForestClassifier': RandomForestClassifier,
            'RidgeClassifier': RidgeClassifier,
            'GradientBoostingClassifier': GradientBoostingClassifier,
            'LinearSVC': LinearSVC,

            # Regressors
            'LinearRegression': LinearRegression,
            'RandomForestRegressor': RandomForestRegressor,
            'Ridge': Ridge,
            'LinearSVR': LinearSVR,
            'GradientBoostingRegressor': GradientBoostingRegressor,
        }

    @property
    def models(self):
        """List of model names that will be trained."""
        return self._models

    @property
    def model_params(self):
        """List of keyword-argument dicts, parallel to ``models``."""
        return self._model_params

    @property
    def results(self):
        """Dict mapping ``(model name, str(model params))`` to the score."""
        return self._results

    def run_models(self):
        """Instantiate, train and score every configured model in sequence."""
        for model in self._map_models():
            self._train_model(model)

    def run_models_threads(self):
        """Train and score every configured model using the threading backend.

        NOTE(review): ``_train_model`` updates ``self._results`` and appends
        to the result file from the worker threads without explicit
        synchronization -- confirm this is acceptable for the intended use.
        """
        with parallel_backend('threading', n_jobs=-1):
            Parallel()(delayed(self._train_model)(model)
                       for model in self._map_models())

    def _train_model(self, model):
        """Fit ``model``, score its test predictions and record the score."""
        self._fit(model)
        y_pred = self._predict(model)
        metric_score = self._metric.score(self._y_test, y_pred)
        self._write_metric_results(model, metric_score)

    def _init_models(self):
        """Return the default list of model names; subclasses must override."""
        raise NotImplementedError

    def _fit(self, model):
        """Fit ``model`` on the training data or on a bootstrap sample of it.

        With bootstrapping enabled, the training data is indexed with the
        result of ``get_bootstrap_indices`` (assumes ``X_train``/``y_train``
        support indexing by that result, e.g. NumPy arrays -- TODO confirm).
        """
        if self._use_bootstrap:
            indices = get_bootstrap_indices(self._X_train, self._n_samples)
            X_train = self._X_train[indices]
            y_train = self._y_train[indices]
        else:
            X_train = self._X_train
            y_train = self._y_train

        model.fit(X_train, y_train)

    def _predict(self, model):
        """Return ``model``'s predictions for the test features."""
        return model.predict(self._X_test)

    def _write_metric_results(self, model, metric_score):
        """Store ``metric_score`` in ``results`` and append it to the CSV file.

        The appended row is: UTC timestamp, model class name, stringified
        model params, metric name, score.
        """
        model_name = type(model).__name__
        model_params = str(model.get_params())
        self._results[(model_name, model_params)] = metric_score
        # newline='' lets the csv writer control line endings itself;
        # without it, platforms that translate '\n' end up with blank rows.
        with open(self._result_file, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow((
                datetime.utcnow(),
                model_name,
                model_params,
                self._metric._metric_name,
                metric_score,
            ))

    def _map_models(self):
        """Instantiate every configured model with its parameters.

        Returns:
            A list of model instances, in the order of ``models``.

        Raises:
            ValueError: If a model name is not a key of the model mapper.
        """
        instances = []
        for name, params in zip(self._models, self._model_params):
            try:
                model_cls = self._model_mapper[name]
            except KeyError:
                # Fail with the offending name rather than the opaque
                # "'NoneType' object is not callable" a missing key produced.
                raise ValueError(f'Unknown model name: {name!r}') from None
            instances.append(model_cls(**params))
        return instances

    def _init_model_params(self):
        """Return default params: one empty dict per configured model."""
        # Sized from the model list so that a custom ``models`` list of any
        # length works without also having to pass ``model_params``.
        return [{} for _ in self._models]