Example 1
def model(dataset):
    # Fetch the training data locally; the second return value (a test
    # path) is unused here.
    train_path, _ = download_data(dataset.train_path)
    # Build a decision-tree model judged by ROC AUC on the dataset's
    # class column.
    model = Model(method='dt', params=DT_PARAMS,
                  judgment_metric='roc_auc',
                  class_column=dataset.class_column)
    model.train_test(train_path=train_path)
    return model
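
A minimal sketch of how this helper might be wired up and called. The imports, the Dataset stand-in, the URL, and the DT_PARAMS values are assumptions for illustration, not part of the original snippet; ATM's module paths may also differ by version.

from collections import namedtuple

# Assumed imports; these follow ATM's usual layout but are not confirmed
# by the snippet above.
from atm.model import Model
from atm.utilities import download_data

# Hypothetical stand-ins for objects the helper takes for granted.
Dataset = namedtuple('Dataset', ['train_path', 'class_column'])
DT_PARAMS = {'criterion': 'gini', 'max_depth': 3}  # assumed example values

dataset = Dataset(train_path='http://example.com/pollution_1.csv',
                  class_column='class')
trained = model(dataset)  # downloads the CSV, then trains and evaluates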
Example 2
    def test_classifier(self, classifier_id, params):
        """
        Given a set of fully-qualified hyperparameters, create and test a
        model.
        Returns: Model object and performance dictionary
        """
        wrapper = create_wrapper(params, self.datarun.metric)
        wrapper.load_data_from_objects(*self.load_data())
        performance = wrapper.start()

        old_best = self.db.get_best_classifier(datarun_id=self.datarun.id)
        if old_best is not None:
            old_val = old_best.cv_judgment_metric
            old_err = 2 * old_best.cv_judgment_metric_stdev

        new_val = performance["cv_judgment_metric"]
        new_err = 2 * performance["cv_judgment_metric_stdev"]

        _log("Judgment metric (%s): %.3f +- %.3f" %
             (self.datarun.metric, new_val, new_err))

        if old_best is not None:
            # New best only if the candidate's lower bound clears the
            # incumbent's upper bound.
            if (new_val - new_err) > (old_val + old_err):
                _log(
                    "New best score! Previous best (classifier %s): %.3f +- %.3f"
                    % (old_best.id, old_val, old_err))
            else:
                _log("Best so far (classifier %s): %.3f +- %.3f" %
                     (old_best.id, old_val, old_err))

        model = Model(algorithm=wrapper, data=self.dataset.wrapper)
        return model, performance
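
The "new best" check above compares uncertainty intervals rather than raw means: a candidate wins only if the lower bound of its score clears the upper bound of the incumbent's. A standalone sketch of that rule, with made-up scores:

# Interval comparison from the snippet above, using invented numbers.
old_val, old_err = 0.84, 2 * 0.015  # incumbent: mean and 2 * stdev
new_val, new_err = 0.91, 2 * 0.010  # candidate: mean and 2 * stdev

# 0.89 (candidate lower bound) > 0.87 (incumbent upper bound) -> new best
if (new_val - new_err) > (old_val + old_err):
    print('New best score!')
else:
    print('Best so far is still the incumbent.')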
Example 3
    def test_classifier(self, method, params):
        """
        Given a set of fully-qualified hyperparameters, create and test a
        classifier model.
        Returns: Model object and metrics dictionary
        """
        model = Model(method=method,
                      params=params,
                      judgment_metric=self.datarun.metric,
                      class_column=self.dataset.class_column,
                      verbose_metrics=self.verbose_metrics)
        train_path, test_path = download_data(self.dataset.train_path,
                                              self.dataset.test_path,
                                              self.aws_config)
        metrics = model.train_test(train_path=train_path, test_path=test_path)
        target = self.datarun.score_target

        def metric_string(model):
            if 'cv' in target or 'mu_sigma' in target:
                return '%.3f +- %.3f' % (model.cv_judgment_metric,
                                         2 * model.cv_judgment_metric_stdev)
            else:
                return '%.3f' % model.test_judgment_metric

        logger.info('Judgment metric (%s, %s): %s' %
                    (self.datarun.metric, target[:-len('_judgment_metric')],
                     metric_string(model)))

        old_best = self.db.get_best_classifier(datarun_id=self.datarun.id,
                                               score_target=target)
        if old_best is not None:
            if getattr(model, target) > getattr(old_best, target):
                logger.info(
                    'New best score! Previous best (classifier %s): %s' %
                    (old_best.id, metric_string(old_best)))
            else:
                logger.info('Best so far (classifier %s): %s' %
                            (old_best.id, metric_string(old_best)))

        return model, metrics
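
How the report format depends on the datarun's score_target: cross-validated targets get a mean with a 2-sigma band, plain test targets a single number. A self-contained sketch of the inner helper above, with assumed attribute values:

from types import SimpleNamespace

def metric_string(model, target):
    # Same branching as the inner helper above, lifted out for illustration.
    if 'cv' in target or 'mu_sigma' in target:
        return '%.3f +- %.3f' % (model.cv_judgment_metric,
                                 2 * model.cv_judgment_metric_stdev)
    return '%.3f' % model.test_judgment_metric

m = SimpleNamespace(cv_judgment_metric=0.912,
                    cv_judgment_metric_stdev=0.014,
                    test_judgment_metric=0.927)  # made-up scores
print(metric_string(m, 'cv_judgment_metric'))    # 0.912 +- 0.028
print(metric_string(m, 'test_judgment_metric'))  # 0.927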
Example 4
    def test_classifier(self, method, params):
        """
        Given a set of fully-qualified hyperparameters, create and test a
        classification model.
        Returns: Model object and performance dictionary
        """
        model = Model(code=method,
                      params=params,
                      judgment_metric=self.datarun.metric,
                      label_column=self.dataset.label_column)
        train_path, test_path = download_data(self.dataset.train_path,
                                              self.dataset.test_path,
                                              self.aws_config)
        performance = model.train_test(train_path=train_path,
                                       test_path=test_path)

        old_best = self.db.get_best_classifier(datarun_id=self.datarun.id)
        if old_best is not None:
            old_val = old_best.cv_judgment_metric
            old_err = 2 * old_best.cv_judgment_metric_stdev

        new_val = model.cv_judgment_metric
        new_err = 2 * model.cv_judgment_metric_stdev

        _log('Judgment metric (%s): %.3f +- %.3f' %
             (self.datarun.metric, new_val, new_err))

        if old_best is not None:
            # New best only if the candidate's lower bound clears the
            # incumbent's upper bound.
            if (new_val - new_err) > (old_val + old_err):
                _log(
                    'New best score! Previous best (classifier %s): %.3f +- %.3f'
                    % (old_best.id, old_val, old_err))
            else:
                _log('Best so far (classifier %s): %.3f +- %.3f' %
                     (old_best.id, old_val, old_err))

        return model, performance