Example #1
0
class ClassificationPipeline(Pipeline):
    """Pipeline that delegates classification to an ``AutoSklearnClassifier``.

    All keyword arguments given to the constructor are forwarded unchanged
    to ``AutoSklearnClassifier``; the resulting object is kept on
    ``self.estimator`` and every other method simply delegates to it.
    """

    def __init__(self, **pipeline_constructor_params):
        """Create the wrapped estimator from the supplied keyword arguments."""
        wrapped = AutoSklearnClassifier(**pipeline_constructor_params)
        self.estimator = wrapped

    def fit(self, x, y):
        """Fit the wrapped estimator on ``x`` and ``y``.

        Nothing is returned; the fitted state lives on ``self.estimator``.
        """
        wrapped = self.estimator
        wrapped.fit(x, y)

    def run(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        predictions = self.estimator.predict(x)
        return predictions

    def as_json(self):
        """Return the configuration dictionary of the first ensemble entry.

        Takes the first item from ``get_models_with_weights()``, picks its
        second element (the model object) and returns
        ``model.config.get_dictionary()``.
        """
        weighted_models = self.estimator.get_models_with_weights()
        first_entry = weighted_models[0]
        first_model = first_entry[1]
        return first_model.config.get_dictionary()
Example #2
0
    def fit_automl(self, run_time):
        """Fit auto-sklearn on the newest dataset and report the outcome.

        The last entry of ``self.data`` is copied, column ``0`` is popped
        off as the target and the remaining columns are passed through
        ``model_utils.process_feat_types``. The model is fitted on the rows
        selected by ``self.train_idxs`` and scored on those rows as well as
        on the rows selected by ``self.test_idxs``.

        Side effects:
            - Starts a thread running ``self.update_progress`` with
              ``self.progress_widget`` as its argument.
            - Appends a deep copy of the fitted model to ``self.models``.
            - Prints to ``self.event_output_widget``,
              ``self.metrics_output_widget`` and
              ``self.model_output_widget``.
            - Enables the upload controls (``upload_button`` and
              ``upload_text`` when ``self.textbox_upload`` is truthy,
              otherwise ``upload_widget``).
            - Calls ``self.on_budget_completion()`` when ``self.queries``
              equals ``self.budget_widget.value``.

        Args:
            run_time (int): The run time for auto-sklearn in seconds.
        Returns:
            automl (AutoSklearnClassifier): fitted auto-sklearn model.
        """
        # TODO functionality to load the metadata directory from Mongo
        classifier_kwargs = {
            'time_left_for_this_task': run_time,
            'metadata_directory': ".metalearning/metalearning_files/",
        }
        automl = AutoSklearnClassifier(**classifier_kwargs)

        # The progress updater runs alongside the (blocking) fit below.
        progress_thread = threading.Thread(
            target=self.update_progress,
            args=(self.progress_widget, ),
        )
        progress_thread.start()

        # Work on a copy of the most recently stored dataset.
        latest = self.data[-1].copy()

        targets = latest.pop(0)
        features, feat_types, _ = model_utils.process_feat_types(latest)

        features_train = features.iloc[self.train_idxs]
        targets_train = targets.iloc[self.train_idxs]

        features_test = features.iloc[self.test_idxs]
        targets_test = targets.iloc[self.test_idxs]

        # Silence both warnings and printed output while fitting.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with HiddenPrints():
                automl.fit(features_train, targets_train,
                           feat_type=feat_types)

        # Fitting is done: keep an independent snapshot of the model.
        self.models.append(copy.deepcopy(automl))

        with self.event_output_widget:
            minutes = int(run_time / 60)
            print("FITTING COMPLETED WITH FITTING TIME PARAMETER AS ",
                  minutes, " MINUTES")

        with self.metrics_output_widget:
            train_predictions = automl.predict(features_train)
            train_acc = metrics.accuracy_score(targets_train,
                                               train_predictions)

            test_predictions = automl.predict(features_test)
            test_acc = metrics.accuracy_score(targets_test, test_predictions)

            # Only the thresholdout-adjusted test score is shown.
            noised_test_acc = model_utils.thresholdout(train_acc, test_acc)

            print("Run {}: train acc: {:.4}, noised test acc: {:.4}\n".format(
                self.queries, train_acc, noised_test_acc))

        with self.model_output_widget:
            print("MODELS:")
            ensemble = automl.get_models_with_weights()
            print(ensemble)

        # Re-enable whichever upload controls this instance uses.
        if not self.textbox_upload:
            self.upload_widget.disabled = False
        else:
            self.upload_button.disabled = False
            self.upload_text.disabled = False

        budget_reached = self.queries == self.budget_widget.value
        if budget_reached:
            self.on_budget_completion()

        return automl
    # NOTE(review): `dataframe` is defined outside this excerpt; the use of
    # `.values` suggests a pandas DataFrame — confirm against the loader.
    data = dataframe.values
    # Every column except the last is a feature; the last column is the target.
    X, y = data[:, :-1], data[:, -1]

    # Minimal preparation: cast the features to float32 and label-encode the
    # targets after converting them to strings.
    X = X.astype('float32')
    y = LabelEncoder().fit_transform(y.astype('str'))

    # Split into train and test sets (test_size=0.33) with a fixed
    # random_state.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

    # Configure the auto-sklearn search.
    # NOTE(review): auto-sklearn documents both time limits in seconds
    # (10 minutes overall, 45 per run) — confirm for the installed version.
    model = AutoSklearnClassifier(time_left_for_this_task=10*60, per_run_time_limit=45, n_jobs=6)

    # Run the search on the training split.
    model.fit(X_train, y_train)
    # Print the statistics summary produced by sprint_statistics().
    print(model.sprint_statistics())

    # Print each entry returned by get_models_with_weights() on its own line.
    model_weights = model.get_models_with_weights()
    for model_weight in model_weights:
        print(model_weight)

    print("Show models")
    # Print whatever show_models() returns.
    models_def = model.show_models()
    print(models_def)

    # Predict on the held-out test split and print the accuracy to three
    # decimal places.
    y_hat = model.predict(X_test)
    acc = accuracy_score(y_test, y_hat)
    print("Test Dataset Accuracy: %.3f" % acc)