Python GamaClassifier.fit_from_file Examples

Programming Language: Python

Namespace/Package Name: gama

Class/Type: GamaClassifier

Method/Function: fit_from_file

Examples at hotexamples.com: 2

Python GamaClassifier.fit_from_file - 2 examples found. These are the top-rated real-world Python examples of gama.GamaClassifier.fit_from_file, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

GamaClassifier(19)

fit(8)

predict(6)

predict_proba(4)

fit_arff(3)

score(3)

cleanup(2)

fit_from_file(2)

predict_arff(2)

predict_from_file(2)

predict_proba_arff(2)

predict_proba_from_file(2)

score_arff(1)

score_from_file(1)

Example #1

Show file

def _test_dataset_problem(
    data,
    metric: str,
    arff: bool = False,
    y_type: Type = pd.DataFrame,
    search: BaseSearch = None,
    missing_values: bool = False,
    max_time: int = 60,
):
    """Fit a GamaClassifier on `data` and check its predictions and score.

    :param data: mapping describing the dataset. The keys read here are
        "name", "load", "target", "test_size", "n_classes",
        "base_accuracy" and "base_log_loss". ``data["load"]`` is called with
        ``return_X_y=True`` to obtain the features and labels.
    :param metric: passed to GamaClassifier as `scoring`. When it equals
        "neg_log_loss" the score reported by GAMA is compared to the log
        loss, otherwise to the accuracy.
    :param arff: if True, fit/predict/score through the ``*_from_file``
        methods using ``tests/data/{name}_train.arff`` and
        ``tests/data/{name}_test.arff``; otherwise use in-memory data.
    :param y_type: pd.DataFrame, pd.Series, np.ndarray or str. Only used
        when `arff` is False.
    :param search: search method handed to GamaClassifier. ``None`` (the
        default) creates a new ``AsyncEA()`` for this call, so calls do not
        share one search object.
    :param missing_values: if True, overwrite some entries of the first two
        feature columns with NaN. Only used when `arff` is False.
    :param max_time: passed to GamaClassifier as `max_total_time`; it is also
        the budget the measured fit time is checked against.
    :return: the GamaClassifier, after ``cleanup("all")`` has been called.
    """
    if search is None:
        # Instantiate per call: a default of `AsyncEA()` in the signature is
        # evaluated once and would be reused by every invocation.
        search = AsyncEA()

    gama = GamaClassifier(
        random_state=0,
        max_total_time=max_time,
        scoring=metric,
        search=search,
        n_jobs=1,
        post_processing=EnsemblePostProcessing(ensemble_size=5),
        store="nothing",
    )
    if arff:
        train_path = f"tests/data/{data['name']}_train.arff"
        test_path = f"tests/data/{data['name']}_test.arff"

        # Only the held-out labels are needed from the in-memory split; the
        # features are read from the ARFF files by GAMA itself.
        X, y = data["load"](return_X_y=True)
        _, _, _, y_test = train_test_split(X,
                                           y,
                                           stratify=y,
                                           random_state=0)
        # Labels are compared as strings against the file-based predictions.
        y_test = [str(val) for val in y_test]

        with Stopwatch() as sw:
            gama.fit_from_file(train_path, target_column=data["target"])
        class_predictions = gama.predict_from_file(
            test_path, target_column=data["target"])
        class_probabilities = gama.predict_proba_from_file(
            test_path, target_column=data["target"])
        # Use the same target column as for fitting and predicting.
        gama_score = gama.score_from_file(
            test_path, target_column=data["target"])
    else:
        X, y = data["load"](return_X_y=True)
        if y_type is str:
            # Replace the class indices with their string names.
            databunch = data["load"]()
            y = np.asarray(
                [databunch.target_names[c_i] for c_i in databunch.target])
        if y_type in (pd.Series, pd.DataFrame):
            y = y_type(y)

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            stratify=y,
                                                            random_state=0)
        if missing_values:
            X_train[1:300:2, 0] = X_train[2:300:5, 1] = float("NaN")
            X_test[1:100:2, 0] = X_test[2:100:5, 1] = float("NaN")

        with Stopwatch() as sw:
            gama.fit(X_train, y_train)
        class_predictions = gama.predict(X_test)
        class_probabilities = gama.predict_proba(X_test)
        gama_score = gama.score(X_test, y_test)

    # Compare against the budget that was actually given to GAMA rather than
    # a fixed 60 seconds.
    assert (max_time * FIT_TIME_MARGIN >
            sw.elapsed_time), "fit must stay within 110% of allotted time."

    assert isinstance(class_predictions,
                      np.ndarray), "predictions should be numpy arrays."
    assert (
        data["test_size"],
    ) == class_predictions.shape, "predict should return (N,) shaped array."

    accuracy = accuracy_score(y_test, class_predictions)
    # Majority classifier on this split achieves 0.6293706293706294
    # NOTE(review): that figure looks specific to one dataset; the bound
    # enforced below is data["base_accuracy"].
    print(data["name"], metric, "accuracy:", accuracy)
    assert (data["base_accuracy"] <= accuracy
            ), "predictions should be at least as good as majority class."

    assert isinstance(
        class_probabilities,
        np.ndarray), "probability predictions should be numpy arrays."
    assert (data["test_size"],
            data["n_classes"]) == class_probabilities.shape, (
                "predict_proba should return"
                " (N,K) shaped array.")

    # Majority classifier on this split achieves 12.80138131184662
    # NOTE(review): as above, the bound enforced is data["base_log_loss"].
    logloss = log_loss(y_test, class_probabilities)
    print(data["name"], metric, "log-loss:", logloss)
    assert (data["base_log_loss"] >= logloss
            ), "predictions should be at least as good as majority class."

    # The score reported by GAMA must match the independently computed one.
    score_to_match = logloss if metric == "neg_log_loss" else accuracy
    assert score_to_match == pytest.approx(gama_score)
    gama.cleanup("all")
    return gama

Example #2

Show file

File: arff_example.py Project: prabhant/gama

from gama import GamaClassifier

if __name__ == "__main__":
    # Path template for the ARFF files; `{}` is replaced by the split name.
    arff_template = "../tests/data/breast_cancer_{}.arff"
    train_file = arff_template.format("train")
    test_file = arff_template.format("test")

    # Configure the AutoML run with the time limit announced below.
    automl = GamaClassifier(max_total_time=180, store="nothing", n_jobs=1)
    print("Starting `fit` which will take roughly 3 minutes.")
    automl.fit_from_file(train_file)

    # Class labels and class probabilities for the test file.
    label_predictions = automl.predict_from_file(test_file)
    probability_predictions = automl.predict_proba_from_file(test_file)