Ejemplo n.º 1
0
def balance_class_balance(path="images/class_balance.png"):
    """
    Fit a ClassBalance visualizer on the game outcomes and render it.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer's ``poof`` call as ``outpath``.

    Returns
    -------
    Whatever ``ClassBalance.poof`` returns.
    """
    # Only the "outcome" field of the loaded data is needed here.
    outcomes = load_game()["outcome"]

    visualizer = ClassBalance(labels=["draw", "loss", "win"])
    visualizer.fit(outcomes)

    result = visualizer.poof(outpath=path)
    return result
Ejemplo n.º 2
0
def balance_class_balance(path="images/class_balance.png"):
    """
    Fit a ClassBalance visualizer on the game outcomes and render it.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer's ``poof`` call as ``outpath``.

    Returns
    -------
    Whatever ``ClassBalance.poof`` returns.
    """
    # Only the "outcome" field of the loaded data is needed here.
    outcomes = load_game()["outcome"]

    visualizer = ClassBalance(labels=["draw", "loss", "win"])
    visualizer.fit(outcomes)

    result = visualizer.poof(outpath=path)
    return result
Ejemplo n.º 3
0
def class_prediction_error(ax=None):
    """
    Build a ClassPredictionError visualizer over GaussianNB for the game data.

    Parameters
    ----------
    ax : optional
        Passed straight through to ``ClassPredictionError`` as ``ax``.

    Returns
    -------
    The result of ``tts_plot`` called with the visualizer, the one-hot
    encoded features and the target.
    """
    dataset = load_game(return_dataset=True)
    features, target = dataset.to_numpy()

    # One-hot encode the features and densify the encoder's output.
    encoder = OneHotEncoder()
    features = encoder.fit_transform(features).toarray()

    visualizer = ClassPredictionError(GaussianNB(), ax=ax)
    return tts_plot(visualizer, features, target)
Ejemplo n.º 4
0
def rocauc(dataset):
    """
    Fit and score a ROCAUC visualizer for the named dataset, then save it.

    Parameters
    ----------
    dataset : str
        ``"binary"`` loads the occupancy data and uses ``GaussianNB``;
        ``"multiclass"`` loads the game data, ordinal-encodes the features
        and uses ``RidgeClassifier``.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``"binary"`` nor ``"multiclass"``.
    """
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        # Report the rejected value so a bad call is easy to diagnose.
        raise ValueError("unknown dataset: {!r}".format(dataset))

    # Hold out 20% of the data for scoring.
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    # The figure name is derived from the dataset key.
    savefig(oz, "rocauc_{}".format(dataset))
Ejemplo n.º 5
0
def validation_curve_classifier_knn(path="images/validation_curve_classifier_knn.png"):
    """
    Draw a validation curve for KNeighborsClassifier over ``n_neighbors``.

    The game data is one-hot encoded, and the curve is computed with a
    4-split StratifiedKFold using the ``f1_weighted`` scorer.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer's ``show`` call as ``outpath``.
    """
    features, target = load_game()
    features = OneHotEncoder().fit_transform(features)

    _, axes = plt.subplots()
    folds = StratifiedKFold(4)
    # Odd neighbor counts 3, 5, ..., 19.
    neighbor_counts = np.arange(3, 20, 2)

    print("warning: generating the KNN validation curve can take a very long time!")

    estimator = KNeighborsClassifier()
    curve_options = {
        "ax": axes,
        "param_name": "n_neighbors",
        "param_range": neighbor_counts,
        "cv": folds,
        "scoring": "f1_weighted",
        "n_jobs": 8,
    }
    visualizer = ValidationCurve(estimator, **curve_options)
    visualizer.fit(features, target)
    visualizer.show(outpath=path)
Ejemplo n.º 6
0
def validation_curve_classifier_svc(path="images/validation_curve_classifier_svc.png"):
    """
    Draw a validation curve for SVC over ``gamma``.

    The game data is one-hot encoded, and the curve is computed with a
    12-split StratifiedKFold using the ``f1_weighted`` scorer; the
    visualizer is created with ``logx=True``.

    Parameters
    ----------
    path : str
        Forwarded to the visualizer's ``show`` call as ``outpath``.
    """
    features, target = load_game()
    features = OneHotEncoder().fit_transform(features)

    _, axes = plt.subplots()
    folds = StratifiedKFold(12)
    # Twelve log-spaced gamma values from 1e-6 to 1e-1.
    gamma_values = np.logspace(-6, -1, 12)

    print("warning: generating the SVC validation curve can take a very long time!")

    estimator = SVC()
    curve_options = {
        "ax": axes,
        "param_name": "gamma",
        "param_range": gamma_values,
        "logx": True,
        "cv": folds,
        "scoring": "f1_weighted",
        "n_jobs": 8,
    }
    visualizer = ValidationCurve(estimator, **curve_options)
    visualizer.fit(features, target)
    visualizer.show(outpath=path)
Ejemplo n.º 7
0
def prcurve(dataset):
    """
    Fit and score a PRCurve visualizer for the named dataset, then save it.

    Parameters
    ----------
    dataset : str
        ``"binary"`` loads the spam data and uses ``RidgeClassifier`` with
        no extra visualizer options; ``"multiclass"`` loads the game data,
        ordinal-encodes the features, label-encodes the target, and uses
        ``MultinomialNB`` with per-class curves and iso-F1 curves enabled.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``"binary"`` nor ``"multiclass"``.
    """
    if dataset == "binary":
        X, y = load_spam()
        model = RidgeClassifier()
        kws = {}
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        y = LabelEncoder().fit_transform(y)
        model = MultinomialNB()
        kws = {
            "per_class": True,
            "iso_f1_curves": True,
            "fill_area": False,
            "micro": False,
        }
    else:
        # Report the rejected value so a bad call is easy to diagnose.
        raise ValueError("unknown dataset: {!r}".format(dataset))

    # Hold out 20% of the (shuffled) data for scoring.
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, shuffle=True)
    oz = PRCurve(model, ax=newfig(), **kws)
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    # The figure name is derived from the dataset key.
    savefig(oz, "precision_recall_{}".format(dataset))