Example #1
def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    classifier = DecisionTreeClassifier(max_depth=2, min_samples_leaf=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Decision Tree"
    plot_learning_curve(classifier,
                        title,
                        X,
                        y,
                        ylim=(0.4, 0.6),
                        cv=cv,
                        n_jobs=4).show()

    classifier = DecisionTreeClassifier(max_depth=2, min_samples_leaf=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Decision Tree"
    plot_learning_curve(classifier,
                        title,
                        X1,
                        y1,
                        ylim=(0.2, 0.4),
                        cv=cv,
                        n_jobs=4).show()
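Most of these snippets assume a plot_learning_curve(estimator, title, X, y, ...) helper in the style of the scikit-learn learning-curve example; the exact helper varies by project (Example #5 below takes different arguments, and Example #6 uses a multi-panel variant with an axes argument). A minimal sketch of the single-panel version, returning plt so that .show() chains as above:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=None):
    # Compute train/validation scores over increasing training-set sizes.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs)
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.grid()
    # Plot the mean score across CV folds at each training size.
    plt.plot(train_sizes, train_scores.mean(axis=1), 'o-', color='r',
             label="Training score")
    plt.plot(train_sizes, test_scores.mean(axis=1), 'o-', color='g',
             label="Cross-validation score")
    plt.legend(loc="best")
    return plt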
Example #2
def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    dt = DecisionTreeClassifier(max_depth=2,
                                min_samples_leaf=3,
                                splitter='random')
    classifier = AdaBoostClassifier(base_estimator=dt,
                                    random_state=0,
                                    n_estimators=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For AdaBoostClassifier"
    plot_learning_curve(classifier,
                        title,
                        X,
                        y,
                        ylim=(0.4, 0.6),
                        cv=cv,
                        n_jobs=4).show()

    dt = DecisionTreeClassifier(max_depth=1,
                                min_samples_leaf=3,
                                splitter='random')
    classifier = AdaBoostClassifier(base_estimator=dt,
                                    random_state=0,
                                    n_estimators=15)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For AdaBoostClassifier"
    plot_learning_curve(classifier,
                        title,
                        X1,
                        y1,
                        ylim=(0.1, 0.3),
                        cv=cv,
                        n_jobs=4).show()
Example #3
def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    classifier = MLPClassifier(alpha=0.1, hidden_layer_sizes=13, max_iter=5, random_state=0, solver='lbfgs')
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Neural Network"
    plot_learning_curve(classifier, title, X, y, ylim=(0.2, 0.6), cv=cv, n_jobs=4).show()

    classifier = MLPClassifier(alpha=0.1, hidden_layer_sizes=10, max_iter=9, random_state=0, solver='lbfgs')
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Neural Network"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.1, 0.3), cv=cv, n_jobs=4).show()
Example #4
def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    classifier = SVC(C=10, kernel='linear')
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Support Vector Machine With Linear Kernel"
    plot_learning_curve(classifier, title, X, y, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    classifier = SVC(C=10, kernel='linear')
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Support Vector Machine With Linear Kernel"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    classifier = SVC(C=10, kernel='rbf', gamma=0.001)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Support Vector Machine with RBF Kernel"
    plot_learning_curve(classifier, title, X, y, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    classifier = SVC(C=10, kernel='rbf', gamma=0.001)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Support Vector Machine With Linear Kernel"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.0, 1.), cv=cv, n_jobs=4).show()
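The get_wine_data and get_abalone_data loaders used throughout Examples #1-#4 are not shown. The following is one plausible implementation, assuming scikit-learn's built-in wine dataset and the UCI abalone data (an assumption, not the original code):

import pandas as pd
from sklearn.datasets import load_wine


def get_wine_data():
    # scikit-learn ships the wine classification dataset directly.
    return load_wine(return_X_y=True)


def get_abalone_data():
    # Fetch the abalone data from the UCI repository; 'rings' serves as the
    # target and the categorical 'sex' column is one-hot encoded.
    url = ("https://archive.ics.uci.edu/ml/machine-learning-databases/"
           "abalone/abalone.data")
    cols = ["sex", "length", "diameter", "height", "whole_weight",
            "shucked_weight", "viscera_weight", "shell_weight", "rings"]
    df = pd.read_csv(url, names=cols)
    X = pd.get_dummies(df.drop(columns="rings"), columns=["sex"])
    return X.values, df["rings"].values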
Example #5
def training_curve(X, y, model):
    with timing():
        plot_learning_curve(X, y, model, XGBOOST_VISUALIZATION_PATH)
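The timing() context manager used here is not shown; a minimal sketch of one plausible implementation:

import time
from contextlib import contextmanager


@contextmanager
def timing():
    # Print the wall-clock time taken by the enclosed block.
    start = time.perf_counter()
    try:
        yield
    finally:
        print("elapsed: %.2f s" % (time.perf_counter() - start))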
Example #6
fig, axes = plt.subplots(3, 2, figsize=(10, 15))

#X, y = load_digits(return_X_y=True)
# x and y are defined earlier in the original script (note the commented-out
# load_digits alternative above); only the first target column is used here.
x_curves = x
y_curves = y[:, 0]
title = "Learning Curves (Hist)"
# A single shuffle split is used for cross-validation, with 20% of the data
# randomly selected as a validation set.
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

estimator = HistGradientBoostingClassifier()
plot_learning_curve(estimator,
                    title,
                    x_curves,
                    y_curves,
                    axes=axes[:, 0],
                    ylim=(0.0, 1.01),
                    cv=cv)

title = r"Learning Curves (Bayes)"
# SVC is more expensive so we do a lower number of CV iterations:
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
estimator = GaussianNB()
plot_learning_curve(estimator,
                    title,
                    x_curves,
                    y_curves,
                    axes=axes[:, 1],
                    ylim=(0.0, 1.01),
                    cv=cv)
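The 3x2 grid of axes matches the three-panel variant of the scikit-learn plot_learning_curve helper (learning curve, scalability, and performance panels), with one column per estimator. Displaying the figure would typically require a final call such as:

plt.tight_layout()
plt.show()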
Example #7
    #
    # train_vecs = []
    # for i in range(len(english_only_df.index)):
    #     top_topics = lda_train.get_document_topics(train_corpus[i], minimum_probability=0.0)
    #     topic_vec = [top_topics[j][1] for j in range(20)]
    #     topic_vec.append(english_only_df.iloc[i]["num_mentions"])
    #     topic_vec.append(english_only_df.iloc[i]["num_hashtags"])
    #     topic_vec.append(len(english_only_df.iloc[i]["combined_text"]))
    #     train_vecs.append(topic_vec)
    #
    # X = np.array(train_vecs)
    # y = np.array(english_only_df["gender"].map({"M": 0, "F": 1}))

    with open("y.pkl", "rb") as pkl:
        y = pickle.load(pkl)

    with open("X.pkl", "rb") as pkl:
        X = pickle.load(pkl)

    lr = LogisticRegression(penalty="l2", random_state=1, max_iter=1000)
    plot_learning_curve(lr, "Learning Curve with Logistic Regression", X, y)

    # Stochastic Gradient Descent
    sgd = SGDClassifier(early_stopping=True, random_state=1)
    plot_learning_curve(sgd, "Learning Curve with Stochastic Gradient Descent",
                        X, y)

    plt.show()

    print("ok")
Example #8
import numpy as np
from sklearn.model_selection import train_test_split

import helpers
from helpers import plot_learning_curve
from MlcLinReg import MlcLinReg

scores = list()
scores_sgd = list()
times = list()
times_sgd = list()
feature = 2
batch_size = 1024
iterations = 200

X_train, y_train, X_test, y_test = helpers.load_delicious(1)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_train.toarray(), y_train.toarray(), test_size=0.2,
                                                        random_state=2)

plot_learning_curve(
    estimator=MlcLinReg(learning_rate=0.2,
                        iterations=1000,
                        batch_size=512,
                        l_one=0.15),
    title="Learning Curve",
    X=X_train2,
    y=y_train2,
    cv=5
)
np.set_printoptions(suppress=True)

mlc1 = MlcLinReg(learning_rate=0.1, iterations=500, batch_size=500, l_one=0.2)
mlc1.fit(X_train, y_train)
mlc1.plot_log_loss()