from sklearn.model_selection import ShuffleSplit
from sklearn.tree import DecisionTreeClassifier


def run():
    # get_wine_data / get_abalone_data are project-local loaders (sketched below).
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    # Wine: shallow tree, averaged over 100 shuffle splits (20% held out each time).
    classifier = DecisionTreeClassifier(max_depth=2, min_samples_leaf=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Decision Tree"
    plot_learning_curve(classifier, title, X, y, ylim=(0.4, 0.6), cv=cv, n_jobs=4).show()

    # Abalone: same tree configuration, lower score range on the y-axis.
    classifier = DecisionTreeClassifier(max_depth=2, min_samples_leaf=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Decision Tree"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.2, 0.4), cv=cv, n_jobs=4).show()
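# get_wine_data / get_abalone_data are not defined in these snippets. A minimal
# sketch of what they could look like, assuming the UCI Wine and Abalone CSV
# files sit next to the script (file names and column layout are assumptions,
# not taken from the source):
import pandas as pd

def get_wine_data():
    # UCI Wine: column 0 is the class label, columns 1-13 are features.
    df = pd.read_csv("wine.data", header=None)
    return df.iloc[:, 1:].values, df.iloc[:, 0].values

def get_abalone_data():
    # UCI Abalone: encode the categorical 'Sex' column, predict ring counts.
    df = pd.read_csv("abalone.data", header=None)
    df[0] = df[0].map({"M": 0, "F": 1, "I": 2})
    return df.iloc[:, :-1].values, df.iloc[:, -1].values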
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import ShuffleSplit
from sklearn.tree import DecisionTreeClassifier


def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    # Wine: AdaBoost over a depth-2 randomized tree, only 3 boosting rounds.
    dt = DecisionTreeClassifier(max_depth=2, min_samples_leaf=3, splitter='random')
    classifier = AdaBoostClassifier(base_estimator=dt, random_state=0, n_estimators=3)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For AdaBoostClassifier"
    plot_learning_curve(classifier, title, X, y, ylim=(0.4, 0.6), cv=cv, n_jobs=4).show()

    # Abalone: shallower base learner (depth 1) but more boosting rounds (15).
    dt = DecisionTreeClassifier(max_depth=1, min_samples_leaf=3, splitter='random')
    classifier = AdaBoostClassifier(base_estimator=dt, random_state=0, n_estimators=15)
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For AdaBoostClassifier"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.1, 0.3), cv=cv, n_jobs=4).show()
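# Note: scikit-learn 1.2 renamed AdaBoostClassifier's `base_estimator` argument
# to `estimator` (the old name was removed in 1.4). On recent versions the
# calls above become, e.g.:
#
#     classifier = AdaBoostClassifier(estimator=dt, random_state=0, n_estimators=3)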
from sklearn.model_selection import ShuffleSplit
from sklearn.neural_network import MLPClassifier


def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    # Wine: one hidden layer of 13 units, deliberately capped at 5 L-BFGS iterations.
    classifier = MLPClassifier(alpha=0.1, hidden_layer_sizes=(13,), max_iter=5,
                               random_state=0, solver='lbfgs')
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Neural Network"
    plot_learning_curve(classifier, title, X, y, ylim=(0.2, 0.6), cv=cv, n_jobs=4).show()

    # Abalone: 10 hidden units, capped at 9 iterations.
    classifier = MLPClassifier(alpha=0.1, hidden_layer_sizes=(10,), max_iter=9,
                               random_state=0, solver='lbfgs')
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Neural Network"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.1, 0.3), cv=cv, n_jobs=4).show()
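# With max_iter this small, L-BFGS stops before converging and scikit-learn
# emits a ConvergenceWarning for every fit. The curves are still produced; if
# the warnings clutter the output they can be silenced:
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)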
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC


def run():
    X, y = get_wine_data()
    X1, y1 = get_abalone_data()

    # Linear kernel on both datasets; only 10 splits since SVC training is costly.
    classifier = SVC(C=10, kernel='linear')
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Support Vector Machine With Linear Kernel"
    plot_learning_curve(classifier, title, X, y, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    classifier = SVC(C=10, kernel='linear')
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Support Vector Machine With Linear Kernel"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    # RBF kernel with a small gamma on both datasets.
    classifier = SVC(C=10, kernel='rbf', gamma=0.001)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Wine - Validation Curve For Support Vector Machine With RBF Kernel"
    plot_learning_curve(classifier, title, X, y, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()

    classifier = SVC(C=10, kernel='rbf', gamma=0.001)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = "Abalone - Validation Curve For Support Vector Machine With RBF Kernel"
    plot_learning_curve(classifier, title, X1, y1, ylim=(0.0, 1.0), cv=cv, n_jobs=4).show()
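# plot_learning_curve is never defined in these snippets. Its call signature
# (estimator, title, X, y, axes=..., ylim=..., cv=..., n_jobs=...) and the
# trailing .show() match the helper from the scikit-learn learning-curve
# example, so the sketch below assumes that shape and draws only the first
# panel (score vs. training-set size):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(0.1, 1.0, 5)):
    if axes is None:
        _, ax = plt.subplots(figsize=(5, 5))
    else:
        ax = np.atleast_1d(axes)[0]  # the full helper draws three panels
    ax.set_title(title)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.set_xlabel("Training examples")
    ax.set_ylabel("Score")
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    ax.grid()
    ax.plot(sizes, train_scores.mean(axis=1), "o-", color="r", label="Training score")
    ax.plot(sizes, test_scores.mean(axis=1), "o-", color="g", label="Cross-validation score")
    ax.legend(loc="best")
    return plt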
def training_curve(X, y, model):
    # This project's plot_learning_curve takes an output path as its fourth
    # argument (XGBOOST_VISUALIZATION_PATH is a module-level constant), so the
    # figure is presumably saved to disk rather than shown interactively.
    with timing():
        plot_learning_curve(X, y, model, XGBOOST_VISUALIZATION_PATH)
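# timing() is an undefined context manager in this snippet; a minimal
# wall-clock timer it could plausibly be (name and output format are
# assumptions):
import time
from contextlib import contextmanager

@contextmanager
def timing():
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"elapsed: {time.perf_counter() - start:.2f}s")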
import matplotlib.pyplot as plt
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB

fig, axes = plt.subplots(3, 2, figsize=(10, 15))

# X, y = load_digits(return_X_y=True)
x_curves = x          # x and y come from earlier in the original script
y_curves = y[:, 0]    # first target column only

# Left column: histogram-based gradient boosting with a single shuffle split,
# holding out 20% of the data as a validation set.
title = "Learning Curves (Hist)"
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
estimator = HistGradientBoostingClassifier()
plot_learning_curve(estimator, title, x_curves, y_curves, axes=axes[:, 0],
                    ylim=(0.0, 1.01), cv=cv)

# Right column: Gaussian naive Bayes with the same split.
title = "Learning Curves (Bayes)"
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
estimator = GaussianNB()
plot_learning_curve(estimator, title, x_curves, y_curves, axes=axes[:, 1],
                    ylim=(0.0, 1.01), cv=cv)
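# The grid above is drawn but never rendered or saved in the original snippet;
# when running it standalone, finish with one of:
plt.show()                                       # interactive display
# fig.savefig("learning_curves.png", dpi=150)    # or write to disk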
import pickle

import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression, SGDClassifier

# The feature matrix was originally built from 20 LDA topic proportions plus a
# few tweet-level counts, then pickled:
#
# train_vecs = []
# for i in range(len(english_only_df.index)):
#     top_topics = lda_train.get_document_topics(train_corpus[i], minimum_probability=0.0)
#     topic_vec = [top_topics[j][1] for j in range(20)]
#     topic_vec.append(english_only_df.iloc[i]["num_mentions"])
#     topic_vec.append(english_only_df.iloc[i]["num_hashtags"])
#     topic_vec.append(len(english_only_df.iloc[i]["combined_text"]))
#     train_vecs.append(topic_vec)
#
# X = np.array(train_vecs)
# y = np.array(english_only_df["gender"].map({"M": 0, "F": 1}))

# Reload the cached features and labels instead of recomputing them.
with open("y.pkl", "rb") as pkl:
    y = pickle.load(pkl)
with open("X.pkl", "rb") as pkl:
    X = pickle.load(pkl)

lr = LogisticRegression(penalty="l2", random_state=1, max_iter=1000)
plot_learning_curve(lr, "Learning Curve with Logistic Regression", X, y)

# Stochastic gradient descent
sgd = SGDClassifier(early_stopping=True, random_state=1)
plot_learning_curve(sgd, "Learning Curve with Stochastic Gradient Descent", X, y)

plt.show()
print("ok")
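# A quick numeric companion to the two curves: mean cross-validated accuracy
# for both models (the 5-fold choice here is an assumption, not from the
# original script):
from sklearn.model_selection import cross_val_score

for name, clf in [("logistic regression", lr), ("SGD", sgd)]:
    cv_scores = cross_val_score(clf, X, y, cv=5)
    print(f"{name}: {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}")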
import numpy as np
from sklearn.model_selection import train_test_split

import helpers
from MlcLinReg import MlcLinReg
from helpers import plot_learning_curve

scores = list()
scores_sgd = list()
times = list()
times_sgd = list()

feature = 2
batch_size = 1024
iterations = 200

# load_delicious returns sparse matrices; densify before splitting.
X_train, y_train, X_test, y_test = helpers.load_delicious(1)
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X_train.toarray(), y_train.toarray(), test_size=0.2, random_state=2)

plot_learning_curve(
    estimator=MlcLinReg(learning_rate=0.2, iterations=1000, batch_size=512, l_one=0.15),
    title="Learning Curve",
    X=X_train2, y=y_train2, cv=5)

np.set_printoptions(suppress=True)
mlc1 = MlcLinReg(learning_rate=0.1, iterations=500, batch_size=500, l_one=0.2)
mlc1.fit(X_train, y_train)
mlc1.plot_log_loss()
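# The unused scores_sgd/times_sgd lists suggest a planned comparison against
# scikit-learn's SGDClassifier; a minimal sketch of that baseline, assuming
# load_delicious(1) yields a single binary label column (loss choice and
# hyperparameters are assumptions):
from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier(loss="log_loss", max_iter=1000, random_state=2)
sgd.fit(X_train2, y_train2.ravel())
print("SGD baseline accuracy:", sgd.score(X_test2, y_test2.ravel()))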