def lassocvclassifier(training_samples, eval_samples, vectorizer, do_grid_search=False):
    """Fit a LassoCV model on the training split and evaluate it on the eval split.

    Parameters
    ----------
    training_samples : tuple
        ``(X_train, Y_train)`` feature matrix and targets.
    eval_samples : tuple
        ``(X_eval, Y_eval)`` held-out feature matrix and targets.
    vectorizer : object
        Unused here; kept for interface compatibility with callers.
    do_grid_search : bool
        Unused here; kept for interface compatibility with callers.

    NOTE(review): ``LassoCV`` is a *regressor* and has no ``predict_proba``;
    the commented-out SGDClassifier in the original suggests a probabilistic
    classifier was intended. Confirm the intended estimator before relying
    on ``y_prob`` — the final line will raise AttributeError as written.
    """
    # Function-scope import: the deprecated sklearn.cross_validation module
    # was removed in sklearn 0.20; model_selection is its replacement.
    from sklearn.model_selection import cross_val_score

    X_train, Y_train = training_samples
    X_eval, Y_eval = eval_samples

    clf = LassoCV()
    clf.fit(X_train, Y_train)

    # 'log_loss' was renamed to 'neg_log_loss' in sklearn >= 0.18; scores are
    # therefore negated log-losses (higher is better). TODO: confirm the
    # sklearn version this project pins.
    scores = cross_val_score(clf, X_train, Y_train, cv=5, n_jobs=5,
                             scoring='neg_log_loss')
    # Fixed: these were Python-2 print statements (SyntaxError on Python 3).
    print(scores, np.mean(scores), np.median(scores))
    print(clf)

    y_true, y_pred = Y_eval, clf.predict(X_eval)
    # BUG in original, left flagged rather than silently removed: LassoCV has
    # no predict_proba (it is a regressor) — this raises AttributeError.
    # Switch to a classifier (e.g. LogisticRegressionCV) if probabilities
    # are actually needed.
    y_prob = clf.predict_proba(X_eval)
class Lasso:
    """Thin wrapper around sklearn's LassoCV for a train/validation dataset.

    NOTE(review): ``LassoCV`` is a regressor and has no ``predict_proba`` —
    ``probs`` will always fail and return None as written; confirm whether a
    classifier was intended.
    """

    def __init__(self, dataset):
        # dataset is expected to expose train_X / val_X / train_y / val_y —
        # presumably a project-local split container; verify against callers.
        self.data_train_X = dataset.train_X
        self.data_test_X = dataset.val_X
        self.data_train_y = dataset.train_y
        self.data_test_y = dataset.val_y
        self.model = None              # set by train()
        self.predictions_value = None  # cached last predict() result
        self.probs_value = None        # cached last predict_proba() result

    def train(self):
        """Fit a 5-fold cross-validated Lasso on the training split."""
        self.model = LassoCV(cv=5, random_state=56).fit(
            self.data_train_X, self.data_train_y
        )

    def predictions(self, X_test):
        """Return model predictions for X_test, or None on failure."""
        try:
            self.predictions_value = self.model.predict(X_test)
            return self.predictions_value
        except Exception as exc:
            # Original swallowed the exception behind a bare "Error!";
            # surface the cause so failures are debuggable. Still returns
            # None to preserve the best-effort contract.
            print(f"Error! predictions failed: {exc}")

    def probs(self, X_test):
        """Return class probabilities for X_test, or None on failure.

        BUG in original: LassoCV has no ``predict_proba``, so with the
        default model this always takes the error path.
        """
        try:
            self.probs_value = self.model.predict_proba(X_test)
            return self.probs_value
        except Exception as exc:
            print(f"Error! probs failed: {exc}")

    def scores_roc(self):
        """Print the ROC-AUC of the fitted model on the validation split."""
        try:
            pred_val = self.model.predict(self.data_test_X)
            print("Roc val Lasso: " + str(roc_auc_score(self.data_test_y, pred_val)))
        except Exception as exc:
            print(f"Error! scores_roc failed: {exc}")
# SVC is more expensive so we do a lower number of CV iterations: cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) estimator = clf_svm plot_learning_curve(estimator, title, X_train_1, y_train_1, (0.95, 1.01), cv=cv, n_jobs=4) plt.show() # In[23]: from sklearn import tree clf = tree.DecisionTreeClassifier(random_state = 0) clf = clf.fit(X_train_1, y_train_1) y_pred = clf.predict_proba(X_test_1) # In[24]: from sklearn.model_selection import cross_val_score cross_val_score(clf, X_train_1, y_train_1, cv=10) # In[25]: plot_learning_curve(clf, "Learning curve (Decision-Tree)", X_train_1, y_train_1, (0.95, 1.01), cv=cv, n_jobs=4)