Example #1
0
def lassocvclassifier(training_samples, eval_samples, vectorizer, do_grid_search=False):
    """Fit a ``LassoCV`` model on the training samples and print CV scores.

    NOTE(review): this snippet is Python 2 code (``print`` statement below)
    and appears to be truncated: the evaluation results computed at the end
    are never used and nothing is returned in the visible body.

    Args:
        training_samples: pair unpacked as ``(X_train, Y_train)``.
        eval_samples: pair unpacked as ``(X_eval, Y_eval)``.
        vectorizer: not referenced in the visible body.
        do_grid_search: not referenced in the visible body.
    """
    X_train, Y_train = training_samples
    X_eval, Y_eval = eval_samples
    # Earlier SGD-based classifier, kept for reference:
    #clf = SGDClassifier(loss='log', penalty= 'l2',l1_ratio=0.0, n_iter=30, shuffle=True, verbose=False, 
    #                    n_jobs=4, alpha=1e-4, average=True, class_weight=None)
    # NOTE(review): LassoCV is documented by scikit-learn as a linear
    # *regression* estimator, despite this function's "classifier" name.
    clf = LassoCV()
   
    clf.fit(X_train, Y_train)
    #y_train_true, y_train_pred = Y_train, clf.predict(X_train)
    # Assigned but never read in the visible body.
    print_top_10_words = True
    
    
    # 5-fold cross-validation on the training data, run in 5 parallel jobs.
    # NOTE(review): `cross_validation` is presumably the legacy
    # sklearn.cross_validation module. The 'log_loss' scorer presumably needs
    # probability estimates, which LassoCV is not documented to provide --
    # confirm whether this call can succeed.
    scores = cross_validation.cross_val_score(clf, X_train, Y_train, cv=5, n_jobs=5, scoring='log_loss')
    print scores, np.mean(scores), np.median(scores)

    print(clf)
    #scores = cross_validation.cross_val_score(clf.best_estimator_, X_train, Y_train, cv=10, scoring='log_loss')
    #print scores, np.mean(scores), np.median(scores)
    # Predictions on the evaluation split; unused in the visible body.
    y_true, y_pred = Y_eval, clf.predict(X_eval)
    # NOTE(review): scikit-learn's LassoCV API lists no `predict_proba`
    # method, so this line likely raises AttributeError -- confirm.
    y_prob = clf.predict_proba(X_eval)
Example #2
0
class Lasso:
    """Wrapper around a cross-validated Lasso model for one dataset.

    The constructor copies the train/validation splits off ``dataset``,
    ``train`` fits the model, and the remaining methods query it.

    The query methods are best-effort: they never raise.  On failure they
    print one line starting with ``Error!`` that names the failing step and
    the underlying exception, then return ``None``.
    """

    def __init__(self, dataset):
        """Store the splits exposed by ``dataset``.

        Args:
            dataset: object providing ``train_X``, ``train_y``, ``val_X``
                and ``val_y`` attributes.
        """
        self.data_train_X = dataset.train_X
        self.data_test_X = dataset.val_X
        self.data_train_y = dataset.train_y
        self.data_test_y = dataset.val_y
        self.model = None
        self.predictions_value = None
        self.probs_value = None

    def _report_failure(self, step, exc):
        """Print a one-line diagnostic for a failed best-effort step."""
        # Keep the historical "Error!" prefix so existing log greps still
        # match, but say what failed and why instead of hiding the cause.
        print("Error! {0} failed: {1}: {2}".format(
            step, type(exc).__name__, exc))

    def train(self):
        """Fit ``LassoCV`` (5-fold CV, fixed seed) on the training split."""
        self.model = LassoCV(cv=5,
                             random_state=56).fit(self.data_train_X,
                                                  self.data_train_y)

    def predictions(self, X_test):
        """Return the model's predictions for ``X_test``.

        The result is also cached in ``self.predictions_value``.  Returns
        ``None`` (after printing a diagnostic) if prediction fails, e.g.
        because ``train`` has not been called yet.
        """
        try:
            self.predictions_value = self.model.predict(X_test)
        except Exception as exc:
            self._report_failure("predictions", exc)
            return None
        return self.predictions_value

    def probs(self, X_test):
        """Return ``self.model.predict_proba(X_test)``.

        The result is also cached in ``self.probs_value``.  Returns ``None``
        (after printing a diagnostic) if the call fails.

        NOTE(review): scikit-learn's LassoCV API lists no ``predict_proba``
        method, so with the model built by ``train`` this presumably always
        takes the failure path -- confirm the intended estimator.
        """
        try:
            self.probs_value = self.model.predict_proba(X_test)
        except Exception as exc:
            self._report_failure("probs", exc)
            return None
        return self.probs_value

    def scores_roc(self):
        """Print the ROC AUC of the model's predictions on the validation split.

        Prints a diagnostic instead if prediction or scoring fails.
        """
        try:
            pred_val = self.model.predict(self.data_test_X)
            score = roc_auc_score(self.data_test_y, pred_val)
        except Exception as exc:
            self._report_failure("scores_roc", exc)
            return
        print("Roc val Lasso: " + str(score))
# SVC is more expensive so we do a lower number of CV iterations:
# 10 random train/test splits, each holding out 20% of the samples, with a
# fixed seed so the splits are reproducible.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
# `clf_svm` and `title` are defined outside this section.
estimator = clf_svm
# NOTE(review): (0.95, 1.01) is presumably the y-axis range for the plot --
# confirm against the definition of plot_learning_curve.
plot_learning_curve(estimator, title, X_train_1, y_train_1, (0.95, 1.01), cv=cv, n_jobs=4)

plt.show()


# In[23]:


from sklearn import tree

# Decision tree with a fixed seed, fitted on the training split.
clf = tree.DecisionTreeClassifier(random_state = 0)
clf = clf.fit(X_train_1, y_train_1)
# Despite its name, `y_pred` holds the output of predict_proba (per-class
# probability estimates for the test split), not predicted labels.
y_pred = clf.predict_proba(X_test_1)


# In[24]:


from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the decision tree on the training split.
# The result is not assigned: as a notebook cell it is displayed, but when
# this file runs as a plain script the scores are discarded.
cross_val_score(clf, X_train_1, y_train_1, cv=10)


# In[25]:


# Learning curve for the decision tree, reusing the ShuffleSplit `cv` object
# defined earlier in the file.
# NOTE(review): (0.95, 1.01) is presumably the y-axis range -- confirm
# against the definition of plot_learning_curve.
plot_learning_curve(clf, "Learning curve (Decision-Tree)", X_train_1, y_train_1, (0.95, 1.01), cv=cv, n_jobs=4)