Beispiel #1
0
 def logistic_regression(self, use_glm=True):
     """Fit a logistic regression of Direction on Lag1-Lag5 and Volume and tabulate its predictions.

     Two fitting routes are available:

     * ``use_glm=True``: a binomial GLM fitted on ``self.df``; labels are
       derived from the model's fitted values.
     * ``use_glm=False``: ``smf.logit`` fitted on ``self.transformedDF``;
       labels are derived from predictions for the predictor columns of
       ``self.df``.

     Either way the 0/1 labels are handed to ``tp.output_table`` together
     with the ``self.y_col`` column of ``self.transformedDF``.

     Exercise notes (figures as reported by the original author):

     (b) Lag2 appears to be the only statistically significant predictor.
     (c) Share of correct predictions: (54+557)/(54+557+48+430) = 56.1%.
         In weeks the market goes up the model is right most of the time,
         557/(557+48) = 92.1%. In weeks the market goes down it is wrong
         most of the time: it is right in only 54/(430+54) = 11.2% of them.

     :param use_glm: choose the GLM route (default) or the ``logit`` route.
     """
     predictors = ["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]
     formula = "Direction~" + "+".join(predictors)

     if use_glm:
         result = smf.glm(formula, data=self.df, family=sm.families.Binomial()).fit()
         # Beware: the fitted value is the probability of the class at
         # index 0, so a value above 0.5 maps to label 0 rather than 1.
         above_half = result.fittedvalues > 0.5
         pred_values = above_half.map({True: 0, False: 1})
     else:
         result = smf.logit(formula, data=self.transformedDF).fit()
         # Here the predicted value is the probability of label 1.
         design = sm.add_constant(self.df[predictors])
         above_half = Series(result.predict(design)) > 0.5
         pred_values = above_half.map({True: 1, False: 0})

     tp.output_table(pred_values.values, self.transformedDF[self.y_col].values)
Beispiel #2
0
 def knn_fit(self, n_neighbors):
     """Train a KNN classifier with uniform weights and tabulate its test predictions.

     The classifier is fitted on ``self.train_X`` / ``self.train_y``. The
     features and true labels used for evaluation are read from
     ``self.test_set`` via ``self.x_cols`` and ``self.y_col``, and the
     predictions are reported through ``tp.output_table``.

     :param n_neighbors: neighbour count handed to ``KNeighborsClassifier``.
     """
     # The weighting scheme is fixed to 'uniform'; 'distance' is the other
     # option the author considered.
     classifier = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
     classifier.fit(self.train_X, self.train_y)

     holdout = self.test_set
     features, actual = holdout[self.x_cols].values, holdout[self.y_col].values
     tp.output_table(classifier.predict(features), actual)
Beispiel #3
0
 def lda_predict(self, fit_res, threshold=0.5):
     """Predict test-set labels with a fitted classifier and tabulate them against the true labels.

     The features and true labels come from ``self.test_set`` (columns
     ``self.x_cols`` and ``self.y_col``); the comparison is reported through
     ``tp.output_table``.

     :param fit_res: fitted estimator exposing ``predict``, ``predict_proba``
         and ``classes_``. With a custom threshold only ``classes_[0]`` and
         ``classes_[1]`` are ever emitted, i.e. a two-class problem is assumed.
     :param threshold: probability cut-off applied to column 0 of
         ``predict_proba``. At the default of 0.5 the estimator's own
         ``predict`` is used instead.
     """
     test_X = self.test_set[self.x_cols].values
     test_y = self.test_set[self.y_col].values
     if threshold == 0.5:
         pred_y = fit_res.predict(test_X)
     else:
         # Vectorised thresholding; this also avoids the Python-2-only
         # ``xrange``. Pick classes_[0] wherever its probability exceeds the
         # threshold, otherwise classes_[1].
         first_class_probs = fit_res.predict_proba(test_X)[:, 0]
         pred_y = np.where(first_class_probs > threshold,
                           fit_res.classes_[0], fit_res.classes_[1])
     tp.output_table(pred_y, test_y)
Beispiel #4
0
 def fit_with_knn(self, n_neighbors):
     """Fit a uniformly weighted KNN classifier on scaled features and tabulate its test predictions.

     The feature columns ``self.x_cols`` of ``self.train_set`` and
     ``self.test_set`` are cast to float and passed through ``pp.scale``
     before fitting and predicting. Predictions are compared with the
     ``self.y_col`` column of the test set via ``tp.output_table``.

     :param n_neighbors: neighbour count handed to ``KNeighborsClassifier``.
     """
     def scaled_features(frame):
         # Cast to float first, then hand the feature matrix to pp.scale.
         return pp.scale(frame[self.x_cols].values.astype(float))

     # NOTE(review): the training and test sets are scaled independently of
     # each other, so the test features are not transformed with the
     # training set's statistics - confirm this is intended.
     model = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
     model.fit(scaled_features(self.train_set), self.train_set[self.y_col])

     predicted = model.predict(scaled_features(self.test_set))
     tp.output_table(predicted, self.test_set[self.y_col].values)
Beispiel #5
0
    def test_all_methods(self):
        """Compare several classifiers that predict Direction from Lag2 alone.

        Every model is trained on the rows of ``self.df`` with
        1990 <= Year <= 2008 and evaluated on the rows with Year > 2008.
        Each set of predictions is reported through ``tp.output_table``
        against the ``self.y_col`` column of the evaluation rows.

        Exercise parts covered: (d) logistic regression, (e) LDA, (f) QDA and
        (g) KNN with a single neighbour. Parts (h) and (i) have no code here.
        """
        x_cols = ["Lag2"]
        formula = "Direction~Lag2"

        # Boolean-mask row selection through .loc; the former .ix indexer was
        # deprecated in pandas 0.20 and removed in pandas 1.0.
        year = self.df["Year"]
        train_data = self.df.loc[(year >= 1990) & (year <= 2008), :]
        test_data = self.df.loc[year > 2008, :]

        # (d) Logistic regression, fitted as a binomial GLM.
        model = smf.glm(formula, data=train_data, family=sm.families.Binomial())
        result = model.fit()
        probs = Series(result.predict(sm.add_constant(test_data[["Lag2"]])))
        # NOTE(review): a probability above 0.5 is labelled "Down", which
        # presumes the GLM models the probability of "Down" - confirm.
        pred_values = probs.map(lambda x: "Down" if x > 0.5 else "Up")
        tp.output_table(pred_values.values, test_data[self.y_col].values)

        train_X = train_data[x_cols].values
        train_y = train_data[self.y_col].values
        test_X = test_data[x_cols].values
        test_y = test_data[self.y_col].values

        # (e) LDA
        lda_res = LDA().fit(train_X, train_y)
        tp.output_table(lda_res.predict(test_X), test_y)

        # (f) QDA
        qda_res = QDA().fit(train_X, train_y)
        tp.output_table(qda_res.predict(test_X), test_y)

        # (g) KNN with one neighbour and uniform weights
        clf = neighbors.KNeighborsClassifier(1, weights="uniform")
        clf.fit(train_X, train_y)
        tp.output_table(clf.predict(test_X), test_y)

        # (h) Logistic regression and LDA (author's answer; nothing is computed for it).
        # (i) Author's remark: is the purpose of the last question to go
        #     through all the methods with no particular direction?
Beispiel #6
0
 def logistic_fit(self):
     """Fit a logit model on the training set and tabulate its 0/1 predictions for the test data.

     The formula regresses ``self.y_col`` on the ``+``-joined ``self.x_cols``
     using ``self.train_set``. Probabilities predicted for ``self.test_X``
     above 0.5 are labelled 1, all others 0, and the labels are compared
     with ``self.test_y`` through ``tp.output_table``.
     """
     rhs = "+".join(self.x_cols)
     formula = "%s~%s" % (self.y_col, rhs)
     fitted = smf.logit(formula, data=self.train_set).fit()

     test_probs = Series(fitted.predict(self.test_X))
     labels = (test_probs > 0.5).map({True: 1, False: 0})
     tp.output_table(labels, self.test_y.values)
Beispiel #7
0
 def qda_fit(self):
     """Fit QDA on the training data and tabulate its predictions for the test data.

     Uses the ``.values`` of ``self.train_X`` / ``self.train_y`` for fitting
     and of ``self.test_X`` / ``self.test_y`` for evaluation; the comparison
     is reported through ``tp.output_table``.
     """
     train_features, train_labels = self.train_X.values, self.train_y.values
     fitted = QDA().fit(train_features, train_labels)
     predicted = fitted.predict(self.test_X.values)
     tp.output_table(predicted, self.test_y.values)