def fit_model(self):
    """Train a Quadratic Discriminant Analysis model on lagged returns.

    The lagged series is built for ``self.symbol`` over the span
    ``self.start_train`` .. ``self.end_period`` (the original notes
    describe the target as the S&P 500 index, ^GSPC on Yahoo).

    Side effects:
        self.predictors -- the "Lag1"/"Lag2" rows whose index is at or
            after ``self.start_test``, kept for later forecasting.
        self.model -- the QDA instance, fitted on the rows whose index
            is earlier than ``self.start_test``.
    """
    lagged = create_lagged_series(
        self.symbol, self.start_train, self.end_period, lags=5
    )

    # Five lags are requested, but only the first two feed the model;
    # the "Direction" column is the response.
    features = lagged[["Lag1", "Lag2"]]
    direction = lagged["Direction"]

    # Both objects are cut from the same frame, so one mask selects the
    # training rows of each.
    is_train = features.index < self.start_test
    train_features = features[is_train]
    train_direction = direction[is_train]

    # Out-of-sample predictor rows used when forecasting direction.
    self.predictors = features[features.index >= self.start_test]

    self.model = QDA()
    self.model.fit(train_features, train_direction)
def fit_model(self):
    """Fit the QDA forecaster used to predict market direction.

    Builds the lagged-return series for ``self.symbol`` between
    ``self.start_train`` and ``self.end_period``, fits a QDA model on
    the part of it that precedes ``self.start_test``, and stores the
    remaining predictor rows on ``self.predictors``.
    """
    series = create_lagged_series(
        self.symbol,
        self.start_train,
        self.end_period,
        lags=5,
    )
    test_start = self.start_test

    # Predictors: the previous two days of returns.
    lag_returns = series[["Lag1", "Lag2"]]
    # Response: the direction column.
    direction = series["Direction"]

    # Training set: everything strictly before the test start.
    lag_returns_train = lag_returns[lag_returns.index < test_start]
    direction_train = direction[direction.index < test_start]

    # Everything from the test start onwards is kept for forecasting.
    self.predictors = lag_returns[lag_returns.index >= test_start]

    # The model is stored on the instance before fitting, so the
    # attribute exists even if fitting raises.
    self.model = QDA()
    self.model.fit(lag_returns_train, direction_train)
def create_symbol_forecast_model(self):
    """Fit and return a Quadratic Discriminant Analysis direction model.

    The lagged-return series is built for the first entry of
    ``self.symbol_list`` over ``self.model_start_date`` ..
    ``self.model_end_date``. Only rows whose index is earlier than
    ``self.model_start_test_date`` are used for fitting.

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` instance.
    """
    snpret = create_lagged_series(
        self.symbol_list[0], self.model_start_date,
        self.model_end_date, lags=5
    )

    # Use the prior two days of returns as predictor
    # values, with direction as the response
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Only the training slice is needed here: nothing in this method
    # evaluates the model, so no test slice is built.
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    y_train = y[y.index < start_test]

    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)
    return model
def fit_model(self):
    """Fit ``self.model`` (QDA) and stash the out-of-sample predictors.

    Training uses the rows of the lagged series for ``self.symbol``
    whose index is earlier than ``self.start_test``; the "Lag1"/"Lag2"
    rows at or after that point are stored on ``self.predictors``.
    """
    lagged = create_lagged_series(
        self.symbol, self.start_train, self.end_period, lags=5
    )
    lag_cols = lagged[["Lag1", "Lag2"]]
    response = lagged["Direction"]

    before_test = lag_cols.index < self.start_test
    from_test_on = lag_cols.index >= self.start_test

    # lag_cols and response share the frame's index, so the same mask
    # picks the training rows of both.
    fit_inputs = lag_cols[before_test]
    fit_targets = response[before_test]

    self.predictors = lag_cols[from_test_on]

    self.model = QDA()
    self.model.fit(fit_inputs, fit_targets)
def create_symbol_forecast_model(self):
    """Fit and return a QDA model that forecasts market direction.

    The lagged-return series is created for ``self.symbol_list[0]``
    between ``self.model_start_date`` and ``self.model_end_date``.
    Fitting uses only the rows whose index is earlier than
    ``self.model_start_test_date``.

    Returns:
        The fitted ``QDA`` instance.
    """
    # Create a lagged series of the market index
    snpret = create_lagged_series(
        self.symbol_list[0], self.model_start_date,
        self.model_end_date, lags=5
    )

    # Use the prior two days of returns as predictor values, with
    # direction as the response.
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Only the training slice is built; this method never evaluates
    # the model, so a test slice would go unused.
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    y_train = y[y.index < start_test]

    # Model to use is Quadratic Discriminant Analysis
    model = QDA()
    model.fit(X_train, y_train)
    return model
import datetime import sklearn from sklearn import cross_validation from sklearn.cross_validation import train_test_split from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.svm import SVC from forecast import create_lagged_series if __name__ == "__main__": # Create a lagged series of the S&P500 US stock market index snpret = create_lagged_series( "^GSPC", datetime.datetime(2001,1,10), datetime.datetime(2005,12,31), lags=5 ) # Use the prior two days of returns as predictor # values, with direction as the response X = snpret[["Lag1","Lag2"]] y = snpret["Direction"] # Train/test split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.5, random_state=42 ) # Set the parameters by cross-validation tuned_parameters = [ {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]}
# -*- coding: utf-8 -*-
# grid_search.py
#
# Grid-searches RBF-kernel SVC hyperparameters (gamma and C) for
# predicting the "Direction" column of a lagged return series from its
# first two lags, then prints the best estimator and the raw
# cross-validation results.

import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from forecast import create_lagged_series

if __name__ == "__main__":
    # Lagged series for the "SPX" symbol, with five lags requested.
    lagged_returns = create_lagged_series(
        "SPX", "2001-01-10", "2005-12-31", lags=5
    )

    # Predictors are the two most recent lags; the response is direction.
    features = lagged_returns[["Lag1", "Lag2"]]
    direction = lagged_returns["Direction"]

    # Half of the rows are held out; the fixed seed keeps the split
    # reproducible.
    (features_train, features_test,
     direction_train, direction_test) = train_test_split(
        features, direction, test_size=0.5, random_state=42
    )

    # Candidate hyperparameters explored by the search.
    param_grid = [
        {
            "kernel": ["rbf"],
            "gamma": [1e-3, 1e-4],
            "C": [1, 10, 100, 1000],
        }
    ]

    # Search over the grid, cross-validating with cv=10.
    search = GridSearchCV(SVC(C=1), param_grid, cv=10)
    search.fit(features_train, direction_train)

    print("Optimised parameters found on training set:")
    print(search.best_estimator_, "\n")
    print("Grid scores calculated on training set:")
    print(search.cv_results_)
# Compares several classifiers on lagged S&P 500 ("^GSPC") returns.
# NOTE(review): only the data-preparation part of the script is shown
# here; the model definitions follow the final comment below.

# datetime is required for the date bounds passed to
# create_lagged_series below; without it the script fails with NameError.
import datetime

import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC, SVC

# Newer scikit-learn releases moved these names; the legacy module paths
# are kept as a fallback for older installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

try:
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
except ImportError:
    from sklearn.lda import LDA
    from sklearn.qda import QDA

from forecast import create_lagged_series

if __name__ == "__main__":
    # Create lagged series
    snpret = create_lagged_series(
        "^GSPC", datetime.datetime(2001, 1, 10),
        datetime.datetime(2008, 12, 31), lags=5
    )

    # Use the prior three days of returns as predictors
    X = snpret[["Lag1", "Lag2", "Lag3"]]
    # Direction as response
    y = snpret["Direction"]

    # Train/test split
    # 70% of the data is used for training; the remaining 30% for testing.
    # The rows are sampled randomly rather than divided sequentially;
    # random_state fixes the seed so the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.7, random_state=42
    )

    # create parametrised models
# Prepares lagged S&P 500 returns for a comparison of classifiers:
# loads the series, picks predictors/response and splits them by date.
# NOTE(review): the script continues past the last comment below; the
# names defined here are presumably consumed by that later code.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import forecast
import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC, SVC

# Create the lagged series of the S&P 500 ('^GSPC'). No lags argument is
# passed, so create_lagged_series falls back to its own default.
snpret = forecast.create_lagged_series('^GSPC', datetime.datetime(2001, 1, 10), datetime.datetime(2005, 12, 31))

# Use the prior 2 days of returns as predictor values,
# with direction as the response.
X = snpret[['Lag1', 'Lag2']]
y = snpret['Direction']

# The data is split into 2 parts: before, and on or after, 2005-01-01.
start_test = datetime.datetime(2005, 1, 1)

# Create the training and test sets: rows whose index is earlier than
# start_test are used for training, the rest for testing.
X_train = X[X.index < start_test]
X_test = X[X.index >= start_test]
y_train = y[y.index < start_test]
y_test = y[y.index >= start_test]

# Create parametrised models