def call_function():
    """Load the med.data dataset, train a QDA classifier, and print test accuracy (%)."""
    try:
        # prepare data
        trainingSet = []
        testSet = []
        split = 0.25  # fraction of rows routed to the test set by loadDataset
        loadDataset("/".join([DATASET_FOLDER, 'med.data']), split, trainingSet, testSet)

        # last column is the label; everything before it is a feature
        trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        columns = trainData.shape[1]
        # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the documented replacement.
        X = np.array(trainData).astype(float)
        y = np.array(trainingSet)[:, columns].astype(float)

        clf = QDA()
        clf.fit(X, y)

        testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        X_test = np.array(testData).astype(float)
        y_test = np.array(testSet)[:, columns].astype(float)

        accuracy = clf.score(X_test, y_test) * 100
        print("Accuracy %:", accuracy)
    except Exception:
        # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        # (The unused `predictions` list was removed as well.)
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
def create_symbol_forecast_model(self):
    """Fit and return a QDA model on lagged S&P500 returns.

    The prior two days of returns ("Lag1", "Lag2") predict the "Direction"
    response. Rows before self.model_start_test_date form the training set.

    NOTE: the model can be swapped for a random forest, SVM, or logistic
    regression — import the class and change the QDA() line.
    """
    # Lagged series of the SP500 stock market index
    lagged = create_lagged_series(self.symbol_list[0],
                                  self.model_start_date,
                                  self.model_end_date, lags=5)

    predictors = lagged[["Lag1", "Lag2"]]
    response = lagged["Direction"]

    # Split into training / test periods at the configured date
    cutoff = self.model_start_test_date
    X_train = predictors[predictors.index < cutoff]
    X_test = predictors[predictors.index >= cutoff]
    y_train = response[response.index < cutoff]
    y_test = response[response.index >= cutoff]

    model = QDA()
    model.fit(X_train, y_train)
    return model
def create_symbol_forecast_model(self):
    """Fit and return a QDA model on a lagged S&P500 return series.

    Uses the prior two days of returns ("Lag1", "Lag2") as predictors,
    with "Direction" as the response. Rows before
    self.model_start_test_date are the training period.
    """
    # Create a lagged series of the S&P500 US stock market index
    df_ret = create_lagged_series(self.symbol_list[0], self.model_start_date,
                                  self.model_end_date, self.model_interval,
                                  lags=5)

    X = df_ret[["Lag1", "Lag2"]]
    Y = df_ret["Direction"]

    start_test = self.model_start_test_date
    # BUG FIX: the original re-assigned X_train/Y_train from the *full*
    # X/Y when dropping the two leading NaN rows, silently re-including
    # the entire test period in the training set. Filter the already
    # split training frame instead.
    X_train = X[X.index < start_test]
    X_train = X_train[X_train.index > X.index[2]]  # skip leading rows with NaN lags
    X_test = X[X.index >= start_test]
    Y_train = Y[Y.index < start_test]
    Y_train = Y_train[Y_train.index > Y.index[2]]
    Y_test = Y[Y.index >= start_test]

    # Here we choose QDA, but the strategy depends on parameters; there is a
    # requirement to test with different models, k-fold cross validation, and
    # grid searching for parameter optimization.
    model = QDA()
    # TODO --> the model could be fit on the whole dataset; this is on model validation
    model.fit(X_train, Y_train)
    return model
def create_symbol_forecast_model(self):
    '''
    It essentially calls forecasting_ES_movements

    :return: fitted QDA model
    '''
    # Lagged series of the SP500 US stock market index
    lagged = create_lagged_series(list(self.symbol_dict.keys())[0],
                                  self.model_start_date,
                                  self.model_end_date, lags=5)

    # Drop leading rows where Lag5 is NaN, then use the prior two days of
    # returns as predictor values, with direction as the response.
    lagged = lagged[lagged['Lag5'].notnull()]
    X = lagged[['Lag1', 'Lag2']]
    y = lagged['Direction']

    # Training/test split at the configured test-start date
    cutoff = self.model_start_test_date
    X_train, X_test = X[X.index < cutoff], X[X.index >= cutoff]
    y_train, y_test = y[y.index < cutoff], y[y.index >= cutoff]

    model = QDA()
    model.fit(X_train, y_train)
    return model
def main():
    """Train a bagged QDA classifier on the combined dataset and print accuracy (%)."""
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.25  # fraction of rows routed to the test set by loadDataset
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    # BUG FIX: the original mixed Python 2 `print 'x'` statements with
    # print(...) calls — a SyntaxError under Python 3, which the rest of
    # the function already targets.
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # last column is the label; everything before it is a feature
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(QDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    # (The unused `predictions` list was removed.)
    accuracy = clf.score(X_test, y_test) * 100
    print("Accuracy %:", accuracy)
def main():
    """Compare LDA and QDA accuracy on the shuttle dataset and plot class regions."""
    dataset = pd.read_csv("shuttle.csv",
                          header=None).values.astype(np.int32, copy=False)
    # 60/40 train/test split. BUG FIX: the original used `cut + 1` as the
    # test start, silently dropping one sample at the boundary.
    cut = int(len(dataset) * 0.6)
    data_train = dataset[:cut]
    data_test = dataset[cut:]

    # keep only classes 4 and 5 (remapped to {0, 1}) of columns 3 and 6
    x, y = np.array([]), np.array([])
    for row in dataset:
        if row[-1] == 4 or row[-1] == 5:
            x = np.vstack((x, [row[3], row[6]])) if len(x) != 0 else [row[3], row[6]]
            y = np.append(y, row[-1] - 4)
    # class counts: [11478, 13, 39, 2155, 809, 4, 2] => classes 4, 5

    lda = LDA(solver="svd", store_covariance=True)
    splot = visualization(dataset[:, 3], dataset[:, 6], dataset[:, -1])
    splot = plot_data(lda, x, y, lda.fit(x, y).predict(x))
    plt.axis('tight')
    plt.show()

    lda = lda.fit(data_train[:, :-1], data_train[:, -1])
    lda = lda.score(data_test[:, :-1], data_test[:, -1])
    # BUG FIX: `store_covariances` was renamed `store_covariance` in
    # scikit-learn 0.19 and removed in 0.24; the old spelling raises
    # TypeError on modern versions.
    qda = QDA(store_covariance=True)
    qda = qda.fit(data_train[:, :-1], data_train[:, -1])
    qda = qda.score(data_test[:, :-1], data_test[:, -1])
    print("Linear Discriminant Analysis: ", lda)
    print("Quadratic Discriminant Analysis: ", qda)
def __init__(self):
    """Register the suite of candidate classifiers as [name, estimator] pairs.

    All estimators use library defaults except the SVMs, which differ
    only in their kernel. Downstream code presumably iterates
    self.classifiers to fit/score each model — confirm against callers.
    """
    self.classifiers = [
        ["Random Forest", RandomForestClassifier()],
        ["Logistic Regression", LogisticRegression()],
        ["Stochastic Gradient Descent", SGDClassifier()],
        ["Nearest Neighbors", KNeighborsClassifier()],
        ["Linear SVM", SVC(kernel="linear")],
        ["Polynomial SVM", SVC(kernel="poly")],
        ["RBF SVM", SVC(kernel="rbf")],
        ["Sigmoid SVM", SVC(kernel="sigmoid")],
        ["Decision Tree", DecisionTreeClassifier()],
        ["Extra Tree", ExtraTreesClassifier()],
        ["Gradient Boosting", GradientBoostingClassifier()],
        ["AdaBoost", AdaBoostClassifier()],
        ["Naive Bayes", GaussianNB()],
        ["Linear Discriminant Analysis", LDA()],
        ["Quadratic Discriminant Analysis", QDA()],
        ["Gaussian Process", GaussianProcessClassifier()],
        ["Multi-Layer Perceptron", MLPClassifier()],
    ]
def classify_using_random_sampling(self, X_train, X_test, y_train, y_test,
                                   portion_of_sampled_dataset_vector,
                                   classifiers_for_experiments):
    """Mean test accuracy of classifiers trained on randomly sampled subsets.

    For each classifier name and each sampling portion, the training set is
    shuffled and reduced 20 times; the mean recognition rate (%) over those
    runs is recorded.

    Returns:
        ndarray of shape (len(classifiers_for_experiments),
        len(portion_of_sampled_dataset_vector)) of mean recognition rates.
    """
    psa = PSA()
    # ---- settings:
    number_of_runs_for_random_sampling = 20

    # Factory per supported classifier name — idiomatic dict dispatch
    # replacing the original if/elif chain. An unknown name now fails fast
    # with KeyError instead of silently reusing the previous iteration's clf.
    classifier_factories = {
        'SVM': lambda: SVC(kernel='linear'),
        'LDA': lambda: LDA(),
        'QDA': lambda: QDA(),
        'Random Forest': lambda: RF(max_depth=2, random_state=0),
        'Logistic Regression': lambda: LR(),
        'Gaussian Naive Bayes': lambda: GaussianNB(),
    }

    # ---- Experimenting:
    recognition_rate_LIST = np.zeros((len(classifiers_for_experiments),
                                      len(portion_of_sampled_dataset_vector)))
    for classifier_index, classifier in enumerate(classifiers_for_experiments):
        print('############### Classifier: ' + classifier)
        for portion_index, portion in enumerate(portion_of_sampled_dataset_vector):
            print('###### Portion of sampled dataset: ' + str(portion * 100) + '%')
            # ---- data reduction with random sampling:
            run_rates = [None] * number_of_runs_for_random_sampling
            for run_index in range(number_of_runs_for_random_sampling):
                # shuffle samples of every class, then keep `portion` of each
                shuffled_samples = self.shuffle_samples_randomly(X=X_train, y=y_train)
                n_samples = [int(cls.shape[0] * portion) for cls in shuffled_samples]
                X, y = psa.reduce_data(sorted_samples=shuffled_samples,
                                       n_samples=n_samples)
                if run_index == 0:  # only report once in the multiple runs
                    print('number of sampled data in classes, after random sampling: '
                          + str(n_samples))
                # ---- train and test:
                clf = classifier_factories[classifier]()
                clf.fit(X=X, y=y)
                labels_predicted = clf.predict(X_test)
                run_rates[run_index] = (sum(labels_predicted == y_test)
                                        / len(labels_predicted)) * 100
            mean_rate = np.mean(run_rates)
            print('The recognition rate using ' + classifier
                  + ' with data number reduction (random sampling): '
                  + str(mean_rate))
            recognition_rate_LIST[classifier_index, portion_index] = mean_rate
    return recognition_rate_LIST
def qda(train_x, train_y, test_x):
    """Classify test_x with a QDA model fitted on (train_x, train_y).

    Delegates fitting and prediction to the module-level _classify helper.
    """
    # BUG FIX: `store_covariances` was renamed `store_covariance` in
    # scikit-learn 0.19 and removed in 0.24; passing the old keyword
    # raises TypeError on modern versions.
    model = QDA(priors=None, reg_param=0.0, store_covariance=False, tol=0.0001)
    return _classify(model, train_x, train_y, test_x)
def fit(self, X, Y):
    """Fit a (possibly one-vs-rest wrapped) QDA estimator on X, Y.

    Multilabel targets (2-D Y with more than one column) are wrapped in
    OneVsRestClassifier. After fitting, the scalings_ of every underlying
    QDA estimator are checked: non-positive values indicate numerical
    problems and raise ValueError.
    """
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    import sklearn.multiclass

    # BUG FIX: QDA's first positional parameter is `priors`, not
    # `reg_param`, so QDA(self.reg_param) set the class priors to the
    # regularization value. Pass the keyword explicitly.
    estimator = QDA(reg_param=self.reg_param)

    multilabel = len(Y.shape) == 2 and Y.shape[1] > 1
    if multilabel:
        self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator, n_jobs=1)
    else:
        self.estimator = estimator
    self.estimator.fit(X, Y)

    if multilabel:
        problem = np.any([
            np.any([np.any(s <= 0.0) for s in est.scalings_])
            for est in self.estimator.estimators_
        ])
    else:
        problem = np.any([np.any(s <= 0.0) for s in self.estimator.scalings_])

    if problem:
        raise ValueError('Numerical problems in QDA. QDA.scalings_ '
                         'contains values <= 0.0')
    return self
def performClassification(X_train, y_train, X_test, y_test, method, parameters=None):
    """Perform classification with the help of several algorithms.

    Fits each candidate model on (X_train, y_train), scores it on
    (X_test, y_test) via benchmark_classifier, and prints all scores.

    Args:
        method: label printed to describe this run.
        parameters: optional kwargs forwarded to AdaBoostClassifier.
    """
    if parameters is None:  # avoid the shared mutable-default-argument pitfall
        parameters = {}
    print('Performing ' + method + ' Classification...')
    print('Size of train set: ', X_train.shape)
    print('Size of test set: ', X_test.shape)
    print('Size of train set: ', y_train.shape)
    print('Size of test set: ', y_test.shape)

    classifiers = [
        RandomForestClassifier(n_estimators=100, n_jobs=-1),
        neighbors.KNeighborsClassifier(),
        # BUG FIX: SVC has no `epsilon` parameter (that belongs to SVR);
        # passing it raised TypeError before anything was benchmarked.
        SVC(degree=100, C=10000),
        # NOTE(review): a regressor in a classification benchmark — kept for
        # parity, but its .score() is R^2, not accuracy.
        AdaBoostRegressor(),
        # BUG FIX: the original *called* the constructed instance
        # (`AdaBoostClassifier(**parameters)()`), raising TypeError.
        AdaBoostClassifier(**parameters),
        GradientBoostingClassifier(n_estimators=100),
        QDA(),
    ]

    scores = [benchmark_classifier(clf, X_train, y_train, X_test, y_test)
              for clf in classifiers]
    print(scores)
def fitting(algorithm, X_train_std, y_train, X_test_std, y_test, cluster_name,
            path_results, name_model):
    """Fit the classifier selected by `algorithm` on the standardized
    training data and return the fitted model.

    Supported algorithms: 'gpc' (Gaussian process), 'svc' (grid-searched
    RBF SVM), 'gnb' (Gaussian naive Bayes), 'qda'.
    """
    if algorithm == 'gpc':
        # anisotropic RBF kernel: one length scale per feature (5 features)
        kernel = 1.0 * RBF([1.0, 1.0, 1.0, 1.0, 1.0])
        # kernel = 1.0 * RBF(1.0)  # isotropic alternative
        mod = GaussianProcessClassifier(kernel=kernel, random_state=0)
        mod.fit(X_train_std, y_train)
        print("Kernel {}".format(mod.kernel_))
    if algorithm == 'svc':
        # grid-search C and gamma over a 30x30 linear grid with 5-fold CV
        gamma_par = np.linspace(0.1, 20, 30)
        C_par = np.linspace(0.1, 20, 30)
        svm = SVC(kernel='rbf', random_state=0, probability=True)
        mod = GridSearchCV(estimator=svm,
                           param_grid=[{'C': C_par, 'gamma': gamma_par}],
                           scoring='accuracy', cv=5, n_jobs=-1)
        mod.fit(X_train_std, y_train)
    if algorithm == 'gnb':
        mod = GaussianNB()
        mod.fit(X_train_std, y_train)
    if algorithm == 'qda':
        mod = QDA()
        mod.fit(X_train_std, y_train)
    return mod
def perform_training(method, X_train, y_train, X_test, y_test, lag, delta, threshold):
    """Train the classifier named by `method` and evaluate its test hit rate.

    Args:
        method: one of 'LR', 'LDA', 'QDA', 'RF', 'KNN', 'ADA', 'GTB'.

    Returns:
        (hit_rate, model_dict) where model_dict records the method, fitted
        model, lag and delta.

    Raises:
        ValueError: if `method` is not a recognized name.
    """
    factories = {
        'LR': LogisticRegression,
        'LDA': LDA,
        'QDA': QDA,
        'RF': lambda: RandomForestClassifier(n_estimators=1000, n_jobs=-1),
        'KNN': KNeighborsClassifier,
        'ADA': AdaBoostClassifier,
        'GTB': lambda: GradientBoostingClassifier(n_estimators=100),
    }
    if method not in factories:
        # BUG FIX: the original only printed a warning and then crashed with
        # UnboundLocalError on `model`; fail fast with a clear error instead.
        raise ValueError('Invalid method {!r}'.format(method))
    model = factories[method]()

    model.fit(X_train, y_train.values.ravel())
    y_pred = model.predict(X_test)
    # fraction of test rows whose predicted direction matches the actual one
    hit_rate = (sum(y_pred == y_test.iloc[:, 0])) / len(y_pred)

    model_dict = {
        "method": method,
        "model": model,
        "lag": lag,
        "delta": delta,
    }
    return hit_rate, model_dict
def call_function():
    """Load comb.csv, train a bagged QDA classifier, and print test accuracy (%)."""
    try:
        # prepare data
        trainingSet = []
        testSet = []
        split = 0.25  # fraction of rows routed to the test set by loadDataset
        loadDataset("/".join([DATASET_FOLDER, 'comb.csv']), split, trainingSet, testSet)
        print('Train set: ' + repr(len(trainingSet)))
        print('Test set: ' + repr(len(testSet)))

        # last column is the label; everything before it is a feature
        trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        columns = trainData.shape[1]
        X = np.array(trainData)
        y = np.array(trainingSet)[:, columns]

        clf = BaggingClassifier(QDA())
        clf.fit(X, y)

        testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        X_test = np.array(testData)
        y_test = np.array(testSet)[:, columns]
        # (The unused `predictions` list was removed.)
        accuracy = clf.score(X_test, y_test) * 100
        print("Accuracy %:", accuracy)
    except Exception:
        # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
def NLMmodelexp1():
    """Run model experiment 1 on the NLM data with the default classifier suite."""
    models = [LR(), DT(), KNC(), RF(), ABC(), GNB(), QDA()]
    labels = [
        'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
        'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
    ]
    modelExperiment(nlmInsampleData, nlmOutsampleData, 'NLMdata/', fullFV,
                    models, labels,
                    'NLMmodelExperiment1.csv', 'NLMclassifier_plot1.png', True)
def qda(x_train, y_train, x_test, y_test, monkey):
    """Fit QDA, print accuracy and the confusion matrix, and plot the
    normalized confusion matrix titled with the monkey's name."""
    clf = QDA()
    clf.fit(x_train, y_train)
    # BUG FIX: the original constructed a second, unused QDA instance here
    # (`QDA(priors=None, reg_param=0.0)`) — dead code, removed.
    y_pred = clf.predict(x_test)
    print(len(x_train))
    print(len(y_train))
    print("qda Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("qda confusion: ")
    print(metrics.confusion_matrix(y_test, y_pred))
    # NOTE(review): metrics.plot_confusion_matrix was removed in
    # scikit-learn 1.2 — migrate to ConfusionMatrixDisplay.from_estimator.
    metrics.plot_confusion_matrix(clf, x_test, y_test, normalize='true',
                                  values_format='.0%')
    plt.title(str(monkey) + ": confusion_matrix - Qda algorithm")
    plt.show()
def qda_run(frame):
    """Train QDA on the frame's learning split and print its accuracy."""
    from sklearn.discriminant_analysis import (QuadraticDiscriminantAnalysis
                                               as QDA)
    accuracy = run(QDA(), frame.learning, frame.lindeps,
                   frame.testing, frame.tindeps, RUNS)
    print("QDA accuracy:", accuracy)
def getQDA(featurevector, labels, featurelength=constants.DECOMP_LENGTH):
    """Fit a default-parameter QDA classifier on the labeled samples and return it.

    Only rows whose label is >= 0 participate in the fit; negative labels
    presumably mark unlabeled samples — confirm against callers.
    """
    labeled = labels >= 0
    qda = QDA()
    qda.fit(featurevector[labeled], labels[labeled])
    return qda
def Loop_for_computataion(my_train_data, my_train_label, model_cnn, status, iris_cifar):
    """Evaluate models over 5 stratified shuffle-splits of the training data.

    status == 1 runs the classical supervised models, status == 2 trains the
    provided CNN (data reshaped to 32x32x3 images, labels one-hot over 10
    classes), status == 3 runs the PCA/meshgrid analysis; anything else just
    prints a message.
    """
    # Applying the K Fold using 5 splits as mentioned question
    lda = LDA()
    qda = QDA()
    nb = NB()
    rf = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    svm = SVC(kernel='rbf', random_state=0)
    dt = DecisionTreeClassifier(criterion='entropy', random_state=0)
    # CITATIONS: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedShuffleSplit.html
    # Even if I change the train and test size (e.g. train 80% / test 20%) I
    # find only slight variation in output; I have cross-verified changing the
    # sizes and the fit performs correctly.
    Kfold_stratified_shuffleop = StratifiedShuffleSplit(n_splits=5,
                                                        train_size=0.8,
                                                        test_size=0.2,
                                                        random_state=0)
    for training_values, testing_values in Kfold_stratified_shuffleop.split(
            my_train_data, my_train_label):
        # using the standard naming convention X_train, X_test, y_train, y_test
        X_train, X_test = my_train_data[training_values], my_train_data[
            testing_values]
        y_train, y_test = my_train_label[training_values], my_train_label[
            testing_values]
        print("\n")
        print("TRAINING VALUES:", training_values, "TESTING VALUES:",
              testing_values)
        print("\n")
        if status == 3:
            print("ENABLING PCA")
            meshgrid_pca_analysis(X_train, X_test, y_train, y_test, lda, qda,
                                  nb, rf, dt, svm, 1, iris_cifar)
        elif status == 1:
            compute_logic_supervised_learning(X_train, X_test, y_train, y_test,
                                              lda, qda, nb, rf, dt, svm, 1)
        elif status == 2:
            # carve a 10% validation split out of this training fold for the CNN
            cnn_split = list(
                StratifiedShuffleSplit(n_splits=2,
                                       test_size=0.1).split(X_train, y_train))
            idx_tr, idx_val = cnn_split[0]
            X_val, y_val = X_train[idx_val], y_train[idx_val]
            X_tr, y_tr = X_train[idx_tr], y_train[idx_tr]
            # reshape flat feature vectors back to 32x32 RGB images
            X_val = X_val.reshape(len(X_val), 32, 32, 3)
            X_tr = X_tr.reshape(len(X_tr), 32, 32, 3)
            X_test = X_test.reshape(len(X_test), 32, 32, 3)
            # one-hot encode labels over 10 classes
            y_val = np_utils.to_categorical(y_val, 10)
            y_tr = np_utils.to_categorical(y_tr, 10)
            model_cnn.fit(X_tr, y_tr, validation_data=(X_val, y_val))
            model_cnn.predict(X_test)
        else:
            print("No proper selection")
def removeLabels(self, labeledGroupName, label=None, description=None,
                 commandline=None):  # pragma: no cover
    '''
    Delete labeled MeasuredParameterResources that have
    ResourceType.name=labeledGroupName (such as 'Labeled Plankton').
    Restrict deletion to the other passed in options, if specified:
    label is like 'diatom', description is like 'Using Platform dorado,
    Parameter {'salinity': ('33.65', '33.70')} from 20130916T124035 to
    20130919T233905' (commandline is too long to show in this doc string -
    see examples in usage note). Note: Some metadata ResourceTypes will
    not be removed even though the Resources that use them will be removed.
    '''
    # Remove MeasuredParameter associations with Resource (Labeled data)
    mprs = MeasuredParameterResource.objects.using(self.args.database).filter(
        resource__resourcetype__name=labeledGroupName
    ).select_related('resource')
    if label:
        mprs = mprs.filter(resource__name=LABEL, resource__value=label)
    if self.args.verbose > 1:
        print("  Removing MeasuredParameterResources with type = '%s' and label = %s"
              % (labeledGroupName, label))
    # collect the Resources referenced by the deleted associations so they
    # can be cleaned up below
    rs = []
    for mpr in mprs:
        rs.append(mpr.resource)
        mpr.delete(using=self.args.database)

    # Remove Resource associations with Resource (label metadata); make rs
    # list distinct with set() before iterating on the delete()
    if label and description and commandline:
        try:
            # NOTE(review): QDA is used here as a Django Q-object for building
            # OR/AND query expressions — presumably an import alias; confirm
            # against the file's import block.
            rrs = ResourceResource.objects.using(self.args.database).filter(
                (QDA(fromresource__name=LABEL) & QDA(fromresource__value=label))
                & ((QDA(toresource__name=DESCRIPTION) & QDA(toresource__value=description))
                   | (QDA(toresource__name=COMMANDLINE) & QDA(toresource__value=commandline))
                   )
            )
            if self.args.verbose > 1:
                print("  Removing ResourceResources with fromresource__value = '%s' and toresource__value = '%s'"
                      % (label, description))
            for rr in rrs:
                rr.delete(using=self.args.database)
        except TypeError:
            # Likely TypeError: __init__() got an unexpected keyword argument 'fromresource__name'
            if self.args.verbose > 1:
                print("  Previous Resource associations not found.")
    else:
        if self.args.verbose > 1:
            print("  Removing Resources associated with labeledGroupName = %s'"
                  % labeledGroupName)
        for r in set(rs):
            r.delete(using=self.args.database)
def classify_using_sortingByDistanceToMean(self, X_train, X_test, y_train, y_test,
                                           portion_of_sampled_dataset_vector,
                                           classifiers_for_experiments):
    """Test accuracy of classifiers trained on PSA-reduced training data.

    Samples of every class are sorted once by distance from the class mean;
    for each classifier name and each sampling portion, the sorted samples
    are reduced with PSA and the classifier's recognition rate (%) on
    X_test is recorded.

    Returns:
        ndarray of shape (len(classifiers_for_experiments),
        len(portion_of_sampled_dataset_vector)) of recognition rates.
    """
    psa = PSA()
    # ---- sort samples of classes according to their ranks:
    sorted_samples, ranks = self.sort_samples_by_distance_from_mean(X=X_train,
                                                                    y=y_train)

    # Factory per supported classifier name — idiomatic dict dispatch
    # replacing the original if/elif chain. An unknown name now fails fast
    # with KeyError instead of silently reusing the previous iteration's clf.
    classifier_factories = {
        'SVM': lambda: SVC(kernel='linear'),
        'LDA': lambda: LDA(),
        'QDA': lambda: QDA(),
        'Random Forest': lambda: RF(max_depth=2, random_state=0),
        'Logistic Regression': lambda: LR(),
        'Gaussian Naive Bayes': lambda: GaussianNB(),
    }

    # ---- Experimenting:
    recognition_rate_LIST = np.zeros((len(classifiers_for_experiments),
                                      len(portion_of_sampled_dataset_vector)))
    for classifier_index, classifier in enumerate(classifiers_for_experiments):
        print('############### Classifier: ' + classifier)
        for portion_index, portion in enumerate(portion_of_sampled_dataset_vector):
            print('###### Portion of sampled dataset: ' + str(portion * 100) + '%')
            # ---- data reduction with PSA: keep `portion` of each class
            n_samples = [int(cls.shape[0] * portion) for cls in sorted_samples]
            X, y = psa.reduce_data(sorted_samples=sorted_samples,
                                   n_samples=n_samples)
            print('number of sampled data in classes, after PSA: ' + str(n_samples))
            # ---- train and test:
            clf = classifier_factories[classifier]()
            clf.fit(X=X, y=y)
            labels_predicted = clf.predict(X_test)
            recognition_rate_PSA = (sum(labels_predicted == y_test)
                                    / len(labels_predicted)) * 100
            print('The recognition rate using ' + classifier
                  + ' with data number reduction (PSA): ' + str(recognition_rate_PSA))
            recognition_rate_LIST[classifier_index, portion_index] = recognition_rate_PSA
    return recognition_rate_LIST
def qda(train_size=None):
    """Fit QDA on the dataset (optionally sub-sampled) and report MAE and
    the confusion matrix on the held-out test split.

    Args:
        train_size: if truthy, keep only this much of the training data
            (forwarded to train_test_split).
    """
    _, _, X_train, X_test, y_train, y_test = dataset()
    if train_size:
        X_train, _, y_train, _ = train_test_split(X_train, y_train,
                                                  train_size=train_size)
    model = QDA()
    model.fit(X_train, y_train)
    # predict once; the model is deterministic after fitting
    predictions = model.predict(X_test)
    mae(y_test, predictions)
    confusion_matrix(y_test, predictions, model.score(X_test, y_test))
def qda(train_x, train_y, test_x, test_y, nlabels, with_cfmat=False):
    """Classify with QDA via _classify, appending zeroed fake feature
    importances so the return shape matches tree-based classifiers."""
    # BUG FIX: `store_covariances` was renamed `store_covariance` in
    # scikit-learn 0.19 and removed in 0.24; the old keyword raises
    # TypeError on modern versions.
    model = QDA(priors=None, reg_param=0.0, store_covariance=False, tol=0.0001)
    retval = _classify(model, train_x, train_y, test_x, test_y, nlabels, with_cfmat)
    # QDA exposes no feature_importances_; pad with zeros for shape parity
    fake_importances = np.zeros((train_x.shape[1], ))
    return list(retval) + [fake_importances]
def get_QDA(Xtrain, Ytrain, Xtest=None, Ytest=None, verbose=0):
    """Fit a QDA classifier and (optionally) print train/test accuracy.

    Args:
        Xtrain, Ytrain: training features and labels.
        Xtest, Ytest: optional held-out set; scored only when provided.
        verbose: when 1, print accuracy percentages.

    Returns:
        The fitted QDA estimator (scores are printed, not returned).
    """
    qda = QDA()
    qda.fit(Xtrain, Ytrain)
    scores = np.empty((2))
    # NOTE(review): the flattened original makes the nesting of the Xtest
    # check ambiguous; it is kept inside the verbose block, matching the
    # reading that scoring only happens when reporting — confirm intent.
    if verbose == 1:
        scores[0] = qda.score(Xtrain, Ytrain)
        print('QDA, train: {0:.02f}% '.format(scores[0] * 100))
        # BUG FIX (idiom): `type(Xtest) != type(None)` replaced by the
        # canonical identity test.
        if Xtest is not None:
            scores[1] = qda.score(Xtest, Ytest)
            print('QDA, test: {0:.02f}% '.format(scores[1] * 100))
    return qda
def test_qda(data):
    """Fit QDA on the training split and print train/test accuracy."""
    model = QDA()
    model.fit(data.train_x, data.train_y)
    print('QDA')
    train_predictions = model.predict(data.train_x)
    train_acc = metrics.accuracy_score(data.train_y, train_predictions)
    print('Classification accuracy for train data = {:.2%}'.format(train_acc))
    test_predictions = model.predict(data.test_x)
    test_acc = metrics.accuracy_score(data.test_y, test_predictions)
    print('Classification accuracy for test data = {:.2%}'.format(test_acc))
def get_model(modelname):
    """Return a fresh classifier instance for the given model name.

    Raises:
        KeyError: if `modelname` is not a recognized key.
    """
    factories = {
        "mock": RandomClassifierMock,
        "lda": LDA,
        "qda": QDA,
        "logistic": LogisticRegression,
        "gnb": GaussianNB,
        "knn": KNeighborsClassifier,
        "forest": RandomForestClassifier,
        "svm": lambda: SVC(kernel="linear"),
        "rbf-svm": lambda: SVC(kernel="rbf"),
        # BUG FIX: "poly3-svm" was built with kernel="linear", for which
        # `degree` is silently ignored — a linear SVM mislabeled as
        # polynomial. Use the polynomial kernel as the name promises.
        "poly3-svm": lambda: SVC(kernel="poly", degree=3),
    }
    return factories[modelname]()
def test_quadratic_discriminant_analysis(data):
    """Fit QDA on the training split and print train/test accuracy."""
    model = QDA()
    model.fit(data.train_x, data.train_y)
    print('QDA')
    predictions_train = model.predict(data.train_x)
    acc_train = metrics.accuracy_score(data.train_y, predictions_train)
    print('Classification accuracy for train data = {:.2%}'.format(acc_train))
    predictions_test = model.predict(data.test_x)
    acc_test = metrics.accuracy_score(data.test_y, predictions_test)
    print('Classification accuracy for test data = {:.2%}'.format(acc_test))
def SOmodelexp1():
    """Run model experiment 1 on the StackOverflow data with the default suite."""
    models = [LR(), DT(), KNC(), RF(n_estimators=200), ABC(), GNB(), QDA()]
    labels = [
        'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
        'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
    ]
    modelExperiment(SOInsampleData, SOOutsampleData, 'stackoverflowdata/',
                    fullFV, models, labels,
                    'SOmodelExperiment1.csv', 'SOclassifier_plot1.png', True)
def fitLDAQDA(ts, startTest=datetime(2019, 12, 1)):
    """Fit LR, LDA and QDA on lagged returns and record their predictions.

    Args:
        ts: DataFrame with "Lag1", "Lag2" and "Direction" columns, indexed
            by date.
        startTest: first date of the out-of-sample period. Generalized from
            the previously hard-coded 2019-12-01; the default preserves the
            original behavior for existing callers.
    """
    X = ts[["Lag1", "Lag2"]]
    y = ts["Direction"]

    # in-sample / out-of-sample split at startTest
    Xtrain = X[X.index < startTest]
    Xtest = X[X.index >= startTest]
    ytrain = y[y.index < startTest]
    ytest = y[y.index >= startTest]

    # collect actuals plus each model's predictions
    pred = pd.DataFrame(index=ytest.index)
    pred["Actual"] = ytest
    models = [("LR", LogisticRegression()), ("LDA", LDA()), ("QDA", QDA())]
    for name, model in models:
        fitModel(name, model, Xtrain, ytrain, Xtest, pred)
def quadratic_discriminant(self):
    """Grid-search QDA's reg_param with k-fold CV and return the best search
    object, with its test-set RMSE attached as `validation_acc`."""
    param_grid = {'reg_param': [0, .1, .25, .4, .5, .6, .75, .9, 1]}
    search = GridSearchCV(QDA(), param_grid, cv=self.kfolds,
                          error_score=np.nan)
    search.fit(self.train_data, np.ravel(self.train_resp))
    predictions = np.reshape(search.predict(self.test_data),
                             self.test_resp.shape)
    # NOTE(review): despite the name, this is an RMSE, not an accuracy
    search.validation_acc = np.sqrt(np.mean((predictions - self.test_resp) ** 2))
    return search