class RegularizedQDA:
    """QDA with shrinkage-regularized per-class covariance matrices.

    Three types of regularization are possible:
    - regularize the covariance of a class toward the average variance
      within that class
    - regularize the covariance of a class toward the pooled covariance
      across all classes
    - add some constant amount of variance to each feature
    """

    def __init__(self, avg_weight = 0.1, pooled_weight = 0, extra_variance = 0):
        # avg_weight: shrinkage toward each class's mean diagonal variance.
        # pooled_weight: shrinkage toward the pooled covariance of all samples.
        # extra_variance: constant ridge term added to every feature's variance.
        self.avg_weight = avg_weight
        self.pooled_weight = pooled_weight
        self.extra_variance = extra_variance
        self.model = QDA()

    def fit(self, X, Y):
        """Fit the wrapped QDA, then replace each per-class matrix with a
        convex combination of itself, its average diagonal variance, the
        pooled covariance of X, and a ridge term."""
        self.model.fit(X,Y)
        I = np.eye(X.shape[1])
        a = self.avg_weight
        p = self.pooled_weight
        ev = self.extra_variance
        # Weight left on the original per-class matrix after shrinkage.
        original_weight = 1.0 - a - p
        scaled_pooled_cov = p * np.cov(X.T)
        assert scaled_pooled_cov.shape == I.shape
        assert all([C.shape == I.shape for C in self.model.rotations])
        # NOTE(review): this assumes `model.rotations` holds per-class
        # covariance-like square matrices (old sklearn QDA internals) and
        # that overwriting them changes subsequent predictions -- confirm
        # against the installed sklearn version.
        self.model.rotations = \
            [original_weight * C + \
             a * np.mean(np.diag(C)) * I + \
             scaled_pooled_cov + ev * I \
             for C in self.model.rotations]

    def predict(self, X):
        """Delegate prediction to the (regularized) wrapped QDA model."""
        return self.model.predict(X)
def get_performance(test_df, X_std, y): Xtest = test_df.ix[:, 'x.1':'x.10'].values ytest = test_df.ix[:, 'y'].values X_std_test = StandardScaler().fit_transform(Xtest) lda_model = LDA() lda_model.fit(X_std, y) qda_model = QDA() qda_model.fit(X_std, y) knn_model = KNeighborsClassifier(n_neighbors=10) knn_model.fit(X_std, y) print "KNN SCORE" print knn_model.score(X_std_test, ytest) print "LDA SCORE" print lda_model.score(X_std_test, ytest) print "QDA SCORE" print qda_model.score(X_std_test, ytest) knn_scores_training = [] knn_scores_test = [] for i in range(1, 12): knn_model = KNeighborsClassifier(n_neighbors=i) knn_model.fit(X_std, y) knn_scores_training.append(knn_model.score(X_std_test, ytest)) knn_scores_test.append(knn_model.score(X_std, y)) plt.plot(range(11), knn_scores_training, 'r--') plt.plot(range(11), knn_scores_test, 'b--') plt.axis([0, 10, 0.3, 1.1]) plt.show()
class SNPForecastingStrategy(Strategy):
    """Forecast index direction with a QDA model fitted on lagged returns.

    symbol: ticker passed to create_lagged_series.
    bars: DataFrame of bars whose index drives the signals frame.
    """

    def __init__(self,symbol,bars):
        self.symbol=symbol
        self.bars=bars
        self.create_periods()
        self.fit_model()

    def create_periods(self):
        # Training starts 2001-01-10; out-of-sample period is 2005.
        self.start_train=datetime.datetime(2001,1,10)
        self.start_test=datetime.datetime(2005,1,1)
        self.end_period=datetime.datetime(2005,12,31)

    def fit_model(self):
        """Fit QDA using the two most recent lagged returns (Lag1, Lag2)
        as predictors of the Direction response."""
        snpret=create_lagged_series(self.symbol,self.start_train,self.end_period,lags=5)
        X=snpret[['Lag1','Lag2']]
        Y=snpret['Direction']
        # Chronological split: train strictly before start_test,
        # keep the rest as out-of-sample predictors.
        X_train=X[X.index<self.start_test]
        Y_train=Y[Y.index<self.start_test]
        self.predictors=X[X.index>=self.start_test]
        self.model=QDA()
        self.model.fit(X_train,Y_train)

    def generate_signals(self):
        """Return a DataFrame with 'signal' (predicted direction, first five
        rows zeroed) and 'positions' (first difference of the signal)."""
        signals=pd.DataFrame(index=self.bars.index)
        signals['signal']=0.0
        signals['signal']=self.model.predict(self.predictors)
        # Zero the warm-up rows affected by the 5-lag construction.
        signals['signal'][0:5]=0.0
        signals['positions']=signals['signal'].diff()
        return signals
def performQDAClass(X_train, y_train, X_test, y_test):
    """Fit a QDA classifier on the training split and return its mean
    accuracy on the test split."""
    model = QDA().fit(X_train, y_train)
    return model.score(X_test, y_test)
class SNPForecastingStrategy(Strategy):
    """Requires:
    symbol - A stock symbol on which to form a strategy on.
    bars - A DataFrame of bars for the above symbol."""

    def __init__(self, symbol, bars):
        self.symbol = symbol
        self.bars = bars
        self.create_periods()
        self.fit_model()

    def create_periods(self):
        """Create training/test periods."""
        self.start_train = datetime.datetime(2001,1,10)
        self.start_test = datetime.datetime(2005,1,1)
        self.end_period = datetime.datetime(2005,12,31)

    def fit_model(self):
        """Fits a Quadratic Discriminant Analyser to the US stock market
        index (^GPSC in Yahoo)."""
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(self.symbol, self.start_train,
                                      self.end_period, lags=5)

        # Use the prior two days of returns as
        # predictor values, with direction as the response
        X = snpret[["Lag1","Lag2"]]
        y = snpret["Direction"]

        # Create training and test sets (chronological split at start_test)
        X_train = X[X.index < self.start_test]
        y_train = y[y.index < self.start_test]

        # Create the predicting factors for use
        # in direction forecasting
        self.predictors = X[X.index >= self.start_test]

        # Create the Quadratic Discriminant Analysis model
        # and the forecasting strategy
        self.model = QDA()
        self.model.fit(X_train, y_train)

    def generate_signals(self):
        """Returns the DataFrame of symbols containing the signals
        to go long, short or hold (1, -1 or 0)."""
        signals = pd.DataFrame(index=self.bars.index)
        signals['signal'] = 0.0

        # Predict the subsequent period with the QDA model
        signals['signal'] = self.model.predict(self.predictors)

        # Remove the first five signal entries to eliminate
        # NaN issues with the signals DataFrame
        signals['signal'][0:5] = 0.0
        signals['positions'] = signals['signal'].diff()

        return signals
def performSVMClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    Binary classification: fit on the training split, return test accuracy.

    NOTE(review): despite the function name and the original "SVM" docstring,
    the implementation fits a QDA model. The `parameters`, `fout` and
    `savemodel` arguments are accepted but never used.
    """
    clf = QDA()
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    return accuracy
def performQDAClass(X_train, y_train, X_test, y_test):
    """Quadratic Discriminant Analysis binary classification: fit on the
    training split and return mean accuracy on the test split."""
    classifier = QDA()
    classifier.fit(X_train, y_train)
    return classifier.score(X_test, y_test)
def qda(data, labels, n, v_type):
    """Split the data, fit QDA, and return evaluation artifacts.

    data, labels: full dataset and class labels, handed to split_data.
    n: accepted for interface parity with sibling classifiers; unused here.
    v_type: validation split type forwarded to split_data.
    Returns (accuracy, report, y_pred, test_labels, test_data, clf, cm, "QDA").
    """
    train_data, train_labels, test_data, test_labels = split_data(data, labels, v_type)
    clf = QDA()
    clf.fit(train_data, train_labels)
    y_pred = clf.predict(test_data)
    # Fraction of predictions that exactly match the ground truth.
    pure_accuracy_rate = len([y_pred[x] for x in range(len(y_pred))
                              if y_pred[x] == test_labels[x]]) / float(len(test_labels))
    # BUG FIX: classification_report expects (y_true, y_pred); the arguments
    # were previously swapped, which transposes precision and recall in the
    # report. Now consistent with the confusion_matrix call below.
    report = classification_report(test_labels, y_pred, target_names=rock_names)
    cm = confusion_matrix(test_labels, y_pred)
    return pure_accuracy_rate, report, y_pred, test_labels, test_data, clf, cm, "QDA"
def get_QDA(Xtrain, Xtest, Ytrain, Ytest):
    """Fit a QDA classifier, print train/test accuracy, return the model."""
    model = QDA()
    model.fit(Xtrain, Ytrain)
    train_acc = model.score(Xtrain, Ytrain)
    test_acc = model.score(Xtest, Ytest)
    print('QDA, train: {0:.02f}% '.format(train_acc * 100))
    print('QDA, test: {0:.02f}% '.format(test_acc * 100))
    return model
def get_QDA(Xtrain, Xtest, Ytrain, Ytest):
    """Train QDA on (Xtrain, Ytrain), report accuracy on both splits,
    and return the fitted classifier."""
    classifier = QDA()
    classifier.fit(Xtrain, Ytrain)
    accuracies = (classifier.score(Xtrain, Ytrain),
                  classifier.score(Xtest, Ytest))
    print('QDA, train: {0:.02f}% '.format(accuracies[0]*100))
    print('QDA, test: {0:.02f}% '.format(accuracies[1]*100))
    return classifier
def QuadraticDiscriminantAnalysis(x_train, y_train, x_cv, y_cv): """ Quadratic Discriminant Analysis Classifier """ print "Quadratic Discriminant Analysis" clfr = QDA() clfr.fit(x_train, y_train) #print 'Accuracy in training set: %f' % clfr.score(x_train, y_train) #if y_cv != None: #print 'Accuracy in cv set: %f' % clfr.score(x_cv, y_cv) return clfr
def Call_QDA_Classi(X_train, y_train, X_test, y_test):
    """QDA classification: fit on the training split, return test accuracy.

    Available QDA hyper-parameters (defaults used here):
    priors, reg_param, tol, store_covariances.
    """
    model = QDA()
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
def get_LDA_performance(test_df, X_std, y):
    """For each LDA projection dimensionality d = 1..10, fit LDA/QDA/KNN on
    the LDA-reduced training data and plot train/test error (1 - accuracy).

    test_df: DataFrame with feature columns 'x.1'..'x.10' and label column 'y'.
    X_std, y: standardized training features and labels.
    """
    X_test = test_df.ix[:, 'x.1':'x.10'].values
    # NOTE(review): the test set is standardized with its own statistics,
    # not the training set's scaler -- confirm this is intentional.
    X_std_test = StandardScaler().fit_transform(X_test)
    y_test = test_df.ix[:, 'y'].values
    lda_scores_training = []
    lda_scores_test = []
    qda_scores_training = []
    qda_scores_test = []
    knn_scores_training = []
    knn_scores_test = []
    for d in range(1, 11):
        # Project onto d LDA components, then fit each classifier on the
        # reduced training data.
        lda = LDA(n_components=d)
        Xred_lda_training = lda.fit_transform(X_std, y)
        Xred_lda_test = lda.transform(X_std_test)
        lda_model = LDA()
        lda_model.fit(Xred_lda_training, y)
        qda_model = QDA()
        qda_model.fit(Xred_lda_training, y)
        knn_model = KNeighborsClassifier(n_neighbors=10)
        knn_model.fit(Xred_lda_training, y)
        # Record error rates (1 - accuracy) on both splits.
        lda_scores_training.append(1 - lda_model.score(Xred_lda_training, y))
        lda_scores_test.append(1 - lda_model.score(Xred_lda_test, y_test))
        qda_scores_training.append(1 - qda_model.score(Xred_lda_training, y))
        qda_scores_test.append(1 - qda_model.score(Xred_lda_test, y_test))
        knn_scores_training.append(1 - knn_model.score(Xred_lda_training, y))
        knn_scores_test.append(1 - knn_model.score(Xred_lda_test, y_test))
    # NOTE(review): x-axis is 0..9 while d runs 1..10, and the first title
    # reads "LDA vs LDA" -- possibly intended as a dimensionality label.
    plt.plot(range(10), lda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), lda_scores_test, 'b--', label="Test data")
    plt.title("LDA vs LDA")
    plt.xlabel('k')
    plt.ylabel('Score')
    plt.show()
    plt.plot(range(10), qda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), qda_scores_test, 'b--', label="Test data")
    plt.title("QDA vs LDA")
    plt.show()
    plt.plot(range(10), knn_scores_training, 'r--', label="Train data")
    plt.plot(range(10), knn_scores_test, 'b--', label="Test data")
    plt.title("KNN vs LDA")
    plt.show()
def train_classifier(xTrain_s, yTrain_s, kwargs):
    """Train a QDA classifier on (xTrain_s, yTrain_s) and return it.

    If xTrain_s is a list of training sets, delegate to train_classifier_8
    instead of fitting a single model.
    """
    if type(xTrain_s) == list:
        return train_classifier_8(xTrain_s, yTrain_s, kwargs)
    model = QDA(**kwargs)
    model.fit(xTrain_s, yTrain_s)
    return model
def QDA_onFullDataset(): #Parsing Full training dataset XFull = common.parseFile('../UCI HAR Dataset/train/X_train.txt') YFull = common.parseFile('../UCI HAR Dataset/train/y_train.txt') #Parsing Full testing dataset XFullTest = common.parseFile('../UCI HAR Dataset/test/X_test.txt') YFullTest = common.parseFile('../UCI HAR Dataset/test/y_test.txt') #Fitting data using QDA classifier clf = QDA() clf.fit(XFull, YFull.flatten()) #Testing the results precision,recall,fscore = common.checkAccuracy(clf.predict(XFullTest),YFullTest,[1,2,3,4,5,6]) print fscore
def QDATrain(self, feature_nor, feature_cn):
    """Train a QDA classifier separating normal vs. cancer feature vectors.

    feature_nor, feature_cn: 2-D arrays of per-sample features.
    Returns (trained_clf, sensitivity, specificity), or None when no data
    has been loaded (self.valid is falsy).
    """
    if not self.valid:
        print('No data has been loaded')
        return None
    nor_num, nor_dim = feature_nor.shape
    cn_num, cn_dim = feature_cn.shape
    # Format label for each data point: 0 = normal, 1 = cancer.
    labels = np.hstack((np.zeros(int(nor_num)), np.ones(int(cn_num))))
    train_data = np.vstack((feature_nor, feature_cn))
    # Train the QDA classifier.
    clf = QDA()
    trained_clf = clf.fit(train_data, labels)
    # BUG FIX: use true division -- under Python 2 the original int/int
    # division truncated specificity and sensitivity to 0 or 1.
    normal_pred = trained_clf.predict(feature_nor)
    specificity = (normal_pred == 0).sum() / float(nor_num)
    cancer_pred = trained_clf.predict(feature_cn)
    sensitivity = (cancer_pred == 1).sum() / float(cn_num)
    return trained_clf, sensitivity, specificity
def QDA_onFullDataset():
    """Evaluate a QDA classifier trained on the complete UCI HAR dataset,
    printing the f-score for activity labels 1-6 on the test split."""
    train_X = common.parseFile('../UCI HAR Dataset/train/X_train.txt')
    train_y = common.parseFile('../UCI HAR Dataset/train/y_train.txt')
    test_X = common.parseFile('../UCI HAR Dataset/test/X_test.txt')
    test_y = common.parseFile('../UCI HAR Dataset/test/y_test.txt')
    # Fit on the full training set (labels flattened to 1-D).
    clf = QDA()
    clf.fit(train_X, train_y.flatten())
    precision, recall, fscore = common.checkAccuracy(
        clf.predict(test_X), test_y, [1, 2, 3, 4, 5, 6])
    print(fscore)
def QDA_onNonDynamicData(): #Parsing Full training dataset XFull = common.parseFile('../UCI HAR Dataset/train/X_train.txt') YFull = common.parseFile('../UCI HAR Dataset/train/y_train.txt') #Parsing Full testing dataset XFullTest = common.parseFile('../UCI HAR Dataset/test/X_test.txt') YFullTest = common.parseFile('../UCI HAR Dataset/test/y_test.txt') #Getting the dataset associated with Non-Dynamic Activities on training X_NonDynamic, Y_NonDynamic = common.getDataSubset(XFull, YFull.flatten(), [4, 5, 6]) #Getting the dataset associated with Non-Dynamic Activities on testing X_NonDynamicTest, Y_NonDynamicTest = common.getDataSubset( XFullTest, YFullTest.flatten(), [4, 5, 6]) #Fitting data using QDA classifier clf = QDA() clf.fit(X_NonDynamic, Y_NonDynamic.flatten()) precision, recall, fscore = common.checkAccuracy( clf.predict(X_NonDynamicTest), Y_NonDynamicTest, [4, 5, 6]) common.createConfusionMatrix( clf.predict(X_NonDynamicTest).flatten(), Y_NonDynamicTest.flatten(), [4, 5, 6]) print fscore #Getting the dataset associated with Dynamic Activities on training X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(), [1, 2, 3]) #Getting the dataset associated with Dynamic Activities on testing X_DynamicTest, Y_DynamicTest = common.getDataSubset( XFullTest, YFullTest.flatten(), [1, 2, 3]) print len(X_DynamicTest), len(Y_DynamicTest) #Fitting data using QDA classifier clf = QDA() clf.fit(X_Dynamic, Y_Dynamic.flatten()) precision, recall, fscore = common.checkAccuracy( clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3]) common.createConfusionMatrix( clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(), [1, 2, 3]) print fscore
def create_symbol_forecast_model(self):
    """Fit and return a QDA model forecasting direction from lagged returns.

    Uses Lag1/Lag2 returns of the first symbol as predictors and Direction
    as the response, training on dates before model_start_test_date.
    """
    snpret = create_lagged_series(self.symbol_list[0], self.model_start_date,
                                  self.model_end_date, lags=5)
    # Prior two days' returns predict the current direction.
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]
    # Chronological train/test split.
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    y_train = y[y.index < start_test]
    # BUG FIX: was `y.index > start_test`, which dropped the first
    # test-period row and left y_test misaligned with X_test (the sibling
    # implementations in this file all use `>=`).
    y_test = y[y.index >= start_test]
    model = QDA()
    model.fit(X_train, y_train)
    return model
def runQDA(fileNamaParam, trainizingSizeParam):
    """Train a QDA model on a random split of the file's data and feed the
    test-set predictions to evalClassifier.

    trainizingSizeParam: fraction of the data used for training; the rest
    is held out for testing.
    """
    # Fraction of the data held out for testing.
    testSplitSize = 1.0 - trainizingSizeParam
    testAndTrainData = IO_.giveTestAndTrainingData(fileNamaParam)
    trainData = testAndTrainData[0]
    testData = testAndTrainData[1]
    # Deterministic split (fixed random_state).
    featureSpace_train, featureSpace_test, vScore_train, vScore_test = \
        cross_validation.train_test_split(trainData, testData,
                                          test_size=testSplitSize,
                                          random_state=0)
    model = QDA()
    model.fit(featureSpace_train, vScore_train)
    predictedScores = model.predict(featureSpace_test)
    evalClassifier(vScore_test, predictedScores)
def runQDA(fileNamaParam, trainizingSizeParam):
    """Split the dataset, fit QDA on the training portion, and evaluate the
    resulting predictions with evalClassifier.

    trainizingSizeParam: training fraction; 1 - trainizingSizeParam is held
    out for testing.
    """
    split = IO_.giveTestAndTrainingData(fileNamaParam)
    features, targets = split[0], split[1]
    holdout_fraction = 1.0 - trainizingSizeParam
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        features, targets, test_size=holdout_fraction, random_state=0)
    # Fit the model and score the held-out set.
    qda_model = QDA()
    qda_model.fit(x_train, y_train)
    evalClassifier(y_test, qda_model.predict(x_test))
def create_symbol_forecast_model(self):
    """Fit and return a QDA model predicting market direction from the two
    most recent lagged returns of the first symbol in symbol_list."""
    # Create a lagged series of the S&P500 US stock market index
    snpret = create_lagged_series(self.symbol_list[0], self.model_start_date,
                                  self.model_end_date, lags=5)

    # Use the prior two days of returns as predictor
    # values, with direction as the response
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Create training and test sets (chronological split; note X_test and
    # y_test are computed but unused here -- kept for symmetry).
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    y_train = y[y.index < start_test]
    y_test = y[y.index >= start_test]

    model = QDA()
    model.fit(X_train, y_train)
    return model
def qda_predict(train_data, test_data, train_cat, xx, yy):
    """Fit QDA on the training data and return (test predictions, probability
    contour of the positive class over the xx/yy mesh grid)."""
    model = QDA().fit(train_data, train_cat)
    predicted = model.predict(test_data)
    # Flatten the mesh into (n_points, 2) feature pairs.
    grid = np.c_[xx.ravel(), yy.ravel()]
    # P(class 1) at each grid point, reshaped back onto the mesh.
    contour = model.predict_proba(grid)[:, 1].reshape(xx.shape)
    return predicted, contour
def qda_predict(train_data, test_data, train_cat, xx, yy):
    """Quadratic Discriminant Analysis classifier: predict labels for
    test_data and compute a class-1 probability surface on the mesh."""
    classifier = QDA()
    fitted = classifier.fit(train_data, train_cat)
    labels = fitted.predict(test_data)
    # Evaluate class-1 probability on every mesh point, then restore the
    # mesh shape for contour plotting.
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    surface = fitted.predict_proba(mesh_points)[:, 1]
    surface = surface.reshape(xx.shape)
    return labels, surface
def train_qda(X, y, priors=None, reg_param=0.0): """ Builds a quadratic discriminant analysis model Returns: clf: Fitted QDA model """ clf = QDA(priors=priors, reg_param=reg_param) clf = clf.fit(X, y) print 'Quadratic Discriminant Analysis completed!' return clf
def QDA(self,membership,group_labels=None,std=3,ellipses=True,dpi=300,fontsize=10,MD=False, legend=False, numbered=False,of='pdf'):
    """Fit a QDA model on self.data against integer group memberships,
    store the in-sample predictions in self.fit, optionally compute
    confidence ellipses, plot, and persist via self.Store().

    membership: per-sample group assignments (cast to int before fitting).
    Remaining keyword arguments are forwarded to self.PlotXDA.
    """
    self.type = 'QDA'
    membership = membership.astype(int)
    # NOTE: although this method is named QDA, the unqualified name below
    # still resolves to the imported sklearn QDA class (method names live
    # in the class namespace, not the module scope).
    qda = QDA()
    # In-sample predictions: fit and predict on the same data.
    self.fit = qda.fit(self.data, membership).predict(self.data)
    if ellipses:
        self.getEllipses(std,membership)
    self.PlotXDA(membership,group_labels=group_labels,std=std,ellipses=ellipses,dpi=dpi,
                 fontsize=fontsize,MD=MD,legend=legend,numbered=numbered,of=of)
    self.Store()
def qda(input_file,Output):
    """Fit QDA on a whole CSV dataset (column 0 = label, rest = features)
    and report resubstitution metrics to stdout, a text file, and a
    confusion-matrix image under the Output prefix."""
    lvltrace.lvltrace("LVLEntree dans qda")
    try:
        ncol=tools.file_col_coma(input_file)
        data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
        X = data[:,1:]
        y = data[:,0]
        n_samples, n_features = X.shape
        qda=QDA()
        qda.fit(X,y)
        # NOTE: metrics below are computed on the training data itself
        # (resubstitution), not on a held-out split.
        y_pred = qda.predict(X)
        print "#########################################################################################################\n"
        print "Quadratic Discriminant Analysis Accuracy "
        print "classification accuracy:", metrics.accuracy_score(y, y_pred)
        print "precision:", metrics.precision_score(y, y_pred)
        print "recall:", metrics.recall_score(y, y_pred)
        print "f1 score:", metrics.f1_score(y, y_pred)
        print "\n"
        print "#########################################################################################################\n"
        # Write the same metrics plus per-sample true/predicted pairs.
        results = Output+"QDA_metrics.txt"
        file = open(results, "w")
        file.write("Quadratic Discriminant Analaysis estimator accuracy\n")
        file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y, y_pred))
        file.write("Precision Score: %f\n"%metrics.precision_score(y, y_pred))
        file.write("Recall Score: %f\n"%metrics.recall_score(y, y_pred))
        file.write("F1 Score: %f\n"%metrics.f1_score(y, y_pred))
        file.write("\n")
        file.write("True Value, Predicted Value, Iteration\n")
        for n in xrange(len(y)):
            file.write("%f,%f,%i\n"%(y[n],y_pred[n],(n+1)))
        file.close()
        title = "QDA"
        save = Output + "QDA_confusion_matrix.png"
        plot_confusion_matrix(y, y_pred,title,save)
    except (AttributeError):
        # Fallback path: an AttributeError is interpreted as bad input
        # produced by the 'normalize' configuration -- write an explanatory
        # note instead of metrics.
        if configuration.normalization == 'normalize':
            results = Output+"Multinomial_NB_metrics.txt"
            file = open(results, "w")
            file.write("In configuration.py file, normalization='normalize' -- Input Values must be superior to 0\n")
            file.close()
    lvltrace.lvltrace("LVLSortie dans qda")
def qda(input_file,Output,test_size):
    """Fit QDA on a random train/test split of a CSV dataset (column 0 =
    label) and report held-out metrics to stdout, a text file, and a
    confusion-matrix image under the Output prefix."""
    lvltrace.lvltrace("LVLEntree dans qda split_test")
    try:
        ncol=tools.file_col_coma(input_file)
        data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
        X = data[:,1:]
        y = data[:,0]
        n_samples, n_features = X.shape
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        print X_train.shape, X_test.shape
        # NOTE(review): the local variable is named `lda` but holds a QDA
        # estimator -- likely a copy/paste remnant from a sibling function.
        lda=QDA()
        lda.fit(X_train,y_train)
        y_pred = lda.predict(X_test)
        print "Quadratic Discriminant Analysis Accuracy "
        print "classification accuracy:", metrics.accuracy_score(y_test, y_pred)
        print "precision:", metrics.precision_score(y_test, y_pred)
        print "recall:", metrics.recall_score(y_test, y_pred)
        print "f1 score:", metrics.f1_score(y_test, y_pred)
        #LVLprint "\n"
        # Persist metrics plus per-sample true/predicted pairs.
        results = Output+"QDA_metrics_test.txt"
        file = open(results, "w")
        file.write("Quadratic Discriminant Analaysis estimator accuracy\n")
        file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y_test, y_pred))
        file.write("Precision Score: %f\n"%metrics.precision_score(y_test, y_pred))
        file.write("Recall Score: %f\n"%metrics.recall_score(y_test, y_pred))
        file.write("F1 Score: %f\n"%metrics.f1_score(y_test, y_pred))
        file.write("\n")
        file.write("True Value, Predicted Value, Iteration\n")
        for n in xrange(len(y_test)):
            file.write("%f,%f,%i\n"%(y_test[n],y_pred[n],(n+1)))
        file.close()
        title = "QDA %f"%test_size
        save = Output + "QDA_confusion_matrix"+"_%s.png"%test_size
        plot_confusion_matrix(y_test, y_pred,title,save)
    except (AttributeError):
        # Same fallback convention as the full-dataset variant above.
        if configuration.normalization == 'normalize':
            results = Output+"Multinomial_NB_metrics_test.txt"
            file = open(results, "w")
            file.write("In configuration.py file, normalization='normalize' -- Input Values must be superior to 0\n")
            file.close()
    lvltrace.lvltrace("LVLSortie dans qda split_test")
def train_qda(X, y, priors=None, reg_param=0.0): """ Builds a quadratic discriminant analysis model Returns: clf: Fitted QDA model """ clf = QDA(priors=priors, reg_param=reg_param) clf = clf.fit(X,y) print 'Quadratic Discriminant Analysis completed!' return clf
def create_symbol_forecast_model(self):
    """Fit and return a QDA model forecasting market direction from the
    Lag1/Lag2 returns of the first symbol in symbol_list."""
    # Create a lagged series of the market index
    snpret = create_lagged_series(
        self.symbol_list[0], self.model_start_date,
        self.model_end_date, lags = 5
    )

    # Use the prior X days of returns as predictor values with direction
    # as the response.
    X = snpret[['Lag1','Lag2']]
    y = snpret["Direction"]

    # Create training and test sets (chronological split; X_test and y_test
    # are computed but unused here -- kept for symmetry).
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    y_train = y[y.index < start_test]
    y_test = y[y.index >= start_test]

    # model to use is Quadratic Discriminant Analysis
    model = QDA()
    model.fit(X_train, y_train)
    return model
def performQDAClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    Quadratic Discriminant Analysis binary classification.

    Clamps near-zero feature values, fits QDA on the training split,
    optionally pickles the model to '<fout>-<timestamp>.pickle', and
    returns test-set accuracy. `parameters` is accepted but unused.
    """
    def replaceTiny(x):
        # BUG FIX: the original assigned to the local `x` and implicitly
        # returned None, so `.apply(replaceTiny)` wiped out the data.
        # Now clamps values with magnitude below 1e-4 and returns the rest.
        if abs(x) < 0.0001:
            return 0.0001
        return x

    # NOTE(review): assumes X_train/X_test support elementwise .apply
    # (pandas Series); for a DataFrame, applymap would be needed -- confirm.
    X_train = X_train.apply(replaceTiny)
    X_test = X_test.apply(replaceTiny)

    clf = QDA()
    clf.fit(X_train, y_train)

    if savemodel == True:
        fname_out = '{}-{}.pickle'.format(fout, datetime.now())
        with open(fname_out, 'wb') as f:
            cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)
    return accuracy
def create_symbol_forecast_model(self):
    """Fit and return a QDA model that forecasts market direction from the
    two most recent lagged returns of the first listed symbol."""
    # Lagged return series of the S&P500 over the modelling window.
    snpret = create_lagged_series(self.symbol_list[0], self.model_start_date,
                                  self.model_end_date, lags=5)
    # Two prior days of returns predict today's direction.
    predictors = snpret[["Lag1", "Lag2"]]
    response = snpret["Direction"]
    # Chronological split at the start of the test period.
    cutoff = self.model_start_test_date
    X_train = predictors[predictors.index < cutoff]
    X_test = predictors[predictors.index >= cutoff]
    y_train = response[response.index < cutoff]
    y_test = response[response.index >= cutoff]
    model = QDA()
    model.fit(X_train, y_train)
    return model
def performQDAClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    Quadratic Discriminant Analysis binary classification.

    Replaces near-zero feature magnitudes with 1e-4, fits QDA, optionally
    saves the model as a pickle, and returns accuracy on the test split.
    The `parameters` argument is accepted but unused.
    """
    def replaceTiny(x):
        # BUG FIX: originally this rebound the local `x` and fell through
        # returning None, so the subsequent `.apply` replaced every value
        # with NaN. Return the clamped (or unchanged) value instead.
        if abs(x) < 0.0001:
            return 0.0001
        return x

    # NOTE(review): elementwise behavior assumes a pandas Series input;
    # for DataFrames this would apply per column -- confirm with callers.
    X_train = X_train.apply(replaceTiny)
    X_test = X_test.apply(replaceTiny)

    clf = QDA()
    clf.fit(X_train, y_train)

    if savemodel == True:
        fname_out = '{}-{}.pickle'.format(fout, datetime.now())
        with open(fname_out, 'wb') as f:
            cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)
    return accuracy
class QDAClassifier(Classifier):
    '''Quadratic Discriminant analysis classifier'''

    def __init__(self):
        super(QDAClassifier, self).__init__()
        # Figure identifier; its meaning is defined by the Classifier base
        # class, which is not visible here -- confirm.
        self.fig = 20
        self.is_trainable = True
        self.is_trained = False

    def train(self, classification_data, indices=None, settings_name=None, **kwargs):
        """Fit a QDA model on the selected feature columns against the
        hurricane ground-truth labels; returns self for chaining."""
        super(QDAClassifier, self).train(classification_data, indices, settings_name, **kwargs)
        # The base train() is expected to populate self.settings['indices']
        # from the arguments above.
        indices = self.settings['indices']
        self.qda = QDA(**self.classifier_kwargs)
        self.qda.fit(classification_data.data[:, indices], classification_data.are_hurr_actual)
        return self

    def classify(self, classification_data):
        """Predict hurricane labels for each row of classification_data,
        caching the result in self.are_hurr_pred before returning it."""
        super(QDAClassifier, self).classify(classification_data)
        indices = self.settings['indices']
        self.are_hurr_pred = self.qda.predict(classification_data.data[:, indices])
        return self.are_hurr_pred
def QDA_onNonDynamicData():
    """Train and evaluate QDA separately on non-dynamic (labels 4-6) and
    dynamic (labels 1-3) UCI HAR activities, printing each group's f-score
    and rendering its confusion matrix."""
    # Parsing Full training dataset
    XFull = common.parseFile('../UCI HAR Dataset/train/X_train.txt')
    YFull = common.parseFile('../UCI HAR Dataset/train/y_train.txt')
    # Parsing Full testing dataset
    XFullTest = common.parseFile('../UCI HAR Dataset/test/X_test.txt')
    YFullTest = common.parseFile('../UCI HAR Dataset/test/y_test.txt')
    # Getting the dataset associated with Non-Dynamic Activities on training
    X_NonDynamic,Y_NonDynamic = common.getDataSubset(XFull,YFull.flatten(),[4,5,6])
    # Getting the dataset associated with Non-Dynamic Activities on testing
    X_NonDynamicTest,Y_NonDynamicTest = common.getDataSubset(XFullTest,YFullTest.flatten(),[4,5,6])
    # Fitting data using QDA classifier
    clf = QDA()
    clf.fit(X_NonDynamic, Y_NonDynamic.flatten())
    precision,recall,fscore = common.checkAccuracy(clf.predict(X_NonDynamicTest),Y_NonDynamicTest,[4,5,6])
    common.createConfusionMatrix(clf.predict(X_NonDynamicTest).flatten(),Y_NonDynamicTest.flatten(),[4,5,6])
    print fscore
    # Getting the dataset associated with Dynamic Activities on training
    X_Dynamic,Y_Dynamic = common.getDataSubset(XFull,YFull.flatten(),[1,2,3])
    # Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest,Y_DynamicTest = common.getDataSubset(XFullTest,YFullTest.flatten(),[1,2,3])
    print len(X_DynamicTest),len(Y_DynamicTest)
    # Fitting data using QDA classifier (re-using the clf name for the
    # dynamic-activity model)
    clf = QDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())
    precision,recall,fscore = common.checkAccuracy(clf.predict(X_DynamicTest),Y_DynamicTest,[1,2,3])
    common.createConfusionMatrix(clf.predict(X_DynamicTest).flatten(),Y_DynamicTest.flatten(),[1,2,3])
    print fscore
def QDAResult3D():
    """Randomly split the module-level normal/cancer data, train QDA on the
    training rows, and return (sensitivity, specificity) on the held-out rows.

    Relies on module globals: t_data_perc, norDataNum, cnDataNum,
    train_data, labels, normal_pt, cancer_pt, randTestData.
    """
    norTrainNum, nor_isTraining = randTestData(t_data_perc, norDataNum)
    cnTrainNum, cn_isTraining = randTestData(t_data_perc, cnDataNum)
    isTraining = np.hstack((nor_isTraining, cn_isTraining))
    # Training QDA classifier on the sampled rows.
    clf = QDA()
    trained_clf = clf.fit(train_data[isTraining], labels[isTraining])
    # Using the remaining data for testing.
    # BUG FIX: use float division -- under Python 2 the original int/int
    # division truncated both rates to 0 or 1.
    normal_pred = trained_clf.predict(normal_pt[nor_isTraining == False])
    trueneg_n = (normal_pred == 0).sum()
    specificity = trueneg_n / float(norDataNum - norTrainNum)
    cancer_pred = trained_clf.predict(cancer_pt[cn_isTraining == False])
    truepos_n = (cancer_pred == 1).sum()
    sensitivity = truepos_n / float(cnDataNum - cnTrainNum)
    return sensitivity, specificity
def QDAResult3D():
    """Evaluate QDA sensitivity/specificity on a random train/test split of
    the module-level normal and cancer point sets.

    Uses module globals: t_data_perc, norDataNum, cnDataNum, train_data,
    labels, normal_pt, cancer_pt, randTestData.
    Returns (sensitivity, specificity) as floats.
    """
    norTrainNum, nor_isTraining = randTestData(t_data_perc, norDataNum)
    cnTrainNum, cn_isTraining = randTestData(t_data_perc, cnDataNum)
    isTraining = np.hstack((nor_isTraining, cn_isTraining))
    # Train the QDA classifier on the selected training rows.
    clf = QDA()
    trained_clf = clf.fit(train_data[isTraining], labels[isTraining])
    # Score the held-out rows.
    # BUG FIX: wrap the denominators in float() -- under Python 2 the
    # original integer division collapsed both ratios to 0 or 1.
    normal_pred = trained_clf.predict(normal_pt[nor_isTraining == False])
    trueneg_n = (normal_pred == 0).sum()
    specificity = trueneg_n / float(norDataNum - norTrainNum)
    cancer_pred = trained_clf.predict(cancer_pt[cn_isTraining == False])
    truepos_n = (cancer_pred == 1).sum()
    sensitivity = truepos_n / float(cnDataNum - cnTrainNum)
    return sensitivity, specificity
def quadraticDiscriminantAnalysis(dataFile, outputFolder, regParam,parameters): inputData = yaml.load(open(dataFile)) trainingSet = inputData['training'] testingSet = inputData['testing'] inputFile = inputData['inputFile'] label = inputData['label'] resultSet = [] if not os.path.exists(outputFolder): try: os.makedirs(outputFolder) except OSError as exc: if exc.errno != errno.EEXIST: raise exc pass for i in range(len(trainingSet)): """testPredictions = [] trainLabels = [] trainFeatures = [] trainDataSet = arff.load(trainingSet[i]) for row in trainDataSet: content = list(row) trainFeatures.append(content[0:len(content)-1]) trainLabels.append(content[len(content)-1]) testFeatures = [] testLabels = [] testDataSet = arff.load(testingSet[i]) for row in testDataSet: content = list(row) testFeatures.append(content[0:len(content)-1]) testLabels.append(content[len(content)-1])""" train_df = pd.read_csv(trainingSet[i]) train_labels = train_df[label] train_features = train_df.drop(label,axis=1) test_df = pd.read_csv(testingSet[i]) test_predictions = pd.DataFrame(test_df[label]) test_features = test_df.drop(label,axis=1) qda = QDA(reg_param=regParam) qda.fit(train_features, train_labels) test_predictions['predictions'] = qda.predict(test_features) #testPredictions = np.array(qda.predict(testFeatures)).tolist() resultFile = outputFolder + '/result' + str(i + 1) + '.csv' """with open(resultFile,'w') as outfile: outfile.write('predictions:\n') outfile.write(yaml.dump(testPredictions, default_flow_style=False)) outfile.write('true_labels:\n') outfile.write(yaml.dump(testLabels, default_flow_style=False))""" test_predictions.to_csv(resultFile,index=False) resultSet.append(resultFile) resultDict = dict() #parameters = dict() resultDict['results'] = resultSet resultDict['label'] = label #parameters['parameter.p'] = regParam if not parameters: parameters['parameter']='default' resultDict['algo_params'] = parameters resultDict['split_params'] = inputData['split_params'] if 
'feature_selection_parameters' in inputData: resultDict['feature_selection_parameters'] = inputData['feature_selection_parameters'] resultDict['feature_selection_algorithm'] = inputData['feature_selection_algorithm'] if 'feature_extraction_parameters' in inputData: resultDict['feature_extraction_parameters'] = inputData['feature_extraction_parameters'] resultDict['feature_extraction_algorithm'] = inputData['feature_extraction_algorithm'] if 'preprocessing_params' in inputData: resultDict['preprocessing_params'] = inputData['preprocessing_params'] resultDict['inputFile'] = inputFile resultDict['algorithm'] = "QuadraticDiscriminantAnalysis" yaml.dump(resultDict, open(outputFolder + '/results.yaml', 'w')) def main(args): inputFile = '' outputFolder = '' parameters=dict() regParam = 0.0 #float; regularizes the covariance estimate as [(1-reg_param)*Sigma + reg_param*np.eye(n_features)] try: opts,args = getopt.getopt(args, "i:o:p:", []) except getopt.GetoptError: print 'QuadraticDiscriminantAnalysis.py -i <inputFile> -o <outputFolder> -p <regParam>' sys.exit(2) for opt,arg in opts: if opt == '-i': inputFile = arg elif opt == '-o': outputFolder = arg elif opt == '-p': regParam = float(arg) parameters['parameter.p']=arg quadraticDiscriminantAnalysis(inputFile, outputFolder, regParam,parameters) if __name__ == "__main__": main(sys.argv[1:])
def QuadDA(X_train, Y_train):
    """Fit and return a QDA classifier on the given training data."""
    return QDA().fit(X_train, Y_train)
from varplot import *
from sklearn.qda import QDA
import numpy as np
import pickle

# Load training/test feature arrays and their class labels from disk.
data = np.load("sd.npy")
truth = np.load("truth.npy")
testdata = np.load("sd_test.npy")
testtruth = np.load("truth_test.npy")
print(len(data))

# Fit QDA on the training set and persist the fitted model.
clf = QDA()
clf.fit(data,truth)
output=open("qda.pkl",'wb')
pickle.dump(clf,output)
output.close()

# Overall accuracy on the training and test sets.
print(clf.score(data,truth))
print(clf.score(testdata,testtruth))

# Per-class index masks. Presumably label 2 = stars and label 1 = galaxies,
# inferred from the "Stars" banner below -- confirm against the label encoding.
s = np.where(truth == 2)[0]
st = np.where(testtruth == 2)[0]
g = np.where(truth == 1)[0]
gt = np.where(testtruth == 1)[0]
print("Stars")
# Training accuracy restricted to the star class.
print(clf.score(data[s],truth[s]))
ell.set_clip_box(splot.bbox) ell.set_alpha(0.5) splot.add_artist(ell) splot.set_xticks(()) splot.set_yticks(()) def plot_lda_cov(lda, splot): plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red') plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue') def plot_qda_cov(qda, splot): plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red') plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue') for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # LDA lda = LDA(solver='svd', store_covariance=True) y_pred = lda.fit(X, y).predict(X) splot = plot_data(lda, X, y, y_pred, fig_index = 2 * i + 1) plot_lda_cov(lda, splot) plt.axis('tight') # QDA qda = QDA() y_pred = qda.fit(X, y, store_covariances=True).predict(X) splot = plot_data(qda, X, y, y_pred, fig_index = 2 * i + 2) plot_qda_cov(qda, splot) plt.axis('tight') plt.suptitle('LDA vs QDA') plt.show()
y_raw.append(labels) # concatanenate the data sets into one dataframe X = pd.concat(raw) y = pd.concat(y_raw) X_train = np.asarray(X.astype(float)) y = np.asarray(y.astype(float)) X_train = data_preprocess_train(X_train) # train the classifier for each label for i in range(6): print 'Training subject_id = ',subject_id, ' label: ',human_labels[i] y_train = y[:,i] lr.fit(X_train[::subsample,:], y_train[::subsample]) qda.fit(X_train[::subsample,:], y_train[::subsample]) lda.fit(X_train[::subsample,:], y_train[::subsample]) print 'Training Complete' print 'Testing' for subject_id in range(1,num_subjects): test = [] # testing data to be stored here idx = [] # test data ids
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split

# Load the 2-D data set: columns 0-1 are features, column 2 the class label.
data_nonLinear = np.genfromtxt('exp2d2.txt', delimiter=',')
temp = pd.DataFrame(data_nonLinear)
# Data visualization
temp.head()

x = data_nonLinear[:, 0:2]
y = data_nonLinear[:, np.newaxis, 2]
y = y.ravel()
x_train, x_test, y_train, y_test = train_test_split(x, y)

#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.qda import QDA
qdd = QDA()
#ldd=LinearDiscriminantAnalysis()
qdd.fit(x_train, y_train)
y_pred = qdd.predict(x_test)

from sklearn import metrics
print(metrics.accuracy_score(y_test, y_pred))

# Plotting
import matplotlib.pyplot as plt
x1 = x_test[:, 0]
x2 = x_test[:, 1]
# One vectorized scatter call instead of one call per point; class 1 is red,
# everything else blue.  The old plt.hold(True)/plt.hold(False) calls were
# removed: pyplot.hold() was deprecated in matplotlib 2.0 and removed in 3.0
# (holding is the default behaviour), so they crashed on modern matplotlib.
colors = ['red' if label == 1 else 'blue' for label in y_test]
plt.scatter(x1, x2, color=colors)
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
X_std_test = StandardScaler().fit_transform(Xtest) ############ LDA ##################### #Construcción y Fit del modelo LDA lda_model = LDA() lda_model.fit(X_std,y) #Score conjunto de entrenamiento y conjunto de testing. print lda_model.score(X_std,y) print lda_model.score(X_std_test,ytest) ############ QDA ##################### #Construcción y Fit del modelo QDA qda_model = QDA() qda_model.fit(X_std,y) #Score conjunto de entrenamiento y conjunto de testing. print qda_model.score(X_std,y) print qda_model.score(X_std_test,ytest) # ############ KNN ##################### # #Construcción y Fit del modelo KNN # knn_model = KNeighborsClassifier(n_neighbors=10) # knn_model.fit(X_std,y) # #Score conjunto de entrenamiento y conjunto de testing. # print knn_model.score(X_std,y) # print knn_model.score(X_std_test,ytest) # # # score_training=[] # score_test=[]
def _ensureDirectory(path):
    # Create *path* (and parents) if missing; tolerate a concurrent creator.
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise exc


def quadraticDiscriminantAnalysis(dataFile, outputFolder, regParam, parameters):
    """Train and evaluate one QDA model per train/test split listed in *dataFile*.

    dataFile     -- YAML file naming the training/testing CSVs, the original
                    input file, the label column and split parameters
    outputFolder -- directory receiving pickled models, feature importances,
                    per-split prediction CSVs and a summary results.yaml
    regParam     -- value forwarded to QDA(reg_param=...)
    parameters   -- dict of algorithm parameters recorded in results.yaml;
                    an empty/None value is recorded as {'parameter': 'default'}
    """
    # NOTE(review): yaml.load on an arbitrary file can construct arbitrary
    # objects; prefer yaml.safe_load if the input is not fully trusted.
    with open(dataFile) as fh:
        inputData = yaml.load(fh)
    trainingSet = inputData['training']
    testingSet = inputData['testing']
    inputFile = inputData['inputFile']
    label = inputData['label']

    resultSet = []
    modelset = []
    importanceset = []

    # Output layout: models/ and FeatureImportance/ under outputFolder.
    _ensureDirectory(outputFolder)
    modelsfolder = outputFolder + "/models/"
    _ensureDirectory(modelsfolder)
    importancefolder = outputFolder + "/FeatureImportance/"
    _ensureDirectory(importancefolder)

    for i in range(len(trainingSet)):
        train_df = pd.read_csv(trainingSet[i])
        train_labels = train_df[label]
        train_features = train_df.drop(label, axis=1)

        test_df = pd.read_csv(testingSet[i])
        test_predictions = pd.DataFrame(test_df[label])
        test_features = test_df.drop(label, axis=1)

        qda = QDA(reg_param=regParam)
        qda.fit(train_features, train_labels)

        # Persist the fitted model; 'with' closes the file (the original also
        # called fd.close() redundantly after the with-block).
        modelFile = modelsfolder + "QuadraticDiscriminantAnalysisModel" + str(i + 1) + ".pkl"
        with open(modelFile, 'wb') as fd:
            pickle.dump(qda, fd)
        modelset.append(modelFile)

        importanceFile = calculateFeatureImportance(train_features, qda, importancefolder, i)
        importanceset.append(importanceFile)

        test_predictions['predictions'] = qda.predict(test_features)
        resultFile = outputFolder + '/result' + str(i + 1) + '.csv'
        test_predictions.to_csv(resultFile, index=False)
        resultSet.append(resultFile)

    # Summarize the run; optional sections are copied through when present.
    resultDict = dict()
    resultDict['results'] = resultSet
    resultDict['models'] = modelset
    resultDict['featureimportance'] = importanceset
    resultDict['label'] = label
    if not parameters:
        # The original mutated a possibly-None argument and crashed; rebind a
        # fresh dict instead.
        parameters = {'parameter': 'default'}
    resultDict['algo_params'] = parameters
    resultDict['split_params'] = inputData['split_params']
    if 'feature_selection_parameters' in inputData:
        resultDict['feature_selection_parameters'] = inputData['feature_selection_parameters']
        resultDict['feature_selection_algorithm'] = inputData['feature_selection_algorithm']
    if 'feature_extraction_parameters' in inputData:
        resultDict['feature_extraction_parameters'] = inputData['feature_extraction_parameters']
        resultDict['feature_extraction_algorithm'] = inputData['feature_extraction_algorithm']
    if 'preprocessing_params' in inputData:
        resultDict['preprocessing_params'] = inputData['preprocessing_params']
    resultDict['inputFile'] = inputFile
    resultDict['algorithm'] = "QuadraticDiscriminantAnalysis"
    # Close the results file deterministically (original leaked the handle).
    with open(outputFolder + '/results.yaml', 'w') as out:
        yaml.dump(resultDict, out)
# NOTE(review): notebook fragment -- df2, feature_x/feature_y, cancer_pt,
# inRedox, xaxis_range and grid_resol are defined in earlier cells.
x2 = np.array(df2[feature_x]).reshape(-1, 1)
y2 = np.array(df2[feature_y]).reshape(-1, 1)
normal_pt = np.hstack([x2, y2])

# In[48]:

#Sort given training data with corresponding labels (0 = normal, 1 = cancer)
nor_n = np.zeros(int(normal_pt.size / normal_pt.ndim))
can_n = np.ones(int(cancer_pt.size / cancer_pt.ndim))
labels = np.hstack((nor_n, can_n))
train_data = np.vstack((normal_pt, cancer_pt))

# In[49]:

clf = QDA()
trained_clf = clf.fit(train_data, labels)
normal_pred = trained_clf.predict(normal_pt)
# specificity = true negatives / number of normal points
trueneg_n = (normal_pred == 0).sum()
specificity = trueneg_n / int(normal_pt.size / normal_pt.ndim)

# In[50]:

# sensitivity = true positives / number of cancer points
cancer_pred = trained_clf.predict(cancer_pt)
truepos_n = (cancer_pred == 1).sum()
sensitivity = truepos_n / int(cancer_pt.size / cancer_pt.ndim)

# In[51]:

#Generate grids for the entire plot
# NOTE(review): chunk ends mid-call -- the np.meshgrid arguments continue
# beyond this chunk.
if inRedox:
    xx, yy = np.meshgrid(np.linspace(0, xaxis_range, grid_resol),
# Sample counts: class 0 ("st") vs everything else ("rr").
N_tot = len(y)
N_st = np.sum(y == 0)
N_rr = N_tot - N_st
N_train = len(y_train)
N_test = len(y_test)
N_plot = 5000 + N_rr

#----------------------------------------------------------------------
# perform QDA: refit using an increasing number of leading feature
# columns ("colors") and record each classifier and its test predictions.
classifiers = []
predictions = []
Ncolors = np.arange(1, X.shape[1] + 1)

for nc in Ncolors:
    clf = QDA()
    clf.fit(X_train[:, :nc], y_train)
    y_pred = clf.predict(X_test[:, :nc])
    classifiers.append(clf)
    predictions.append(y_pred)
predictions = np.array(predictions)

# completeness/contamination per feature count (helper defined elsewhere).
completeness, contamination = completeness_contamination(
    predictions, y_test)
print "completeness", completeness
print "contamination", contamination

#------------------------------------------------------------
# Compute the decision boundary
def cls_create(xs, ys):
    """Fit and return a QDA classifier trained on (xs, ys)."""
    classifier = QDA()
    # sklearn estimators return themselves from fit(), so this hands back
    # the same fitted object the original did.
    return classifier.fit(xs, ys)
# Sample counts: class 0 ("st") vs everything else ("rr").
N_tot = len(y)
N_st = np.sum(y == 0)
N_rr = N_tot - N_st
N_train = len(y_train)
N_test = len(y_test)
N_plot = 5000 + N_rr

#----------------------------------------------------------------------
# perform QDA: refit using an increasing number of leading feature
# columns ("colors") and record each classifier and its test predictions.
classifiers = []
predictions = []
Ncolors = np.arange(1, X.shape[1] + 1)

for nc in Ncolors:
    clf = QDA()
    clf.fit(X_train[:, :nc], y_train)
    y_pred = clf.predict(X_test[:, :nc])
    classifiers.append(clf)
    predictions.append(y_pred)
predictions = np.array(predictions)

# completeness/contamination per feature count (helper defined elsewhere).
completeness, contamination = completeness_contamination(predictions, y_test)
print "completeness", completeness
print "contamination", contamination

#------------------------------------------------------------
# Compute the decision boundary
# The classifier fitted on the first two feature columns is used below.
clf = classifiers[1]
# Similar as LDA, need not assume same covariance between classes.
# sklearn.qda was deprecated in scikit-learn 0.17 and removed in 0.19; try the
# modern location first and fall back so the script runs on old installs too.
try:
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
except ImportError:
    from sklearn.qda import QDA
import numpy as np

# Toy data: three points per class on opposite sides of the origin.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

# Visualization: class 1 in green, class 2 in blue.
import matplotlib.pyplot as plt
plt.figure(1)
plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')
plt.scatter(X[y == 2, 0], X[y == 2, 1], color='b')
plt.title('X Data Set Visualization')

# Classification: fit on the toy data and classify a single query point.
clf = QDA()
clf = clf.fit(X, y)
print(clf.predict([[-0.8, -1]]))
plt.show()
# NOTE(review): fragment -- 'splot.set_yticks(())' is the tail of a plotting
# helper whose 'def' line lies outside this chunk.
splot.set_yticks(())


def plot_lda_cov(lda, splot):
    # LDA: one covariance shared by both classes, drawn at each class mean.
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    # QDA: one covariance per class.
    plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')


###############################################################################
# Fit LDA and QDA on the two synthetic data sets and plot the data with the
# fitted covariance ellipses side by side.
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # LDA
    lda = LDA(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')
    # QDA; store_covariances=True is the legacy sklearn.qda fit() signature.
    qda = QDA()
    y_pred = qda.fit(X, y, store_covariances=True).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.suptitle('LDA vs QDA')
plt.show()
# NOTE(review): savefig after show() typically writes an empty image with
# non-interactive backends -- consider saving before plt.show().
plt.savefig('image.png')
def BuildModel(self, data, labels):
    """Create a QDA classifier, train it on *data*/*labels* and return it.

    SQDA.fit() returns the estimator itself, so the fitted object is
    handed straight back to the caller.
    """
    return SQDA().fit(data, labels)
def BuildModel(self, data, labels):
    """Construct and train the QDA classifier for this benchmark run."""
    classifier = SQDA()
    classifier.fit(data, labels)
    return classifier
def trainQDA(XTrain, YTrain, XValid, YValid):
    """Fit a QDA classifier on the training set and report validation accuracy.

    XTrain, YTrain -- training features and labels
    XValid, YValid -- held-out features and labels used for scoring

    Returns the validation accuracy.  The original computed the score,
    printed it and discarded it; returning it is backward-compatible for
    callers that ignored the previous None return.
    """
    qda = QDA()
    qda.fit(XTrain, YTrain)
    score = qda.score(XValid, YValid)
    print('QDA score : %f' % (score))
    return score
# *****************************************************************************
# Quadratic Discriminant Analysis
from sklearn import datasets
from sklearn import metrics
# sklearn.qda was deprecated in scikit-learn 0.17 and removed in 0.19; try the
# modern location first and fall back so the script runs on old installs too.
try:
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
except ImportError:
    from sklearn.qda import QDA

# load the iris datasets
dataset = datasets.load_iris()

# fit a QDA model to the data
model = QDA()
model.fit(dataset.data, dataset.target)
print(model)

# make predictions on the training data (no held-out split in this demo)
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
import numpy as np
import sklearn
from sklearn.qda import QDA

# Load training features and labels from CSV files.
trainX = np.genfromtxt('train_X.csv', delimiter = ',')
trainY = np.genfromtxt('train_Y.csv')

# Fit the classifier and report training-set accuracy.
clf = QDA()
clf.fit(trainX, trainY)
# print(x) with a single argument behaves identically under Python 2 and 3;
# the original bare 'print expr' statement was Python-2-only.
print(clf.score(trainX, trainY))