def tryLinearDiscriminantAnalysis(goFast):
    """Grid-search RandomizedPCA settings and score an LDA classifier on the
    validation split of the dt1 svmlight dataset (Python 2 print statement).

    :param goFast: if truthy, load the reduced 1500-sample files instead of
        the full dataset (same 253659-feature space).
    """
    from sklearn.datasets import dump_svmlight_file, load_svmlight_file
    if goFast:
        # Small subsets for quick experiments.
        training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
    else:
        training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)
    # NOTE(review): sklearn.lda / grid_search / RandomizedPCA are pre-0.18
    # module locations; this code targets an old scikit-learn release.
    from sklearn.lda import LDA
    from sklearn.metrics import accuracy_score
    from sklearn.grid_search import ParameterGrid
    from sklearn.decomposition import RandomizedPCA
    rpcaDataGrid = [{"n_components": [10,45,70,100],
                     "iterated_power": [2, 3, 4],
                     "whiten": [True]}]
    for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
        rpcaOperator = RandomizedPCA(**rpca_parameter_set)
        rpcaOperator.fit(training_data,training_labels)
        # transform() ignores the label argument; it is passed for symmetry
        # with fit() only.
        new_training_data = rpcaOperator.transform(training_data,training_labels)
        new_validation_data = rpcaOperator.transform(validation_data,validation_labels)
        ldaOperator = LDA()
        ldaOperator.fit(new_training_data,training_labels)
        print "Score = " + str(accuracy_score(validation_labels,ldaOperator.predict(new_validation_data)))
class Ensemble:
    """Majority-vote ensemble of a random forest, LDA, a decision tree and
    AdaBoost, fitted and evaluated immediately on construction.

    After __init__, ``self.pred`` holds one majority-vote label per test
    sample.
    """

    def __init__(self, data):
        # Member classifiers; hyper-parameters kept as configured.
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1,
                                         min_samples_split=45,
                                         criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)
        self.make_prediction(data)

    def make_prediction(self, data):
        ''' Make an ensemble prediction '''
        members = (self.rf, self.lda, self.dec, self.ada)
        for member in members:
            member.fit(data.features_train, data.labels_train)
        # One prediction vector per member, in voting order rf/lda/dec/ada.
        votes = [member.predict(data.features_test) for member in members]
        self.pred = []
        for ballot in zip(*votes):
            ballot = list(ballot)
            # Most common vote wins; ties resolve to the earliest member.
            self.pred.append(max(ballot, key=ballot.count))
def startlda(self):
    """Fit an LDA classifier on self.traindata/self.trainclass and print its
    predictions for self.testdata (Python 2 print statement)."""
    from sklearn.lda import LDA
    clf=LDA()
    X=np.array(self.traindata)
    Y=np.array(self.trainclass)
    y=self.testdata
    # Coerce every cell: features to float, labels to int. Note that the
    # loop variable `y` inside the comprehensions shadows the outer `y`.
    X=[[float(y) for y in x] for x in X]
    # NOTE(review): Y stays a list of lists, so fit() receives 2-D labels —
    # presumably each row holds a single class id; confirm.
    Y=[[int(y) for y in x] for x in Y]
    y=[[float(y) for y in x] for x in self.testdata]
    clf.fit(X,Y)
    print clf.predict(y)
def DLDA(self, trainLabel, featureData, testData):
    """Fit an LDA model on (featureData, trainLabel) and return its
    predicted labels for testData."""
    model = LDA()
    model.fit(featureData, trainLabel)
    return model.predict(testData)
def test_twomethods(self):
    """EPAC Methods(LDA, linear SVC) must reproduce plain scikit-learn fits
    on the same data, both per-classifier and after reduce()."""
    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=20,
                                        n_features=5,
                                        n_informative=2)
    # = With EPAC
    wf = Methods(LDA(), SVC(kernel="linear"))
    r_epac = wf.run(X=X, y=y)
    # = With SKLEARN
    lda = LDA()
    svm = SVC(kernel="linear")
    lda.fit(X, y)
    svm.fit(X, y)
    r_sklearn = [lda.predict(X), svm.predict(X)]
    # Comparison: predictions must match classifier by classifier.
    for i_cls in range(2):
        comp = np.all(np.asarray(r_epac[i_cls][key_y_pred]) ==
                      np.asarray(r_sklearn[i_cls]))
        self.assertTrue(comp, u'Diff Methods')
    # test reduce (Python 2: dict.values() is an indexable list here)
    r_epac_reduce = [wf.reduce().values()[0][key_y_pred],
                     wf.reduce().values()[1][key_y_pred]]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
def LDAmeanScore(X, Y, n_folds, dim_reduction=0):
    """Mean cross-validated accuracy (percent) of an LDA classifier.

    :param X: classifier input matrix, n_samples * n_parameters
        (n_parameters >= 2, n_samples > 0); data assumed suitable for LDA.
    :param Y: label vector, length n_samples.
    :param n_folds: number of KFold splits, > 1.
    :param dim_reduction: 0 = no reduction; -1 = pick the best dimension
        automatically; any value > 0 = reduce to that many PCA dimensions.
        (NOTE(review): the original docstring said any negative value
        triggers the automatic choice, but only -1 is handled.)
    :return: 100 * mean cross-validation score (also printed), or -1 when
        n_folds > n_samples.
    """
    if dim_reduction > 0 and X.shape[1] > dim_reduction:
        X = dim_reduction_PCA(X, dim_reduction)
    if dim_reduction == -1:
        # Automatic choice of the projection dimensionality.
        dim_reduction = best_dimension(X)
        print "Best dimension : " + str(dim_reduction)
        X = dim_reduction_PCA(X, dim_reduction)
    if X.shape[0] > n_folds:
        # Cross-validation to estimate the performance of an LDA classifier.
        kf = KFold(n=len(Y), n_folds=n_folds, shuffle=True, random_state=None)
        scores = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index, :], X[test_index, :]
            Y_train, Y_test = Y[train_index], Y[test_index]
            cl = LDA()
            cl.fit(X_train, Y_train)
            scores.append(cl.score(X_test, Y_test))
        print "Score moyen : ", np.mean(np.array(scores))
        return 100.0 * np.mean(np.array(scores))
    else:
        return -1
class FldaLite(FLDA):
    """Lightweight FLDA variant.

    fit() derives pseudo-labels by spectral clustering of tf-idf'd bags of
    nearest-neighbour labels, then fits an LDA projection on them;
    transform() applies the learned scaling/PCA/LDA chain.
    """

    def fit(self, X, y):
        self.scaler_ = StandardScaler()
        self.pca_ = PCA(n_components=self.pca_n_components)
        # Standardise, then reduce with PCA.
        XX = self.pca_.fit_transform(self.scaler_.fit_transform(X))
        self.knn_ = KNeighborsClassifier(n_neighbors=self.knn_n_neighs)
        self.knn_.fit(XX, y)
        # For every sample, collect the labels of its nearest neighbours
        # (Python 2: map() returns a list here).
        yy = map(lambda nn: y[nn], self.knn_.kneighbors(XX)[1])
        # Treat each neighbour-label bag as a "document" and tf-idf it.
        self.cv_ = CountVectorizer(input='content', tokenizer=lambda x: x, lowercase=False)
        XXX = self.cv_.fit_transform(array(yy))
        self.tfidf_transformer_ = TfidfTransformer()
        XXX = self.tfidf_transformer_.fit_transform(XXX)
        # Cluster the bags to obtain pseudo-labels for the LDA.
        self.clusterer_ = SpectralClustering(n_clusters=self.n_scented_clusters)
        yyy = self.clusterer_.fit_predict(XXX)
        self.lda_ = LDA(**self.lda_params)
        self.lda_.fit(XX, yyy)
        return self

    def transform(self, X):
        # NOTE(review): fit_transform (not transform) is called on the scaler
        # and PCA here, refitting them on X — likely intended to be
        # transform(); confirm before relying on this.
        return self.lda_.transform(self.pca_.fit_transform(self.scaler_.fit_transform(X)))
def LDA模型(self, 問題, 答案):
    """Train an LDA classifier on (問題, 答案) and return a closure that
    predicts labels for new inputs."""
    model = LDA()
    print('訓練LDA')
    model.fit(問題, 答案)
    print('訓練了')
    return lambda 問: model.predict(問)
def lda_on(train_x, train_y, test_x, test_y, feats_name='all_features'):
    """ Linear Discriminant Analysis

    Fits an LDA on the training data, prints train/test/validation scores
    (Python 2 print statements), pickles the model, and writes confusion
    matrices for the test and validation predictions to text files.
    """
    lda = LDA()
    # store_covariance was a fit() argument in old scikit-learn releases.
    lda.fit(train_x, train_y, store_covariance=True)
    print feats_name, "(train):", lda.score(train_x, train_y)
    print feats_name, "(test):", lda.score(test_x, test_y)
    # Persist the model fitted on the full training set.
    with open(dataset_name + '_lda_classif_' + feats_name + '.pickle', 'w') as w_f:
        cPickle.dump(lda, w_f)
    y_pred = lda.predict(test_x)
    # Refit on 80% of the training data to measure a validation score
    # (this overwrites the previously fitted model in `lda`).
    X_train, X_validate, y_train, y_validate = cross_validation\
        .train_test_split(train_x, train_y, test_size=0.2, random_state=0)
    lda.fit(X_train, y_train)
    print feats_name, "(validation):", lda.score(
        X_validate, y_validate)
    y_pred_valid = lda.predict(X_validate)
    # Dump confusion matrices for the test and validation predictions.
    cm_test = confusion_matrix(test_y, y_pred)
    cm_valid = confusion_matrix(y_validate, y_pred_valid)
    np.set_printoptions(threshold='nan')
    with open("cm_test" + feats_name + ".txt", 'w') as w_f:
        print >> w_f, cm_test
    with open("cm_valid" + feats_name + ".txt", 'w') as w_f:
        print >> w_f, cm_valid
def main():
    """For each questionnaire item 3..17, train four classifiers and print
    their test-set accuracy."""
    builders = (
        ("Nearest Neighbor", lambda: KNeighborsClassifier(n_neighbors=5)),
        ("Support Vector Machines", lambda: svm.SVC()),
        ("Random Forrest", lambda: RandomForestClassifier(n_estimators=100)),
        ("Linear Discriminant Analysis Classifier", lambda: LDA(solver='lsqr')),
    )
    for question in range(3, 18):
        print("Question ", question, " Percent Accuracy")
        train_x, train_y, test_x, test_y = loadTrainingAndTestData(question)
        for label, build in builders:
            model = build()
            model.fit(train_x, train_y)
            predictions = model.predict(test_x)
            # Same output format as before: "<name>: <pct> %".
            print("%s: %.2f" % (label, 100 * accuracy_score(test_y, predictions)), "%")
def score(train_X, train_y):
    """Hold out 1% of the data (fixed seed) and return the log-loss of an
    LDA's predicted probabilities on that holdout."""
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        train_X, train_y, test_size=0.01, random_state=10)
    model = LDA()
    model.fit(X_fit, y_fit)
    return log_loss(y_holdout, model.predict_proba(X_holdout))
def test_classification():
    """Build TF-IDF features from training.json, reduce them with a
    9-component LDA, train an MLP on the projection and print its training
    accuracy (contains a Python 2 print statement)."""
    from read import read
    import numpy, tfidf
    from sklearn.decomposition import TruncatedSVD
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import Normalizer
    m, files = read("training.json")
    # Map each document's topic string to a dense integer class id.
    y_map = [str(file["topic"]) for file in files]
    map = []
    for i in range(len(y_map)):
        if(len(map) == 0 or not map.__contains__(y_map[i])):
            map.append(y_map[i])
    y = numpy.array([map.index(y_map[i]) for i in range(len(y_map))])
    print("Construindo TF-IDF...")
    X, vectorizer = tfidf.vectorizeTFIDF(files)
    print X.shape
    print("Performing dimensionality reduction using LDA...")
    lda = LDA(n_components=9)
    # LDA needs a dense matrix.
    X = X.toarray()
    lda.fit(X, y)
    X = lda.transform(X)
    mlp = MLPClassifier()
    mlp.fit(X, y)
    # Score measured on the training data itself.
    training_score = mlp.score(X, y)
    print("training accuracy: %f" % training_score)
def myLDA(X,y):
    """Return the wall-clock time spent fitting an LDA on (X, y) and
    projecting X onto its discriminant axes (projection is discarded)."""
    started = clock()
    model = LDA()
    model.fit(X, y)
    model.transform(X)
    return clock() - started
def eval_func(chromosome):
    """Fitness function: LDA accuracy on an 80/20 split of the features
    selected by *chromosome*."""
    dataset = LoadFeatures(data_N_x, data_F_x, chromosome)
    train_x, train_y, test_x, test_y = GetData(0.8, dataset)
    model = LDA()
    model.fit(train_x, train_y)
    return accuracy_score(test_y, model.predict(test_x))
def lda(self, reducedArray = []):
    """Fit a 2-component LDA (the two states being discriminated, e.g.
    target vs non-target) and store the fitted model in self.ldaMat.

    Uses *reducedArray* when non-empty, otherwise self.signalArray.
    """
    model = LDA(n_components=2)
    source = reducedArray if len(reducedArray) > 0 else self.signalArray
    # Reshape the signal into an (n_samples, n_features) matrix.
    matrix = np.resize(source, (len(source), len(source[0])))
    self.ldaMat = model.fit(matrix, self.targetVals)
def LDAClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    """Fit an lsqr-solver LDA, save P(class=1) for testData to CSV, and
    return the probabilities."""
    model = LDA(solver='lsqr')
    model.fit(trainData, ravel(trainLabel))
    probabilities = model.predict_proba(testData)[:, 1]
    saveResult(enrollment_id, probabilities, 'Proba_sklearn_LDA.csv')
    return probabilities
def LDAmeanScore(X, Y, n_folds, dim_reduction=0):
    """Mean cross-validated accuracy (percent) of an LDA classifier.

    :param X: classifier input matrix, n_samples * n_parameters
        (n_parameters >= 2, n_samples > 0); data assumed suitable for LDA.
    :param Y: label vector, length n_samples.
    :param n_folds: number of KFold splits, > 1.
    :param dim_reduction: if <= 0, no reduction; otherwise reduce to
        dim_reduction PCA dimensions when X has more parameters than that.
    :return: 100 * mean cross-validation score (also printed), or -1 when
        n_folds > n_samples.
    """
    if dim_reduction > 0 and X.shape[1] > dim_reduction:
        X = dim_reduction_PCA(X, dim_reduction)
    if (X.shape[0] > n_folds):
        # Cross-validation to estimate the performance of an LDA classifier.
        kf = KFold(n=len(Y), n_folds=n_folds, shuffle=False, random_state=None)
        scores = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index, :], X[test_index, :]
            Y_train, Y_test = Y[train_index], Y[test_index]
            cl = LDA()
            cl.fit(X_train, Y_train)
            scores.append(cl.score(X_test, Y_test))
        print 'Score moyen : ', np.mean(np.array(scores))
        return 100. * np.mean(np.array(scores))
    else:
        return -1
def classify(Xtrain,Xtest,Ytrain,Ytest):
    '''Test-set accuracy of five classifiers, in order: logistic regression,
    LDA, Gaussian naive Bayes, linear SVM, RBF SVM.'''
    models = (
        LogisticRegression(),
        LDA(),
        GaussianNB(),
        LinearSVC(C = 1),
        SVC(kernel='rbf', C = 1000),
    )
    scores = np.zeros((5,))
    for slot, model in enumerate(models):
        model.fit(Xtrain, Ytrain)
        scores[slot] = model.score(Xtest, Ytest)
    return scores
def train_lda():
    """Fit and return an LDA classifier on the full dataset returned by
    get_data_and_classes()."""
    from sklearn.lda import LDA
    samples, targets = get_data_and_classes()
    model = LDA()
    # store_covariance was a fit() argument in old scikit-learn releases.
    model.fit(samples, targets, store_covariance=True)
    return model
def pca_lda(X_train, X_test, y_train, y_test):
    """Project the training set onto 500 principal components, fit an LDA on
    the projection, and return the LDA's training score.

    NOTE(review): the projection uses np.dot(X, components.T) instead of
    pca.transform(), so the PCA mean is never subtracted — confirm this is
    intended. The returned score is measured on the training data; X_test
    and y_test are unused.
    """
    pca = PCA(n_components=500)
    lda = LDA()
    pca.fit(X_train)
    scores = np.dot(X_train, np.transpose(pca.components_))
    lda.fit(scores, y_train)
    return lda.score(scores, y_train, sample_weight=None)
def read_subpop_data(one_hot=True, fake_data=False, test_size=0.2, undersample=False):
    """Load the labelled/unlabelled subpopulation data, optionally
    undersample the majority class, print a baseline LDA accuracy, and
    return train/test dataset containers.

    :param one_hot: encode labels as one-hot vectors.
    :param fake_data: unused — kept for interface compatibility.
    :param test_size: fraction of labelled data held out for testing.
    :param undersample: balance classes with UnderSampler before training.
    :return: object with .train (SemiDataSet) and .test (DataSet) attributes.
    """
    labeled_dic = convert_txt_to_npy(LABELED_RL_PATH)
    unlabeled_dic = convert_txt_to_npy(UNLABELED_RL_PATH, labeled=False)
    X_train, X_test, y_train, y_test = split_train_test(labeled_dic, test_size=test_size)

    # Simple attribute container for the returned splits.
    class DataSets(object):
        pass
    data_sets = DataSets()

    if undersample:
        # Optional third-party balancing step; imported lazily so the
        # dependency is only needed when requested.
        from unbalanced_dataset import UnderSampler
        US = UnderSampler(verbose=True)
        X_train, y_train = US.fit_transform(X_train, y_train)

    # Quick sanity baseline: plain LDA accuracy on the held-out split.
    lda = LDA()
    lda.fit(X_train, y_train)
    score = metrics.accuracy_score(lda.predict(X_test), y_test)
    print("Baseline LDA: %f " % score)

    if one_hot:
        y_train = convert_to_one_hot(y_train)
        y_test = convert_to_one_hot(y_test)

    # The first DataSets instance is discarded; a fresh one is populated.
    data_sets = DataSets()
    data_sets.test = DataSet(X_test, y_test)
    data_sets.train = SemiDataSet(unlabeled_dic['data'], X_train, y_train)
    return data_sets
def main():
    """Train an LDA face classifier on all stored face vectors, then run an
    interactive menu of test modes until the user chooses Exit."""
    logging.basicConfig(format='[%(asctime)s] %(levelname)7s: %(message)s',
                        level=logging.DEBUG)
    all_image_numbers = generate_all_image_numbers(no_of_persons, samples_person)
    # First column holds the class (person) id of each image.
    classes = all_image_numbers[:, 0]
    all_face_vectors = load_face_vectors_from_disk(all_image_numbers, image_size)
    classifier = LDA()
    logging.debug("Training..")
    classifier.fit(all_face_vectors, classes)
    # Interactive loop; any input other than 0-3 simply re-prompts.
    while True:
        function = input(
            "0)Exit\n"
            "1)Live test\n"
            "2)Test image \"test.JPG\"\n"
            "3)General test\n"
            "\n"
            "Choose function:"
        )
        if function == "1":
            test_live(classifier, all_face_vectors)
        elif function == "2":
            test_one_image(classifier, all_face_vectors)
        elif function == "3":
            test(all_face_vectors, classes)
        elif function == "0":
            return
def pca_lda(X_train,X_test,y_train,y_test):
    """Fit an LDA on a 500-component PCA projection of the training set and
    return the LDA's training score.

    NOTE(review): np.dot(X, components.T) skips the mean-centering that
    pca.transform() performs — confirm intended. X_test / y_test are unused
    and the score is computed on the training data.
    """
    pca = PCA(n_components=500)
    lda = LDA()
    pca.fit(X_train)
    scores = np.dot(X_train,np.transpose(pca.components_))
    lda.fit(scores, y_train)
    return lda.score(scores, y_train, sample_weight=None)
def LDA_train(self, param): H = self.get_Htotal(self.Xtrain, self.GXtrain) # Get the hidden output matrix ############## BOOSTING ############## if (self.D_flag == 1): # If we have given Weights to the samples W_root = np.sqrt(self.D) H = H * W_root lda = LDA(solver='lsqr') # svd , lsqr, eigen lda.fit(H, self.Ytrain.ravel()) self.bo = np.zeros( (1, self.nO)) # In the standard ELM, these does not count proyection = lda.coef_ self.Wo = proyection.T # Write the output weights into the structure Hmeans = copy.deepcopy(lda.means_) self.Hmeans = Hmeans ### CREATE IT INTERNAL TO USE SOMEWERE ELSE self.priors = copy.deepcopy(lda.priors_) threshold = np.dot(proyection, (Hmeans[0, :] + Hmeans[1, :]) / 2) - np.log( self.priors[1] / self.priors[0]) # print Hmeans.shape # print proyection.shape self.bo = -threshold
def do_lda(x, y, folds):
    """Shuffle (x, y), run *folds*-fold cross-validation with an LDA
    classifier and return the number of correct predictions per fold.

    Relies on Python 2 integer division: fold_size = len(x) / folds and
    i / fold_size assign each sample to a contiguous fold.

    :return: list of per-fold correct-prediction counts.
    """
    # Shuffle features and labels with the same permutation.
    indexes = list(range(len(x)))
    shuffle(indexes)
    x = list(x[i] for i in indexes)
    y = list(y[i] for i in indexes)
    fold_size = len(x) / folds
    corrects = []
    for fold in range(folds):
        test_x = []
        train_x = []
        test_y = []
        train_y = []
        # Samples whose fold index matches go to test, the rest to train.
        for i in range(len(x)):
            fold_index = i / fold_size
            if fold == fold_index:
                test_x.append(x[i])
                test_y.append(y[i])
            else:
                train_x.append(x[i])
                train_y.append(y[i])
        print 'Partitioned data into fold'
        # Drop dimensions carrying no information before fitting.
        test_x, train_x = remove_redundant_dimensions(test_x, train_x)
        print 'Removed redundant dimensions'
        lda = LDA()
        lda.fit(train_x, train_y)
        print 'Fit lda'
        predictions = lda.predict(test_x)
        correct = sum(1 for i in range(len(predictions)) if predictions[i] == test_y[i])
        print 'Did fold, correct:', correct
        corrects.append(correct)
    return corrects
def LDA_train(self, param): H = self.get_H(self.Xtrain) # Get the hidden output matrix lda = LDA(solver='lsqr') # svd , lsqr, eigen lda.fit(H, self.Ytrain.ravel()) self.bo = np.zeros( (1, self.nO)) # In the standard ELM, these does not count proyection = lda.coef_ self.Wo = proyection.T # Write the output weights into the structure Hmeans = copy.deepcopy(lda.means_) self.Hmeans = Hmeans ### CREATE IT INTERNAL TO USE SOMEWERE ELSE self.priors = copy.deepcopy(lda.priors_) threshold = np.dot(proyection, (Hmeans[0, :] + Hmeans[1, :]) / 2) - np.log( self.priors[1] / self.priors[0]) # print Hmeans.shape # print proyection.shape self.bo = -threshold # print lda.score(self.H, self.Ytrain.ravel()) # print self.score(self.Xtrain, self.Ytrain) # print "***************" self.flag_LDA = 1
class FGDA(BaseEstimator, TransformerMixin):
    """Fisher Geodesic Discriminant Analysis.

    Maps covariance matrices to the tangent space, fits a shrinkage LDA
    there, projects tangent vectors onto the LDA-relevant subspace, and
    maps them back with the inverse tangent-space transform.
    """

    def __init__(self, metric='riemann', tsupdate=False):
        # metric / tsupdate are forwarded to the TangentSpace mapping.
        self.metric = metric
        self.tsupdate = tsupdate
        self._ts = TangentSpace(metric=metric, tsupdate=tsupdate)

    def _fit_lda(self, X, y):
        """Fit the tangent-space LDA and precompute the projector _W.

        Returns the tangent vectors so fit_transform can reuse them.
        """
        self.classes = numpy.unique(y)
        self._lda = LDA(n_components=len(self.classes) - 1,
                        solver='lsqr',
                        shrinkage='auto')
        ts = self._ts.fit_transform(X)
        self._lda.fit(ts, y)
        W = self._lda.coef_.copy()
        # Projector onto the row space of the LDA coefficients:
        # W^T (W W^T)^+ W.
        self._W = numpy.dot(
            numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
        return ts

    def _retro_project(self, ts):
        """Project tangent vectors onto the discriminant subspace and map
        them back to covariance matrices."""
        ts = numpy.dot(ts, self._W)
        return self._ts.inverse_transform(ts)

    def fit(self, X, y=None):
        self._fit_lda(X, y)
        return self

    def transform(self, X):
        ts = self._ts.transform(X)
        return self._retro_project(ts)

    def fit_transform(self, X, y=None):
        # Reuses the tangent vectors already computed during fitting.
        ts = self._fit_lda(X, y)
        return self._retro_project(ts)
def get_performance(test_df, X_std, y):
    """Print KNN/LDA/QDA test-set accuracy and plot KNN accuracy against the
    number of neighbours (Python 2 print statements).

    NOTE(review): the test features are standardised with a scaler fitted on
    the test set itself rather than the training scaler — confirm intended.
    """
    Xtest = test_df.ix[:, 'x.1':'x.10'].values
    ytest = test_df.ix[:, 'y'].values
    X_std_test = StandardScaler().fit_transform(Xtest)
    lda_model = LDA()
    lda_model.fit(X_std, y)
    qda_model = QDA()
    qda_model.fit(X_std, y)
    knn_model = KNeighborsClassifier(n_neighbors=10)
    knn_model.fit(X_std, y)
    print "KNN SCORE"
    print knn_model.score(X_std_test, ytest)
    print "LDA SCORE"
    print lda_model.score(X_std_test, ytest)
    print "QDA SCORE"
    print qda_model.score(X_std_test, ytest)
    knn_scores_training = []
    knn_scores_test = []
    # Sweep the neighbour count k = 1..11.
    for i in range(1, 12):
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(X_std, y)
        # NOTE(review): the "training" list collects test-set scores and the
        # "test" list collects training-set scores — the names look swapped.
        knn_scores_training.append(knn_model.score(X_std_test, ytest))
        knn_scores_test.append(knn_model.score(X_std, y))
    plt.plot(range(11), knn_scores_training, 'r--')
    plt.plot(range(11), knn_scores_test, 'b--')
    plt.axis([0, 10, 0.3, 1.1])
    plt.show()
def test_classification():
    """Build TF-IDF features from training.json, reduce with a 9-component
    LDA, train an MLP on the projection and print its training accuracy
    (contains a Python 2 print statement)."""
    from read import read
    import numpy, tfidf
    from sklearn.decomposition import TruncatedSVD
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import Normalizer
    m, files = read("training.json")
    # Map each document's topic string to a dense integer class id.
    y_map = [str(file["topic"]) for file in files]
    map = []
    for i in range(len(y_map)):
        if (len(map) == 0 or not map.__contains__(y_map[i])):
            map.append(y_map[i])
    y = numpy.array([map.index(y_map[i]) for i in range(len(y_map))])
    print("Construindo TF-IDF...")
    X, vectorizer = tfidf.vectorizeTFIDF(files)
    print X.shape
    print("Performing dimensionality reduction using LDA...")
    lda = LDA(n_components=9)
    # LDA needs a dense matrix.
    X = X.toarray()
    lda.fit(X, y)
    X = lda.transform(X)
    mlp = MLPClassifier()
    mlp.fit(X, y)
    # Score measured on the training data itself.
    training_score = mlp.score(X, y)
    print("training accuracy: %f" % training_score)
def main_lda():
    """Fit an LDA on the fh_lda() dataset and return its decision plot.

    :return: the plot object produced by plot_LDA.
    """
    X, y = fh_lda()
    lda = LDA()
    # Fit once and reuse the model: the original refit the identical data a
    # second time inside the plot_LDA call just to obtain predictions.
    lda.fit(X, y)
    splot = plot_LDA(lda, X, y, lda.predict(X))
    return splot
def curve_per_subject(subject, data_path, test_labels):
    """Per-subject seizure-prediction analysis with an LDA classifier
    (Python 2 print statements).

    Trains on per-minute windows, averages window probabilities into
    10-minute clip predictions, picks the ROC point nearest (0, 1) as the
    operating threshold, prints the test confusion matrix, then sweeps
    thresholds 0.00..0.99.

    :return: (threshold_list, sensitivity_list, specificity_list).
    """
    d = load_train_data(data_path, subject)
    x, y_10m = d['x'], d['y']
    n_train_examples = x.shape[0]
    n_timesteps = x.shape[-1]
    print 'n_preictal', np.sum(y_10m)
    print 'n_inetrictal', np.sum(y_10m - 1)
    # Flatten 10-minute clips into per-minute training rows.
    x, y = reshape_data(x, y_10m)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)
    lda = LDA()
    lda.fit(x, y)
    pred_1m = lda.predict_proba(x)[:, 1]
    # Average the per-minute probabilities back into one score per clip.
    pred_10m = np.reshape(pred_1m, (n_train_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    fpr, tpr, threshold = roc_curve(y_10m, pred_10m)
    # Operating point: ROC point with minimal distance to the corner (0, 1).
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]][-1]
    print opt_threshold
    # ------- TEST ---------------
    d = load_test_data(data_path, subject)
    x_test, id = d['x'], d['id']
    n_test_examples = x_test.shape[0]
    n_timesteps = x_test.shape[3]
    x_test = reshape_data(x_test)
    x_test = data_scaler.transform(x_test)
    pred_1m = lda.predict_proba(x_test)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_test_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    y_pred = np.zeros_like(test_labels)
    y_pred[np.where(pred_10m >= opt_threshold)] = 1
    cm = confusion_matrix(test_labels, y_pred)
    print print_cm(cm, labels=['interictal', 'preictal'])
    # Sensitivity and specificity at the chosen threshold.
    sn = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
    sp = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
    print sn, sp
    # Sweep thresholds to build the sensitivity/specificity curves.
    sn, sp = [], []
    t_list = np.arange(0.0, 1.0, 0.01)
    for t in t_list:
        y_pred = np.zeros_like(test_labels)
        y_pred[np.where(pred_10m >= t)] = 1
        cm = confusion_matrix(test_labels, y_pred)
        sn_t = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
        sp_t = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
        sn.append(sn_t)
        sp.append(sp_t)
    return t_list, sn, sp
def curve_per_subject(subject, data_path, test_labels):
    """Per-subject seizure-prediction analysis with an LDA classifier
    (Python 3 variant of the same routine).

    Trains on per-minute windows, averages window probabilities into
    10-minute clip predictions, picks the ROC point nearest (0, 1) as the
    operating threshold, prints the test confusion matrix, then sweeps
    thresholds 0.00..0.99.

    :return: (threshold_list, sensitivity_list, specificity_list).
    """
    d = load_train_data(data_path, subject)
    x, y_10m = d['x'], d['y']
    n_train_examples = x.shape[0]
    n_timesteps = x.shape[-1]
    print('n_preictal', np.sum(y_10m))
    print('n_inetrictal', np.sum(y_10m - 1))
    # Flatten 10-minute clips into per-minute training rows.
    x, y = reshape_data(x, y_10m)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)
    lda = LDA()
    lda.fit(x, y)
    pred_1m = lda.predict_proba(x)[:, 1]
    # Average the per-minute probabilities back into one score per clip.
    pred_10m = np.reshape(pred_1m, (n_train_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    fpr, tpr, threshold = roc_curve(y_10m, pred_10m)
    # Operating point: ROC point with minimal distance to the corner (0, 1).
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]][-1]
    print(opt_threshold)
    # ------- TEST ---------------
    d = load_test_data(data_path, subject)
    x_test, id = d['x'], d['id']
    n_test_examples = x_test.shape[0]
    n_timesteps = x_test.shape[3]
    x_test = reshape_data(x_test)
    x_test = data_scaler.transform(x_test)
    pred_1m = lda.predict_proba(x_test)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_test_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    y_pred = np.zeros_like(test_labels)
    y_pred[np.where(pred_10m >= opt_threshold)] = 1
    cm = confusion_matrix(test_labels, y_pred)
    print(print_cm(cm, labels=['interictal', 'preictal']))
    # Sensitivity and specificity at the chosen threshold.
    sn = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
    sp = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
    print(sn, sp)
    # Sweep thresholds to build the sensitivity/specificity curves.
    sn, sp = [], []
    t_list = np.arange(0.0, 1.0, 0.01)
    for t in t_list:
        y_pred = np.zeros_like(test_labels)
        y_pred[np.where(pred_10m >= t)] = 1
        cm = confusion_matrix(test_labels, y_pred)
        sn_t = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
        sp_t = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
        sn.append(sn_t)
        sp.append(sp_t)
    return t_list, sn, sp
def get_LDA(Xtrain, Xtest, Ytrain, Ytest):
    """Fit an LDA, print train/test accuracy, and return the fitted model."""
    model = LDA()
    model.fit(Xtrain, Ytrain)
    # Only slots 0 and 1 are filled, matching the original layout.
    scores = np.empty((4))
    scores[0] = model.score(Xtrain, Ytrain)
    scores[1] = model.score(Xtest, Ytest)
    print('LDA, train: {0:.02f}% '.format(scores[0] * 100))
    print('LDA, test: {0:.02f}% '.format(scores[1] * 100))
    return model
def lda(data,labels,n,v_type):
    """Split the data, fit an LDA, and report its performance.

    :param n: unused — kept for interface compatibility.
    :return: (accuracy, classification report, predictions, test labels,
        test data, fitted model, confusion matrix, "LDA").
    """
    train_data, train_labels, test_data, test_labels = split_data(data, labels, v_type)
    model = LDA()
    model.fit(np.array(train_data, dtype=np.float64),
              np.array(train_labels, dtype=np.float64))
    y_pred = model.predict(test_data)
    hits = [y_pred[i] for i in range(len(y_pred)) if y_pred[i] == test_labels[i]]
    pure_accuracy_rate = len(hits) / float(len(test_labels))
    report = classification_report(y_pred, test_labels, target_names=rock_names)
    cm = confusion_matrix(test_labels, y_pred)
    return pure_accuracy_rate, report, y_pred, test_labels, test_data, model, cm, "LDA"
def get_LDA(Xtrain, Xtest, Ytrain, Ytest):
    """Train an LDA classifier, report its train/test accuracy, and return
    the fitted estimator."""
    estimator = LDA()
    estimator.fit(Xtrain, Ytrain)
    # Four slots as before; only the first two are ever written.
    accuracies = np.empty((4))
    accuracies[0] = estimator.score(Xtrain, Ytrain)
    accuracies[1] = estimator.score(Xtest, Ytest)
    print('LDA, train: {0:.02f}% '.format(accuracies[0]*100))
    print('LDA, test: {0:.02f}% '.format(accuracies[1]*100))
    return estimator
def computeLDA(data, dim):
    """Fit an LDA projection with *dim* components on the labelled subset of
    *data* (only classes with at least 10 samples) and return the fitted
    model.

    :param data: indexable array of face vectors.
    :param dim: number of LDA components, passed positionally to LDA().
    :return: the fitted LDA instance.
    """
    samples_indexes = range(len(data))
    indexes, y = lfw.loadTrainingDataLabels(samples_indexes,
                                            min_nb_samples_per_class=10)
    # Select the labelled rows once and reuse the slice: the original
    # computed this into an unused local and re-indexed `data` for fit().
    samples = data[indexes]
    lda = LDA(dim)
    lda.fit(samples, y)
    return lda
def lda(ds, n):
    '''Project the dataset onto its best discriminant dimensions.

    At most n components are kept; for our binary case sklearn caps the
    projection at n_classes - 1 dimensions regardless of larger n values.
    '''
    projector = LDA(n_components=n)
    projector.fit(ds.data, ds.target)
    projected = projector.transform(ds.data)
    return Dataset(projected, ds.target)
def plotLDA3D(X, y, names=[]):
    """Project X onto 3 LDA components and show a labelled 3-D scatter plot.

    :param X: feature matrix.
    :param y: integer class labels.
    :param names: optional class display names; class indices are used when
        empty. (Mutable default is never modified here.)
    """
    plt.cla()
    lda = LDA(n_components=3)
    lda.fit(X, y)
    X = lda.transform(X)
    fig = plt.figure(1, figsize=(4, 3))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, 0.95, 1], elev=48, azim=134)
    classes = np.unique(y)
    # Build a colour list from named matplotlib colours (only its first
    # len(classes) entries are collected).
    colors_ = list(six.iteritems(colors.cnames))
    hex_ = [color[1] for color in colors_]
    rgb = [colors.hex2color(color) for color in hex_]
    colors_ = []
    class_label = []
    for i in range(0, len(classes)):
        colors_.append(rgb[i])
        if len(names) == 0:
            class_label.append((str(i), i))
        else:
            class_label.append((names[i], i))
    # Annotate each class at the centroid of its projected points.
    for name, label in class_label:
        ax.text3D(
            X[y == label, 0.0].mean(),
            X[y == label, 1.0].mean() + 1.5,
            X[y == label, 2.0].mean(),
            name,
            horizontalalignment="center",
            bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
        )
    # Reorder the labels to have colors matching the cluster results
    y = y.astype(int)
    # y = np.choose(y, class_label)
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    # NOTE(review): x_surf/y_surf and v0/v1 are computed but never plotted —
    # presumably leftovers from a decision-surface plot.
    x_surf = [X[:, 0].min(), X[:, 0].max(), X[:, 0].min(), X[:, 0].max()]
    y_surf = [X[:, 0].max(), X[:, 0].max(), X[:, 0].min(), X[:, 0].min()]
    x_surf = np.array(x_surf)
    y_surf = np.array(y_surf)
    v0 = lda.transform(lda.coef_[[0]])
    v0 /= v0[-1]
    v1 = lda.transform(lda.coef_[[1]])
    v1 /= v1[-1]
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    plt.show()
def LDA佮SVM模型(self, 問題, 答案):
    """Train an LDA projection followed by an SVM on (問題, 答案) and return
    a closure that applies both to new inputs."""
    uniform_weights = np.ones(len(問題))
    svm_model = svm.SVC(C=1)
    lda_model = LDA()
    print('訓練LDA')
    lda_model.fit(問題, 答案)
    print('訓練SVM')
    svm_model.fit(lda_model.transform(問題), 答案,
                  sample_weight=uniform_weights)
    print('訓練了')
    return lambda 問: svm_model.predict(lda_model.transform(問))
def plot_lda_projection(marker, flname):
    """Fit an LDA on a marker's individuals vs population labels, print its
    training accuracy (Python 2 print statement), and save a scatter of the
    projection against the labels to *flname*.
    """
    lda = LDA()
    lda.fit(marker["individuals"], marker["population_labels"])
    # Training-set accuracy.
    print lda.score(marker["individuals"], marker["population_labels"])
    proj = lda.transform(marker["individuals"])
    n_samples, n_components = proj.shape
    plt.scatter(proj, marker["population_labels"])
    plt.xlabel("Component 0", fontsize=18)
    plt.ylabel("Population Labels", fontsize=18)
    # NOTE(review): matplotlib's savefig expects lowercase `dpi`; `DPI` is
    # passed through as an unknown keyword — confirm it has any effect.
    plt.savefig(flname, DPI=200)
def fit_data(inputs, labels, method):
    """Construct the classifier named by *method*, fit it on
    (inputs, labels) and return the fitted estimator.

    :param method: 'LDA', 'SVM' or 'random_forest'.
    :raises ValueError: for an unrecognised *method*. (As written, the
        original only fitted inside the 'random_forest' branch, where
        `classifier` was never constructed — an UnboundLocalError — and
        returned unfitted models for 'LDA'/'SVM'.)
    """
    if method == 'LDA':
        classifier = LDA()
    elif method == 'SVM':
        classifier = SVC()
    elif method == 'random_forest':
        # Imported locally: this snippet's surrounding imports are unknown.
        from sklearn.ensemble import RandomForestClassifier
        classifier = RandomForestClassifier()
    else:
        raise ValueError("unknown method: %r" % (method,))
    # fit() returns the estimator itself, matching the original's
    # `classifier = classifier.fit(...)` shape.
    classifier = classifier.fit(inputs, labels)
    return classifier
def get_LDA_performance(test_df, X_std, y):
    """Plot train/test error of LDA, QDA and 10-NN classifiers as a function
    of the LDA projection dimensionality d = 1..10.

    :param test_df: pandas frame with feature columns x.1..x.10 and label
        column y (the test split).
    :param X_std: standardised training features.
    :param y: training labels.
    """
    X_test = test_df.ix[:, 'x.1':'x.10'].values
    # NOTE(review): the test features are standardised independently of the
    # training scaler — confirm intended.
    X_std_test = StandardScaler().fit_transform(X_test)
    y_test = test_df.ix[:, 'y'].values
    lda_scores_training = []
    lda_scores_test = []
    qda_scores_training = []
    qda_scores_test = []
    knn_scores_training = []
    knn_scores_test = []
    for d in range(1, 11):
        # NOTE(review): LDA keeps at most n_classes - 1 components, so large
        # d values may not change the projection.
        lda = LDA(n_components=d)
        Xred_lda_training = lda.fit_transform(X_std, y)
        Xred_lda_test = lda.transform(X_std_test)
        lda_model = LDA()
        lda_model.fit(Xred_lda_training, y)
        qda_model = QDA()
        qda_model.fit(Xred_lda_training, y)
        knn_model = KNeighborsClassifier(n_neighbors=10)
        knn_model.fit(Xred_lda_training, y)
        # Error rates (1 - accuracy) on the training and test splits.
        lda_scores_training.append(1 - lda_model.score(Xred_lda_training, y))
        lda_scores_test.append(1 - lda_model.score(Xred_lda_test, y_test))
        qda_scores_training.append(1 - qda_model.score(Xred_lda_training, y))
        qda_scores_test.append(1 - qda_model.score(Xred_lda_test, y_test))
        knn_scores_training.append(1 - knn_model.score(Xred_lda_training, y))
        knn_scores_test.append(1 - knn_model.score(Xred_lda_test, y_test))
    plt.plot(range(10), lda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), lda_scores_test, 'b--', label="Test data")
    plt.title("LDA vs LDA")
    plt.xlabel('k')
    plt.ylabel('Score')
    plt.show()
    plt.plot(range(10), qda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), qda_scores_test, 'b--', label="Test data")
    plt.title("QDA vs LDA")
    plt.show()
    plt.plot(range(10), knn_scores_training, 'r--', label="Train data")
    plt.plot(range(10), knn_scores_test, 'b--', label="Test data")
    plt.title("KNN vs LDA")
    plt.show()
def lda(X_train, X_val, y_train):
    """Reduce X_train and X_val to LDA discriminant space.

    Sparse input is densified on demand: fitting is attempted as-is and
    retried after .toarray() on the TypeError raised for sparse matrices.
    """
    print("Performing dimensionality reduction using LDA...")
    reducer = LDA()
    try:
        reducer.fit(X_train, y_train)
    except TypeError:
        # Sparse matrices: densify both splits, then refit.
        X_train = X_train.toarray()
        X_val = X_val.toarray()
        reducer.fit(X_train, y_train)
    return reducer.transform(X_train), reducer.transform(X_val)
def lda_f(train, train_labels, test):
    """Fit an LDA on (train, train_labels) and return its predictions for
    *test*; prints a banner first (Python 2 print statements)."""
    # LDA
    print ''
    print '----------------'
    print 'LDA:'
    # http://scikit-learn.org/0.16/modules/generated/sklearn.lda.LDA.html
    clf = LDA()
    clf.fit(train, train_labels)
    pred = clf.predict(test)
    return pred
def LinearDiscriminantAnalysis(x_train, y_train, x_cv, y_cv):
    """ Linear Discriminant Analysis Classifier

    Fits an LDA on the training split and returns it (Python 2 print
    statement). x_cv / y_cv are only used by the commented-out accuracy
    report below.
    """
    print "Linear Discriminant Analysis"
    clfr = LDA()
    clfr.fit(x_train, y_train)
    #print 'Accuracy in training set: %f' % clfr.score(x_train, y_train)
    #if y_cv != None:
    #print 'Accuracy in cv set: %f' % clfr.score(x_cv, y_cv)
    return clfr
def LDA_select_cv(X, Y, num_features):
    """Mean stratified 10-fold CV accuracy of an LDA fitted on the top
    *num_features* random-forest-ranked features."""
    fold_scores = []
    for train, test in cross_validation.StratifiedKFold(Y, n_folds=10):
        X_train, X_test = X[train], X[test]
        y_train, y_test = Y[train], Y[test]
        # Rank features with a 2000-tree random forest (RFsel), then reorder
        # the test fold columns with the same feature ordering.
        XRF_train, imp, ind, std = fitRF(X_train, y_train, est=2000)
        XRF_test = X_test[:, ind]
        model = LDA()
        model.fit(XRF_train[:, :num_features], y_train)
        fold_scores.append(model.score(XRF_test[:, :num_features], y_test))
    return np.mean(fold_scores)
def train_lda(filename,delim=','):
    """Train an LDA on the dataset in *filename*, print training time and a
    test-accuracy figure, and return the test predictions (Python 2 print
    statement on the last line).

    NOTE(review): np.sum(y_pred == y_test)/len(y_pred) is integer division
    under Python 2, so the printed "accuracy" truncates to 0 unless every
    prediction is correct — confirm/fix upstream.
    """
    start = time.time()
    [X_train, X_test, y_train, y_test] = load_and_split_dataset(filename,delim)
    clf = LDA()
    clf.fit(X_train, y_train)
    end = time.time()
    print('Training Time: '+str((end - start))+'s')
    y_pred = clf.predict(X_test)
    print np.sum(y_pred == y_test)/len(y_pred)
    return y_pred
def plotLDA3D(X, y, names=[]):
    """Project X onto 3 LDA components and show a labelled 3-D scatter plot.

    :param X: feature matrix.
    :param y: integer class labels.
    :param names: optional class display names; class indices are used when
        empty. (Mutable default is never modified here.)
    """
    plt.cla()
    lda = LDA(n_components=3)
    lda.fit(X, y)
    X = lda.transform(X)
    fig = plt.figure(1, figsize=(4, 3))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    classes = np.unique(y)
    # Build a colour list from named matplotlib colours (only its first
    # len(classes) entries are collected).
    colors_ = list(six.iteritems(colors.cnames))
    hex_ = [color[1] for color in colors_]
    rgb = [colors.hex2color(color) for color in hex_]
    colors_ = []
    class_label = []
    for i in range(0, len(classes)):
        colors_.append(rgb[i])
        if (len(names) == 0):
            class_label.append((str(i), i))
        else:
            class_label.append((names[i], i))
    # Annotate each class at the centroid of its projected points.
    for name, label in class_label:
        ax.text3D(X[y == label, 0.0].mean(),
                  X[y == label, 1.0].mean() + 1.5,
                  X[y == label, 2.0].mean(), name,
                  horizontalalignment='center',
                  bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
    # Reorder the labels to have colors matching the cluster results
    y = y.astype(int)
    #y = np.choose(y, class_label)
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    # NOTE(review): x_surf/y_surf and v0/v1 are computed but never plotted —
    # presumably leftovers from a decision-surface plot.
    x_surf = [X[:, 0].min(), X[:, 0].max(), X[:, 0].min(), X[:, 0].max()]
    y_surf = [X[:, 0].max(), X[:, 0].max(), X[:, 0].min(), X[:, 0].min()]
    x_surf = np.array(x_surf)
    y_surf = np.array(y_surf)
    v0 = lda.transform(lda.coef_[[0]])
    v0 /= v0[-1]
    v1 = lda.transform(lda.coef_[[1]])
    v1 /= v1[-1]
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    plt.show()
def lda_test(img_kind):
    """Visualise a 2-component LDA separation of face images of *img_kind*
    (class +1) against all other kinds (class -1).

    Python 2 code: print statements, xrange, and the legacy OpenCV cv API.
    """
    import pylab as pl
    subdir = "data/"
    classes = []
    data = []
    # Positive examples: files of the requested kind; negatives: the rest.
    the_ones = glob.glob(subdir + "f_" + img_kind + "*.jpg")
    all_of_them = glob.glob(subdir + "f_*_*.jpg")
    the_others = []
    for x in all_of_them:
        if the_ones.count(x) < 1:
            the_others.append(x)
    for x in the_ones:
        classes.append(1)
        data.append(get_image_features(cv.LoadImageM(x)))
    for x in the_others:
        classes.append(-1)
        data.append(get_image_features(cv.LoadImageM(x)))
    lda = LDA(n_components=2)
    print 'fiting'
    lda.fit(data, classes)
    print 'transforming'
    X_r = lda.transform(data)
    print '----'
    print X_r.shape
    # NOTE(review): binary LDA yields a single discriminant axis, so X_r may
    # have only one column despite n_components=2 — confirm x1 is valid.
    x0 = [x[0] for x in X_r]
    x1 = [x[1] for x in X_r]
    pl.figure()
    # Red for the positive class, blue for the negative class.
    for i in xrange(0,len(x0)):
        if classes[i] == 1:
            pl.scatter(x0[i], x1[i], c = 'r')
        else:
            pl.scatter(x0[i], x1[i], c = 'b')
    # for c, i, target_name in zip("rg", [1, -1], target_names):
    #     pl.scatter(X_r[classes == i, 0], X_r[classes == i, 1], c=c, label=target_name)
    pl.legend()
    pl.title('LDA of dataset')
    pl.show()
def classifier(method, X_train, y_train):
    """Build and fit the classifier selected by *method*.

    :param method: "lda", "knn" (5 neighbours, euclidean) or "svm"
        (grid-searched via GridSearch).
    :return: the fitted classifier.
    :raises ValueError: for an unknown method. (The original only printed a
        message and then crashed with an UnboundLocalError on clf.fit.)
    """
    if method == "lda":
        clf = LDA()
    elif method == "knn":
        clf = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
    elif method == "svm":
        clf = GridSearch(X_train, y_train)
    else:
        # Keep the original diagnostic output, then fail explicitly.
        print("Unknown classifier method ", method)
        raise ValueError("unknown classifier method: %r" % (method,))
    clf.fit(X_train, y_train)
    return clf
def feat_extraction(X,y,D):
    """Return (X_pca, X_lda): D-dimensional unsupervised PCA and supervised
    LDA embeddings of X."""
    # usupervised feature extraction: Principal Component Analysis
    pca = decomposition.PCA(n_components=D)
    X_pca = pca.fit(X).transform(X)
    # supervised feature extraction: Linear Discriminative Analysis
    lda = LDA(n_components=D)
    X_lda = lda.fit(X, y).transform(X)
    return (X_pca, X_lda)
def feat_extraction(X, y, D):
    """Compute two D-dimensional embeddings of X: an unsupervised PCA
    projection and a supervised LDA projection."""
    # Unsupervised: Principal Component Analysis.
    reducer_pca = decomposition.PCA(n_components=D)
    reducer_pca.fit(X)
    embedded_pca = reducer_pca.transform(X)
    # Supervised: Linear Discriminant Analysis.
    reducer_lda = LDA(n_components=D)
    reducer_lda.fit(X, y)
    embedded_lda = reducer_lda.transform(X)
    return (embedded_pca, embedded_lda)
def fit(X, y):
    """Fit and return an LDA classifier on (X, y).

    Earlier experiments (logistic regression, SVC, random forest, and a
    stacked second-stage LDA over lagged probabilities) were left commented
    out in the original; plain LDA is what is actually used.
    """
    model = LDA()
    model.fit(X, y)
    return model