def yj():
    params['mu0'] = np.random.randn() * 0.2
    params['mu1'] = np.random.randn() * 0.2
    params['sigma0'] = di.invgamma.rvs(3)
    params['sigma1'] = di.invgamma.rvs(3)
    sel, rawdata, normdata = get_data(data_yj, params)
    norm_trn_data = normdata.loc[sel['trn'], sel['feats']]
    norm_tst_data = normdata.loc[sel['tst'], sel['feats']]
    sklda = LDA()
    sklda.fit(norm_trn_data, sel['trnl'])
    error = 1 - sklda.score(norm_tst_data, sel['tstl'])
    print("skLDA error: %f" % error)
    return error
def LDA(array, test_labels):
    # Time LDA training for increasing numbers of components.
    # The local import shadows this function's own name inside the body,
    # so LDA(...) below refers to the scikit-learn class.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    print("LDA")
    print("Features\tTime")
    for pct in pct_features_list:
        num_features = int(pct * len(array[0]))
        start = time()
        LDA(n_components=num_features).fit(array, test_labels)
        end = time()
        print(num_features, "\t", end - start)
def _fit_lda(self, X, y, sample_weight=None):
    """Helper to fit LDA."""
    self.classes = numpy.unique(y)
    self._lda = LDA(n_components=len(self.classes) - 1,
                    solver='lsqr',
                    shrinkage='auto')
    ts = self._ts.fit_transform(X, sample_weight=sample_weight)
    self._lda.fit(ts, y)
    W = self._lda.coef_.copy()
    self._W = numpy.dot(
        numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
    return ts
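# A short standalone check (not part of the class above) of the algebra in
# _fit_lda: W.T @ pinv(W @ W.T) @ W is the orthogonal projector onto the row
# space of the LDA coefficients. Shapes below are illustrative assumptions.
import numpy

W = numpy.random.randn(3, 10)                  # e.g. 4 classes -> 3 discriminant vectors
P = numpy.dot(numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
assert numpy.allclose(numpy.dot(P, P), P)      # a projector is idempotent
assert numpy.allclose(numpy.dot(P, W.T), W.T)  # it fixes vectors already in the span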
def fit(X, y):
    # Do your training here.
    #clf = LogisticRegression(penalty="l2")
    #clf = SVC(kernel='linear', probability=True, random_state=0)
    clf1 = LDA()
    #clf = ensemble.RandomForestClassifier(n_estimators=10, max_depth=8, min_samples_leaf=4, n_jobs=4, random_state=0)
    clf1.fit(X, y)
    #pred_y = clf1.predict_proba(X)[:,[1]]
    #pred_y2 = np.vstack([pred_y[0],pred_y[:-1]])
    #pred_y3 = np.vstack([pred_y[0],pred_y[0],pred_y[:-2]])
    #pred_y = np.concatenate((pred_y, pred_y2, pred_y3),axis=1)
    #clf2 = LDA()
    #clf2.fit(pred_y, y)
    return clf1
def checkeachClassfier(train_x, train_y, test_x, test_y):
    # class_weight='auto' was removed from scikit-learn; 'balanced' is its successor.
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(class_weight='balanced'),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        DecisionTreeClassifier(class_weight='balanced'),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        RandomForestClassifier(class_weight='balanced'),
        AdaBoostClassifier(),
        GaussianNB(),
        LDA(),
        QDA()
    ]
    classtitle = [
        "KNeighborsClassifier", "SVC", "SVC weighted", "SVC(gamma=2, C=1)",
        "DecisionTreeClassifier", "DecisionTreeClassifier weighted",
        "RandomForestClassifier", "RandomForestClassifier weighted",
        "AdaBoostClassifier", "GaussianNB", "LDA", "QDA"
    ]
    for ctitle, clf in zip(classtitle, classifiers):
        try:
            clf.fit(train_x, train_y)
            train_pdt = clf.predict(train_x)
            MCC, Acc_p, Acc_n, Acc_all = get_Accs(train_y, train_pdt)
            print(ctitle + ":")
            print("MCC, Acc_p, Acc_n, Acc_all (train):")
            print("%s,%s,%s,%s" % (MCC, Acc_p, Acc_n, Acc_all))
            test_pdt = clf.predict(test_x)
            MCC, Acc_p, Acc_n, Acc_all = get_Accs(test_y, test_pdt)
            print("MCC, Acc_p, Acc_n, Acc_all (test):")
            print("%s,%s,%s,%s" % (MCC, Acc_p, Acc_n, Acc_all))
            fn = "submission_%s.csv" % ctitle
            with open(fn, 'w') as fout:
                fout.write("ID,target\n")
                for index, pred in enumerate(test_pdt):
                    fout.write("%s,%s\n" % (int(test_x[index][0]), pred))
        except Exception:
            print(ctitle + ": error")
            print()
def get_fsmethod(fsmethod, n_feats, n_subjs, n_jobs=1):
    if fsmethod == 'stats':
        return 'stats', None

    # Feature selection procedures
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
    fsmethods = {
        'rfe': RFE(estimator=SVC(kernel="linear"), step=0.05,
                   n_features_to_select=2),
        # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html
        # the old loss_func=zero_one argument is gone; scoring replaces it
        'rfecv': RFECV(estimator=SVC(kernel="linear"), step=0.05,
                       scoring='accuracy'),  # cv=3 default; cv=StratifiedKFold(n_subjs, 3)
        # Univariate feature selection:
        # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectPercentile.html
        'univariate': SelectPercentile(f_classif, percentile=5),
        # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFpr.html
        'fpr': SelectFpr(f_classif, alpha=0.05),
        # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFdr.html
        'fdr': SelectFdr(f_classif, alpha=0.05),
        # http://scikit-learn.org/stable/modules/feature_selection.html
        'extratrees': ExtraTreesClassifier(n_estimators=50, max_features='auto',
                                           n_jobs=n_jobs, random_state=0),
        'pca': PCA(n_components='mle'),
        'rpca': PCA(svd_solver='randomized', random_state=0),
        'lda': LDA(),
    }

    # Feature selection parameter values for grid search
    max_feats = ['auto']
    if n_feats < 10:
        feats_to_sel = range(2, n_feats, 2)
        n_comps = range(1, n_feats, 2)
    else:
        feats_to_sel = range(2, 20, 4)
        n_comps = range(1, 30, 4)
    max_feats.extend(feats_to_sel)

    n_comps_pca = list(n_comps)
    n_comps_pca.extend(['mle'])

    fsgrid = {
        # estimator__C uses the nested-parameter syntax; the removed
        # estimator_params argument used to carry these dicts
        'rfe': dict(estimator__C=[0.1, 1, 10],
                    n_features_to_select=feats_to_sel),
        'rfecv': dict(estimator__C=[0.1, 1, 10]),
        'univariate': dict(percentile=[1, 3, 5, 10]),
        'fpr': dict(alpha=[1, 3, 5, 10]),
        'fdr': dict(alpha=[1, 3, 5, 10]),
        'extratrees': dict(n_estimators=[1, 3, 5, 10, 30, 50],
                           max_features=max_feats),
        'pca': dict(n_components=n_comps_pca, whiten=[True, False]),
        'rpca': dict(n_components=n_comps, iterated_power=[3, 4, 5],
                     whiten=[True, False]),
        'lda': dict(n_components=n_comps),
    }

    return fsmethods[fsmethod], fsgrid[fsmethod]
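# A hedged usage sketch for get_fsmethod: wrap the returned selector in a
# Pipeline with a classifier so the grid can be searched (the real caller is
# not shown in this snippet; names below are assumptions).
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

fsm, fsgrid = get_fsmethod('univariate', n_feats=100, n_subjs=50)
pipe = Pipeline([('fs', fsm), ('clf', SVC(kernel='linear'))])
param_grid = {'fs__' + k: v for k, v in fsgrid.items()}
search = GridSearchCV(pipe, param_grid)  # then search.fit(X, y) on real data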
def fit(self, X, y):
    """Fit the data."""
    n_len = len(X[0])
    if n_len > 30:
        n_cut = 13
    elif n_len > 15:
        n_cut = 7
    else:
        n_cut = 3
    uni = FeatureUnion([('lda', LDA(n_components=n_cut - 1)),
                        ('pca', PCA(n_components=n_cut))])
    pipe = Pipeline([('scaler', MinMaxScaler()), ('union', uni)])
    self.pipe = pipe
    self.pipe.fit(X, y)
    return self
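# A minimal demonstration, on sklearn's wine data (an assumption; the real
# data is not shown), that the FeatureUnion above concatenates the LDA and
# PCA outputs column-wise: 13 input features take the n_cut = 3 branch.
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import MinMaxScaler

X, y = load_wine(return_X_y=True)
uni = FeatureUnion([('lda', LDA(n_components=2)), ('pca', PCA(n_components=3))])
pipe = Pipeline([('scaler', MinMaxScaler()), ('union', uni)])
print(pipe.fit_transform(X, y).shape)  # (178, 5): 2 LDA + 3 PCA columns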
def __init__(self, classifier=None, debug=False):
    np.random.seed(10)
    if classifier == "LDA":
        self.classifier = LDA()
    else:
        self.classifier = DecisionTreeClassifier(random_state=0)
    self.dim_reducer = PCA()
    self.trainDataMatrix = None
    self.labels = None
    self.trained = False
    # Debug mode restricts training/testing to small subsets
    # (10000 training points, 100 test points).
    self.debug = debug
    self.debug_training_len = 10000
    self.debug_len = 100
def dim_reduction_LDA(X, Y, n_dim):
    """
    Reduce the dimension by LDA.

    :param X: matrix data (n*k); n is the number of samples, k is the
        dimension of each sample
    :param Y: reference labels
    :param n_dim: number of dimensions we desire to reduce to
    :return reduced_X: matrix data (n*n_dim)
    """
    try:
        reduced_X = LDA(n_components=n_dim).fit_transform(X, Y)
    except Exception:
        print("dimension error")
        reduced_X = X
    return np.array(reduced_X)
def lda(arr0, target, n_components):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    matrix = np.array(arr0)
    target = np.array(target)
    temp = LDA(n_components=n_components).fit(matrix, target)
    coef = temp.coef_
    # covariance = temp.covariance_
    mean = temp.means_
    priors = temp.priors_
    scalings = temp.scalings_
    xbar = temp.xbar_
    # label = data_utility.retrieve_nan_index(temp.transform(matrix).tolist(), index)
    label = temp.transform(matrix).tolist()
    return (label, coef.tolist(), mean.tolist(), priors.tolist(),
            scalings.tolist(), xbar.tolist())
def features(self, pixels, gt=None):
    # Grab the feature stack.
    fullFeatures = naive_features(pixels)
    print(fullFeatures.shape)
    # If an LDA trained on ground truth already exists, transform the new features.
    if gt is None and self.lda is not None:
        print(self.lda)
        return self.lda.transform(fullFeatures)
    assert gt is not None
    # Otherwise, train the LDA.
    self.lda = LDA(n_components=self.n_comp).fit(fullFeatures, gt)
    print(self.lda)
    return self.lda.transform(fullFeatures)
def lda_data(X, y, n_components=2, num_data_points=-1):
    lda = LDA(n_components=n_components)
    if num_data_points > 0:
        X = X[:num_data_points, :]
        y = y[:num_data_points]
    print("Performing mapping")
    start = timeit.default_timer()
    mapped = lda.fit_transform(X, y)
    end = timeit.default_timer()
    print("Mapping completed in %f seconds" % (end - start))
    return mapped, lda
def drawLDA(X_true, X_false, X_test, suffix=""):
    X = X_true + X_false
    Y = [1] * len(X_true) + [0] * len(X_false)
    plc = 0
    lda = LDA(solver="eigen", n_components=2)
    canfit = False
    hred = False
    try:
        lda.fit(X, Y)
        canfit = True
    except Exception:
        try:
            # Fall back to the first 140 features if the full fit fails.
            print("fit error")
            X = np.array(X)
            X = X[:, :140]
            lda.fit(X, Y)
            canfit = True
            hred = True
        except Exception:
            print("cannot visualize")
    if not canfit:
        return
    if hred:
        Xlda_true = lda.transform(np.array(X_true)[:, :140])
        Xlda_false = lda.transform(np.array(X_false)[:, :140])
    else:
        Xlda_true = lda.transform(X_true)
        Xlda_false = lda.transform(X_false)
    plt.scatter(Xlda_true[:, 0], Xlda_true[:, 1],
                color=plp[plc][0], marker=plp[plc][1], label="thbgm")
    plc += 1
    plt.scatter(Xlda_false[:, 0], Xlda_false[:, 1],
                color=plp[plc][0], marker=plp[plc][1], label="not thbgm")
    plc += 1
    if len(X_test) > 0:
        if hred:
            Xlda_test = lda.transform(np.array(X_test)[:, :140])
        else:
            Xlda_test = lda.transform(np.array(X_test))
        plt.scatter(Xlda_test[:, 0], Xlda_test[:, 1],
                    color=plp[plc][0], marker=plp[plc][1], label="test")
        plc += 1
    print(lda.coef_.shape)
    plt.xlabel("feature1")
    plt.ylabel("feature2")
    plt.title("Classification with " + useFeature)
    plt.legend()
    plt.savefig("./learn/visualize/lda_" + useFeature + suffix + ".png")
    plt.clf()
def learners(clf=None, kwds=None):
    "Return dict of available classifiers and regressors"
    models = {}
    # common classifiers
    models['LinearSVC'] = svm.LinearSVC()
    models['SVC'] = svm.SVC()
    models['KNeighborsClassifier'] = KNeighborsClassifier()
    models['KNeighborsClassifier'].n_jobs = 8
    models['RandomForestClassifier'] = RandomForestClassifier()
    models['ExtraTreesClassifier'] = ExtraTreesClassifier()
    models['GaussianNB'] = GaussianNB()
    models['BernoulliNB'] = BernoulliNB()
    models['SGDClassifier'] = SGDClassifier()
    models['RidgeClassifier'] = RidgeClassifier(solver='lsqr')
    models['GradientBoostingClassifier'] = GradientBoostingClassifier()
    models['DecisionTreeClassifier'] = DecisionTreeClassifier()
    models['PCA'] = PCA()
    models['XGBClassifier'] = XGBClassifier()
    # common ensemble classifiers
    models['AdaBoostClassifier'] = AdaBoostClassifier()
    models['BaggingClassifier'] = BaggingClassifier()
    # examples of how to construct pipelines
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['RandomForestClassifier'])]
    models['pca_rfc'] = Pipeline(steps=steps)
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['KNeighborsClassifier'])]
    models['pca_knc'] = Pipeline(steps=steps)
    steps = [('PCA', PCA(n_components='mle', whiten=True)),
             ('clf', models['SVC'])]
    models['pca_svc'] = Pipeline(steps=steps)
    steps = [('LDA', LDA()), ('clf', models['RandomForestClassifier'])]
    models['lda_rfc'] = Pipeline(steps=steps)
    # common regressors
    models['RandomForestRegressor'] = RandomForestRegressor()
    models['ExtraTreesRegressor'] = ExtraTreesRegressor()
    models['DecisionTreeRegressor'] = DecisionTreeRegressor()
    models['SVR'] = SVR()
    models['SGDRegressor'] = SGDRegressor()
    models['GradientBoostingRegressor'] = GradientBoostingRegressor()
    models['AdaBoostRegressor'] = AdaBoostRegressor()
    models['BaggingRegressor'] = BaggingRegressor()
    return models
def fit_LDA_from_codes_file(codes_file, clique_idx, lda_components=[50, 100, 200],
                            outlda="LDAs.pk"):
    """Fits an LDA from a codes file and saves it into a new pickle file."""
    clique_idx = np.asarray(load_pickle(clique_idx))
    codes = np.asarray(load_pickle(codes_file))

    # Remove Nones
    none_idx = np.where(np.equal(codes, None))[0]
    codes = np.delete(codes, none_idx, axis=0)
    clique_idx = np.delete(clique_idx, none_idx, axis=0)

    # Hack to make it the right shape
    C = np.zeros((codes.shape[0], codes[0].shape[0]))
    for k, code in enumerate(codes):
        C[k] = code
    codes = C

    # Remove nans
    nan_idx = np.where(np.isnan(codes))[0]
    codes = np.delete(codes, nan_idx, axis=0)
    clique_idx = np.delete(clique_idx, nan_idx, axis=0)
    print(codes.shape)

    # Remove infs
    inf_idx = np.where(np.isinf(codes))[0]
    codes = np.delete(codes, inf_idx, axis=0)
    clique_idx = np.delete(clique_idx, inf_idx, axis=0)
    print(codes.shape)

    print("LDA components: ", lda_components)
    #return codes, clique_idx
    res = []
    for c in lda_components:
        lda = LDA(n_components=c)
        try:
            lda.fit(codes, clique_idx)
            res.append(lda)
        except Exception:
            # LDA.fit is deterministic, so retrying the same fit cannot
            # succeed; skip this component count instead of looping forever.
            print("LDA error, skipping n_components=%d" % c)
    save_pickle(res, outlda)
def train_predict(X, y, Xt, yt=[], c=1):
    if c == 1:
        #clf = xgb_classifier(num_round=45, eta=0.1, min_child_weight=5, depth=10, subsample=0.5, col=1)
        clf = xgb_classifier(num_round=45, eta=0.1, min_child_weight=20,
                             depth=20, subsample=0.1, col=0.7)
        #clf = xgb_classifier(num_round=300, eta=0.01, min_child_weight=20, depth=8, subsample=0.1, col=0.7)
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
def optimize(self, X, y):
    clf = LDA()
    scores = []
    train_times = []
    for train, test in StratifiedKFold(n_splits=10).split(X, y):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        t0 = self._timer()  # start the clock before fitting, not after
        clf.fit(X_train.toarray(), y_train)
        train_times.append(self._timer() - t0)
        scores.append(clf.score(X_test.toarray(), y_test))
    self._mean_score = np.mean(scores)
    self._score_std = np.std(scores)
    self._mean_train_time = np.mean(train_times)
    self._train_time_std = np.std(train_times)
def lda_model(x_train, y_train, x_test, y_test):
    global get_test
    print("LDA model learning...")
    start_time = time.time()
    # LDA assumes a common covariance matrix among classes, while QDA doesn't.
    clf = LDA()
    #clf = QDA()
    clf.fit(x_train, y_train)
    learning_time = time.time() - start_time
    print("training time is: {:.5f} seconds.".format(learning_time))
    '''
    # use LDA for dimensionality reduction, down to n_classes-1 dimensions
    x_t = clf.transform(x_train)
    print(x_train.shape)
    print(x_t.shape)
    print(x_train[:3])
    print(x_t[:3])
    '''
    print("Model Prediction...")
    #y_predict = clf.predict(x_test)
    start_time = time.time()
    # get probability predictions
    y_prob = clf.predict_proba(x_test)
    prediction_time = time.time() - start_time
    print("prediction time is: {:.5f} seconds.".format(prediction_time))
    if get_test:
        # the data is from the real test set; write the result to a file
        output_result(y_prob)
    else:
        # the test set was split from the train set; compute the loss value
        # encode string labels 'Class_1', 'Class_2', ... to [0, 1, ..., 8]
        encoder = LabelEncoder()
        y_true = encoder.fit_transform(y_test)
        # the class labels in the encoder must match those in the classifier
        assert (encoder.classes_ == clf.classes_).all()
        score = logloss_mc(y_true, y_prob)
        print(" -- Multiclass logloss on validation set: {:.5f}.".format(score))
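# A small synthetic illustration (assumed data, not from the project above)
# of the LDA-vs-QDA comment in lda_model: when class covariances differ,
# QDA can fit a boundary that LDA's shared-covariance assumption cannot.
import numpy as np
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
                                           QuadraticDiscriminantAnalysis)

rng = np.random.RandomState(0)
X0 = rng.multivariate_normal([0, 0], [[1, 0], [0, 1]], 200)    # class 0
X1 = rng.multivariate_normal([2, 2], [[3, 0], [0, 0.3]], 200)  # class 1: different covariance
X = np.vstack([X0, X1])
y = np.array([0] * 200 + [1] * 200)
for clf in (LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis()):
    print(type(clf).__name__, clf.fit(X, y).score(X, y))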
def train(self, workDir, classifier, ldaDim):
    fname = "{}labels.csv".format(workDir)  # labels of faces
    print("Loading labels " + fname + " csv size: " +
          str(os.path.getsize(fname)))
    if os.path.getsize(fname) > 0:
        print(fname + " file is not empty")
        labels = pd.read_csv(fname, header=None).values[:, 1]
        print(labels)
    else:
        print(fname + " file is empty")
        labels = "1:mini/dummy/1.png"  # dummy string to start the process
    logger.debug(list(map(os.path.dirname, labels)))
    logger.debug(list(map(os.path.split, map(os.path.dirname, labels))))
    logger.debug(list(map(itemgetter(1),
                          map(os.path.split, map(os.path.dirname, labels)))))
    labels = list(map(itemgetter(1),
                      map(os.path.split, map(os.path.dirname, labels))))

    fname = "{}reps.csv".format(workDir)  # representations of faces
    print("Loading embedding " + fname + " csv size: " +
          str(os.path.getsize(fname)))
    if os.path.getsize(fname) > 0:
        print(fname + " file is not empty")
        # get embeddings as a matrix from reps.csv
        embeddings = pd.read_csv(fname, header=None).values
    else:
        print(fname + " file is empty")
        embeddings = np.zeros((2, 150))  # empty array since the csv is empty

    # LabelEncoder is a utility class to help normalize labels such that
    # they contain only values between 0 and n_classes-1.
    self.le = LabelEncoder().fit(labels)
    labelsNum = self.le.transform(labels)
    nClasses = len(self.le.classes_)
    print("Training for {} classes.".format(nClasses))

    if classifier == 'LinearSvm':
        self.clf = SVC(C=1, kernel='linear', probability=True)
    elif classifier == 'GMM':
        self.clf = GMM(n_components=nClasses)

    if ldaDim > 0:
        clf_final = self.clf
        self.clf = Pipeline([('lda', LDA(n_components=ldaDim)),
                             ('clf', clf_final)])
    self.clf.fit(embeddings, labelsNum)  # link embeddings to labels

    fName = "{}classifier.pkl".format(workDir)
    print("Saving classifier to '{}'".format(fName))
    with open(fName, 'wb') as f:
        # serialize the encoder and classifier for later recognition
        pickle.dump((self.le, self.clf), f)
def lda(data_matrix, target, n_components):
    """
    Linear Discriminant Analysis (LDA)

    Adapted from: http://scikit-learn.org/stable/_downloads/plot_pca_vs_lda.py

    Args:
    - data_matrix: a matrix-like object containing the data, columns for
      features and rows for an item (developer in this case)
    - target: an array-like object containing the class of the data item
      (developer) at the respective row
    - n_components: the number of discriminant components to be extracted
      (0 < n_components <= #features)

    Return:
    - a matrix-like object containing the transformed data with
      n_components columns
    """
    lda_obj = LDA(n_components=n_components)
    return lda_obj.fit(data_matrix, target).transform(data_matrix)
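# A usage sketch for lda() above on sklearn's iris data (an assumption; the
# original project's developer data is not shown). With 3 classes, LDA
# yields at most 2 discriminant components.
from sklearn.datasets import load_iris

iris = load_iris()
reduced = lda(iris.data, iris.target, n_components=2)
print(reduced.shape)  # (150, 2)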
def dimensionalReduction(df_list, method, methods_list):
    '''
    :param df_list: dataframe of input data
    :param method: the dimension-reduction method to apply
    :param methods_list: dict mapping method names to their n_components,
        e.g. {'pca': n_components, 'lda': n_components, 'tSNE': n_components}
    :return: the reduced data
    '''
    df_list = pd.DataFrame(df_list)
    print(df_list.shape)
    if method not in methods_list.keys():
        raise ValueError("please use a method in methods_list")
    if method == 'pca':
        args = methods_list[method]
        model = PCA(n_components=args)
        x_pca = model.fit_transform(df_list)
        return x_pca
    if method == 'lda':
        args = methods_list[method]
        model = LDA(n_components=args)
        y = df_list['target']  # labels must be 1-d for LDA
        x_lda = model.fit_transform(df_list.drop('target', axis=1), y)
        return x_lda
    if method == 'tSNE':
        args = methods_list[method]
        tsne = manifold.TSNE(n_components=args)
        X_tsne = tsne.fit_transform(df_list)
        return X_tsne
    if method == 'lle':
        args = methods_list[method]
        lle = manifold.LocallyLinearEmbedding(n_components=args)
        x_lle = lle.fit_transform(df_list)
        return x_lle
    if method == 'isomap':
        args = methods_list[method]
        x_iso = manifold.Isomap(n_components=args).fit_transform(df_list)
        return x_iso
    if method == 'mds':
        args = methods_list[method]
        x_mds = manifold.MDS(n_components=args).fit_transform(df_list)
        return x_mds
def LDA(data, label, pred_data, pred_last):
    '''Not good; normalization is not needed.'''
    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)
    # The local import shadows this function's name, so LDA() below
    # refers to the scikit-learn class.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    gnb = LDA()
    gnb.fit(data, label)
    print(gnb.score(data, label))
    pred_result = gnb.predict(pred_data)
    print("Number of mislabeled points out of a total %d points : %d" %
          (pred_data.shape[0], (pred_last != pred_result).sum()))
    print(gnb.score(pred_data, pred_last))
    return pred_result
def train(subject, data_path, plot=False):
    d = load_train_data(data_path, subject)
    x, y = d['x'], d['y']
    print('n_preictal', np.sum(y))
    print('n_interictal', np.sum(1 - y))
    n_channels = x.shape[1]
    n_fbins = x.shape[2]

    x, y = reshape_data(x, y)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)

    lda = LDA()
    lda.fit(x, y)
    coef = lda.scalings_ * lda.coef_[:1].T

    channels = []
    fbins = []
    for c in range(n_channels):
        fbins.extend(range(n_fbins))  # 0: delta, 1: theta, ...
        channels.extend([c] * n_fbins)

    if plot:
        fig = plt.figure()
        for i in range(n_channels):
            if n_channels == 24:
                fig.add_subplot(4, 6, i + 1)  # subplot indices are 1-based
            else:
                fig.add_subplot(4, 4, i + 1)
            ax = plt.gca()
            ax.set_xlim([0, n_fbins])
            ax.set_xticks(np.arange(0.5, n_fbins + 0.5, 1))
            ax.set_xticklabels(np.arange(0, n_fbins))
            max_y = max(abs(coef)) + 0.01
            ax.set_ylim([0, max_y])
            ax.set_yticks(
                np.around(np.arange(0, max_y, max_y / 4.0), decimals=1))
            for label in (ax.get_xticklabels() + ax.get_yticklabels()):
                label.set_fontsize(15)
            plt.bar(range(0, n_fbins),
                    abs(coef[i * n_fbins:i * n_fbins + n_fbins]))
        fig.suptitle(subject, fontsize=20)
        plt.show()

    coefs = np.reshape(coef, (n_channels, n_fbins))
    return lda, data_scaler, coefs
def classifier_comparison(X, y):
    """
    Compare classifiers.

    Args:
        X: training samples, size=[n_samples, n_features]
        y: class labels, size=[n_samples, 1]
    Returns:
        None
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    import scipy

    # Exhaustive grid search
    exhaustive_parameters = {'kernel': ['rbf'], 'C': [1, 10, 100, 1000],
                             'gamma': [1e-3, 1e-4]}
    clf_SVC_exhaustive = GridSearchCV(SVC(), exhaustive_parameters)
    # Randomized parameter optimization
    randomized_parameter = {'kernel': ['rbf'],
                            'C': scipy.stats.expon(scale=100),
                            'gamma': scipy.stats.expon(scale=.1)}
    clf_SVC_randomized = RandomizedSearchCV(SVC(), randomized_parameter)

    names = ["Linear SVM", "RBF SVM", "RBF SVM with Grid Search",
             "RBF SVM with Random Grid Search", "Decision Tree",
             "Random Forest", "AdaBoost", "Naive Bayes", "LDA", "QDA"]
    classifiers = [
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        clf_SVC_exhaustive,
        clf_SVC_randomized,
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LDA(),
        QDA()]
    for name, clf in zip(names, classifiers):
        logger.info('Use %s:' % name)
        train_classifier(clf, X, y)
def with_lda(X_train_std, y_train, X_test_std, y_test):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
def get_LDA(X_std, y):
    sklearn_lda = LDA(n_components=2)
    Xred_lda = sklearn_lda.fit_transform(X_std, y)
    cmap = plt.cm.get_cmap('Accent')
    mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
    mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
    plt.figure(figsize=(12, 8))
    for lab, col in zip(mclasses, mcolors):
        plt.scatter(Xred_lda[y == lab, 0], Xred_lda[y == lab, 1],
                    label=lab, c=col)
    plt.xlabel('LDA/Fisher Direction 1')
    plt.ylabel('LDA/Fisher Direction 2')
    plt.legend(loc='upper right', fancybox=True)
    plt.show()
def LDA10Fold(X, y):
    acc = []
    kf = KFold(n_splits=10, shuffle=True)
    for train_index, test_index in kf.split(X):
        yTrain = y[train_index]
        yTest = y[test_index]
        clf = LDA()
        clf.fit(X[train_index], yTrain)
        newRepTrain = clf.transform(X[train_index])
        newRepTest = clf.transform(X[test_index])
        nclf = neighbors.KNeighborsClassifier(n_neighbors=2)
        nclf.fit(newRepTrain, yTrain)
        XPred = nclf.predict(newRepTest)
        acc.append(np.sum(XPred == yTest) * 1.0 / yTest.shape[0])
    return np.mean(acc), np.std(acc)
def runTestPairs(e):
    x = e[0]
    y = e[1]
    trainX = labelsmaptra[x] + labelsmaptra[y]
    labelsX = [x] * len(labelsmaptra[x]) + [y] * len(labelsmaptra[y])
    clf = LDA()
    clf.fit(trainX, labelsX)
    testX = labelsmaptes[x] + labelsmaptes[y]
    labelsX = [x] * len(labelsmaptes[x]) + [y] * len(labelsmaptes[y])
    error = 0
    for lab, test in zip(labelsX, testX):
        pred = clf.predict([test])[0]  # predict expects a 2-d array
        if lab != pred:
            error += 1
    print(e, error, error / float(len(testX)))
    return (e, error, error / float(len(testX)))
def multi_classifier():
    classifiers = [
        KNeighborsClassifier(4),
        SVC(kernel="linear", C=0.025),
        SVC(),
        #GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        #DecisionTreeClassifier(max_depth=7),
        #RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        #RandomForestClassifier(),
        LDA(),
        AdaBoostClassifier(),
        #GaussianNB(),
        #QuadraticDiscriminantAnalysis()
    ]
    for clf in classifiers:
        clf.fit(sx, sy)
        py = clf.predict(tx)
        print(accuracy_score(ty, py))
def trainDayNightClassifier(self):
    """
    Trains the model classifier, given that a histogram feature matrix was
    created for day and night. The method trains an LDA. See the diary entry
    of 07/05/2013 for an experiment showing that LDA is more robust to wrong
    labels than SVM.
    """
    if self.histNight is None or self.histDay is None:
        raise RuntimeError("day or night histogram was not computed "
                           "before calling this function")
    hist = np.concatenate((self.histDay, self.histNight))
    lbl = np.concatenate((np.zeros(len(self.histDay)),
                          np.ones(len(self.histNight))))
    self.modelClassifier = LDA()
    self.modelClassifier.fit(hist, lbl)