def renew(directory):
    """Refit the LDA discriminant formula from the client-record CSV.

    Reads '用户记录.csv' under *directory*, fits a LinearDiscriminantAnalysis
    on the trading-behaviour features against the client category, and writes
    the per-class coefficients and intercepts to '判别公式参数.csv'.

    :param directory: path prefix (including trailing separator) for both CSVs.
    :return: None
    """
    # Feature columns used by the discriminant. The "other type" share
    # ("其他型占比") is deliberately excluded from x.
    feature_cols = [
        "基金交易频率", "最大交易金额", "混合型占比",
        "债券指数或债券型占比", "货币型占比", "股票指数或股票型占比"
    ]
    client = pd.read_csv(directory + '用户记录.csv', encoding='gbk', index_col=0)
    x = client[feature_cols]
    y = client['客户类别']

    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(x, y)

    # One row per class: class label, feature coefficients, intercept.
    df = pd.DataFrame(data=lda.coef_, columns=feature_cols)
    df['截距'] = lda.intercept_
    df.insert(0, '客户类别', lda.classes_)
    df.to_csv(directory + "判别公式参数.csv", encoding='gbk')
def test_LinearDiscriminantAnalysis_solver(*data):
    """Test how the ``solver`` parameter affects LDA predictive performance.

    :param data: tuple of (X_train, X_test, y_train, y_test).
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    # The former per-solver branching was redundant: 'svd' ignores shrinkage,
    # and shrinkage=None is already the default for 'lsqr'/'eigen', so a
    # single constructor call covers every solver identically.
    for solver in ['svd', 'lsqr', 'eigen']:
        lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        lda.fit(X_train, y_train)
        print('Score at solver=%s: %.2f' % (solver, lda.score(X_test, y_test)))
def train(train_data_1, train_data_2, numFilt):
    """Train CCACSP spatial filters and an LDA classifier for two classes.

    :param train_data_1: class-1 trials, shape (trials, channels, samples).
    :param train_data_2: class-2 trials, same layout — TODO confirm shapes.
    :param numFilt: number of CCACSP filters to compute.
    :return: (ccacsp_filts, fitted LDA classifier)
    """
    numTrials_1 = np.size(train_data_1, 0)
    # BUG FIX: the class-2 trial count was taken from train_data_1, which
    # mislabels samples (or crashes) whenever the two classes differ in size.
    numTrials_2 = np.size(train_data_2, 0)

    # Train the CCACSP filters.
    ccacsp_filts = calc_CCACSP(train_data_1, train_data_2, numFilt)

    # Extract log-variance features along the sample axis for each class.
    train_filt_1 = apply_CCACSP(train_data_1, ccacsp_filts, numFilt)
    train_logP_1 = np.squeeze(np.log(np.var(train_filt_1, axis=2)))
    train_filt_2 = apply_CCACSP(train_data_2, ccacsp_filts, numFilt)
    train_logP_2 = np.squeeze(np.log(np.var(train_filt_2, axis=2)))

    # Shrinkage LDA ('lsqr' + 'auto') is robust when features outnumber trials.
    clf = sklda.LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    X = np.concatenate((train_logP_1, train_logP_2), axis=0)
    y = np.concatenate((np.zeros(numTrials_1), np.ones(numTrials_2)))

    # Train the classifier: class 1 -> label 0, class 2 -> label 1.
    clf.fit(X, y)
    return ccacsp_filts, clf
def model_lda(train, test, label):
    """Fit an LDA on the labelled training columns and score the test set.

    :param train: training DataFrame with *label* columns and 'hotel_cluster'.
    :param test: test DataFrame containing the *label* columns.
    :param label: list of feature column names.
    :return: (best-probability summary from util.best_proba, fitted model)
    """
    classifier = discriminant_analysis.LinearDiscriminantAnalysis()
    classifier.fit(train[label], train['hotel_cluster'])
    probabilities = classifier.predict_proba(test[label])
    return util.best_proba(probabilities), classifier
def lda(self, n_components=2, solver='svd'):
    """Project self.X onto *n_components* linear discriminant axes.

    :param n_components: output dimensionality of the projection.
    :param solver: LDA solver to use.
    :return: the LDA-transformed data array.
    """
    print('Calculating linear discriminant analysis....\n')
    started = time.time()
    model = discriminant_analysis.LinearDiscriminantAnalysis(
        solver=solver, n_components=n_components)
    projected = model.fit_transform(self.X, self.y)
    # plot2D(projected, self.y, 'LDA', 'LDA: 1078 cells with 10 subtypes',
    #        (time.time() - started))
    return projected
def test_LinearDiscriminantAnalysis_solver(*data):
    """Print the LDA test-set score for each available solver.

    :param data: tuple of (X_train, X_test, y_train, y_test).
    """
    X_train, X_test, y_train, y_test = data
    for solver in ('svd', 'lsqr', 'eigen'):
        model = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        print('Score at solver = %s : %.2f' % (solver, score))
def classifyWithLinearDiscriminant(xTrain, xTest, yTrain, yTest):
    """Fit an LDA classifier, then print its estimator repr, test-set
    accuracy and a per-class classification report (labels 1-3)."""
    model = discriminant_analysis.LinearDiscriminantAnalysis()
    print(model.fit(xTrain, yTrain))
    print("Linear Discriminant Analysis Score: " + str(model.score(xTest, yTest)))
    print("Linear Discriminant Analysis Report: ")
    predictions = model.predict(xTest)
    print(classification_report(yTest, predictions, labels=[1, 2, 3]))
def solve_weights(features_Vp, labels):
    """Fit an LDA to the feature matrix and return its weights and projection.

    :param features_Vp: sample feature matrix.
    :param labels: class label per sample.
    :return: (coefficient matrix, LDA-transformed features)
    """
    model = lda.LinearDiscriminantAnalysis()
    model.fit(features_Vp, labels)
    features_Up = model.transform(features_Vp)
    print(model.coef_.shape, features_Up.shape)
    return model.coef_, features_Up
def _evaluate_classifier(name, classifier, X, y):
    """Fit *classifier*, print its mean/std 10-fold CV score and confusion
    matrix under *name*, and return the fitted estimator."""
    fitted = classifier.fit(X, y)
    scores = ms.cross_val_score(fitted, X, y, cv=10)
    estimates = ms.cross_val_predict(fitted, X, y, cv=10)
    confusion = met.confusion_matrix(y, estimates)
    print("%s - Score %f +/-%f" % (name, np.mean(scores), np.std(scores)))
    print(confusion, "\n")
    return fitted


def main():
    """Min-max normalise the 2-D dataset, compare Naive Bayes, QDA and LDA
    with 10-fold cross-validation, and plot each decision region.

    The three previously copy-pasted evaluation blocks are factored into
    ``_evaluate_classifier``; printed output is unchanged.
    """
    data = pd.read_csv('data_3_6.csv', names=['x', 'y', 'class'])

    # Min-max scale each coordinate into [0, 1].
    max_x = data['x'].max()
    min_x = data['x'].min()
    max_y = data['y'].max()
    min_y = data['y'].min()
    trans_x = data['x'].transform(lambda x: (x - min_x) / (max_x - min_x))
    trans_y = data['y'].transform(lambda x: (x - min_y) / (max_y - min_y))

    reshape_x = trans_x.values.reshape(-1, 1)
    reshape_y = trans_y.values.reshape(-1, 1)
    reshape_class = data['class'].values.reshape(-1, 1).ravel()
    # NOTE: feature column order is (y, x), matching the original layout.
    reshape_data = np.append(reshape_y, reshape_x, axis=1)

    nb_fit = _evaluate_classifier("Naive Bayes", nb.MultinomialNB(),
                                  reshape_data, reshape_class)
    qda_fit = _evaluate_classifier("QDA", da.QuadraticDiscriminantAnalysis(),
                                   reshape_data, reshape_class)
    lda_fit = _evaluate_classifier("LDA", da.LinearDiscriminantAnalysis(),
                                   reshape_data, reshape_class)

    # One decision-region plot per fitted model, then show all figures.
    for fitted in (nb_fit, qda_fit, lda_fit):
        plt.figure()
        mlxplt.plot_decision_regions(reshape_data, reshape_class, clf=fitted)
    plt.show()
def real_scores(values, target):
    """Cross-validate a suite of classifiers on the raw data and on every
    registered preprocessing variant.

    :param values: feature matrix.
    :param target: class labels.
    :return: dict mapping "<preprocessor>+<classifier>" to the mean 10-fold
        cross-validated score (0 when a preprocessor fails outright).
    """
    # NOTE: cross_validate re-fits its estimator per fold, so these initial
    # .fit() calls only provide configured estimator objects.
    clf_models = {}
    svm_clf = svm.SVC(gamma="auto").fit(values, target)
    clf_models["svm"] = svm_clf  # Actually not needed, the cv does the training again
    lg_clf = linear_model.LogisticRegression(
        random_state=constants.RANDOM_STATE, solver='lbfgs').fit(values, target)
    clf_models["logistic_regression"] = lg_clf
    lineardisc_clf = discriminant_analysis.LinearDiscriminantAnalysis().fit(
        values, target)
    clf_models["linear_discriminant"] = lineardisc_clf
    neigh_clf = neighbors.KNeighborsClassifier().fit(values, target)
    clf_models["kneighbors"] = neigh_clf
    dectree_clf = tree.DecisionTreeClassifier(
        random_state=constants.RANDOM_STATE).fit(values, target)
    clf_models["decision_tree"] = dectree_clf
    gaussian_clf = naive_bayes.GaussianNB().fit(values, target)
    clf_models["gaussian_nb"] = gaussian_clf
    random_forest_clf = ensemble.RandomForestClassifier(n_estimators=100).fit(
        values, target)
    clf_models["random_forest"] = random_forest_clf
    gradient_boost_clf = ensemble.GradientBoostingClassifier().fit(
        values, target)
    clf_models["gradient_boosting"] = gradient_boost_clf

    def _mean_cv_score(model, X, y):
        # Mean 10-fold CV score under the shared raw scorer.
        cv_results = cross_validate(model, X, y, cv=10, scoring=SCORE_RAW)
        return np.mean(cv_results["test_score"])

    results = {}
    for clf in clf_models.keys():
        results["None+{}".format(clf)] = _mean_cv_score(clf_models[clf],
                                                        values, target)

    for pproc in pprocs.keys():
        try:
            new_values, new_target = preprocessor(pproc, values, target)
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; the best-effort "score 0" fallback is preserved.
        except Exception:
            for clf in clf_models.keys():
                results["{}+{}".format(pproc, clf)] = 0
            continue
        for clf in clf_models.keys():
            try:
                score = _mean_cv_score(clf_models[clf], new_values, new_target)
            except ValueError:
                # Preprocessed data incompatible with this estimator; fall
                # back to the untouched data so the result grid stays complete.
                score = _mean_cv_score(clf_models[clf], values, target)
            results["{}+{}".format(pproc, clf)] = score
    return results
def lda():
    """Print a section banner and return a fresh LDA classifier."""
    print(
        "\n################## Linear Discriminant Analysis ##################")
    from sklearn import discriminant_analysis
    return discriminant_analysis.LinearDiscriminantAnalysis()
def lda(X_tra, y_tra, X_val, y_val, index_no, classifier_num):
    """Regularise the data split, fit a 5-component SVD-solver LDA, and
    return the shared processLearning evaluation of the fitted model."""
    y_tra, X_tra, y_val, X_val, weights = dataRegulationSKL(
        y_tra, X_tra, y_val, X_val, index_no)
    model = skdisa.LinearDiscriminantAnalysis(solver='svd', n_components=5)
    model.fit(X_tra, y_tra)
    return processLearning(model, X_tra, y_tra, X_val, y_val)
def linear_discriminant_analysis(*data):
    """Fit a default LDA and print train then test accuracy.

    :param data: tuple of (X_train, X_test, y_train, y_test).
    """
    x_train, x_test, y_train, y_test = data
    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(x_train, y_train)
    print(model.score(x_train, y_train))
    print(model.score(x_test, y_test))
def test_LinearDiscriminantAnalysis_solver(*data):
    '''
    test score with different solver
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    # The former per-solver if/else was redundant: shrinkage=None is already
    # the default for 'lsqr'/'eigen' and is ignored by 'svd', so one
    # constructor call behaves identically for every solver.
    for solver in ['svd', 'lsqr', 'eigen']:
        lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        lda.fit(X_train, y_train)
        print('Score at solver={0}: {1}'.format(solver,
                                                lda.score(X_test, y_test)))
def run_plot_LDA():
    """Fit an LDA on the combined train/test data and plot its projection."""
    train_X, test_X, train_y, test_y = load_data()
    X = np.vstack((train_X, test_X))
    Y = np.vstack((train_y.reshape(train_y.size, 1),
                   test_y.reshape(test_y.size, 1)))
    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(X, Y)
    # Project each sample onto the discriminant direction: X · wᵀ + b.
    converted_X = np.dot(X, np.transpose(model.coef_)) + model.intercept_
    plot_LDA(converted_X, Y)
def __init__(self, experiment):
    """Build an LDA-backed semi-supervised projection for *experiment*.

    Emits a warning when family supervision is off, because the projection
    space then has dimension 1 and cannot be shown with hexagonal binnings.
    """
    SemiSupervisedProjection.__init__(self, experiment)
    self.projection = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=self.num_components)
    if not self.conf.families_supervision:
        warnings.warn(
            'Lda projection without families supervision. '
            'The projection space is of dimension 1, and so the projected '
            'instances cannot be displayed with hexagonal binnnings.')
def calc_fitness(self, data, target):
    """Evaluate this genome's feature subset with 4-fold stratified CV.

    Columns of *data* whose genome bit is 0 are dropped, the classifier
    selected by ``self.type`` is trained on each fold, and the mean/std
    accuracy plus mean precision and recall are cached on the instance.
    All work is skipped unless ``self.changed`` is set.
    """
    if self.changed:
        nfolds = 4
        scores = np.zeros(nfolds)
        precision = np.zeros(nfolds)
        recall = np.zeros(nfolds)
        X = np.copy(data)
        # Delete unselected feature columns from the highest index down so
        # earlier deletions never shift the indices still to be removed.
        for i in range(0, len(self.genome)):
            if self.genome[len(self.genome) - 1 - i] == 0:
                X = np.delete(X, len(self.genome) - 1 - i, 1)
        i = 0  # reused below as the fold counter
        skf = cross_validation.StratifiedKFold(n_splits=nfolds)
        for train, test in skf.split(X, target):
            # Instantiate and fit the classifier chosen by self.type.
            if self.type == 'dt':
                self.clf = tree.DecisionTreeClassifier(
                    criterion='entropy',
                    splitter='random').fit(X[train], target[train])
            elif self.type == 'svm':
                self.clf = svm.SVC(kernel='linear').fit(
                    X[train], target[train])
            elif self.type == 'knn':
                self.clf = knn.KNeighborsClassifier().fit(
                    X[train], target[train])
            elif self.type == 'lr':
                self.clf = lm.LogisticRegression().fit(
                    X[train], target[train])
            elif self.type == 'nb':
                self.clf = nb.GaussianNB().fit(X[train], target[train])
            elif self.type == 'rf':
                self.clf = ens.RandomForestClassifier().fit(
                    X[train], target[train])
            elif self.type == 'et':
                self.clf = ens.ExtraTreesClassifier().fit(
                    X[train], target[train])
            elif self.type == 'mlp':
                self.clf = nn.MLPClassifier(
                    hidden_layer_sizes=(40, 5)).fit(X[train], target[train])
            elif self.type == 'lda':
                self.clf = da.LinearDiscriminantAnalysis().fit(
                    X[train], target[train])
            elif self.type == 'qda':
                self.clf = da.QuadraticDiscriminantAnalysis().fit(
                    X[train], target[train])
            else:
                # NOTE(review): an unrecognised type leaves clf as None and
                # the predict() below will crash — confirm callers never
                # pass an unknown type.
                self.clf = None
            p = self.clf.predict(X[test])
            scores[i] = metrics.accuracy_score(target[test], p)
            precision[i] = metrics.precision_score(target[test], p)
            recall[i] = metrics.recall_score(target[test], p)
            i += 1
        # Cache aggregated CV metrics and clear the dirty flag.
        self.accuracy = scores.mean()
        self.std = scores.std()
        self.precision = precision.mean()
        self.recall = recall.mean()
        self.changed = False
def __init__(self, conf):
    """Initialise the LDA projection from *conf*; log a warning when
    multiclass is disabled, since the projected space is then 1-D."""
    SemiSupervisedProjection.__init__(self, conf)
    self.projection = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=conf.num_components)
    if not self.conf.multiclass:
        self.conf.logger.warning(
            'Lda projection without families supervision. The projection '
            'space is of dimension 1, and so the projected instances '
            'cannot be displayed with hexagonal binnnings.')
def lda_projection():
    """Fit a 2-component LDA on the (module-level) digits data and plot
    the resulting embedding with the elapsed time in the title."""
    print("Computing Linear Discriminant Analysis projection")
    shifted = X.copy()
    shifted.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    embedding = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=2).fit_transform(shifted, y)
    plot_embedding(
        embedding,
        "Linear Discriminant projection of the digits (time %.2fs)" %
        (time() - t0))
def test_LinearDiscriminantAnalysis(*data):
    """Demonstrate basic LinearDiscriminantAnalysis usage: fit, then print
    the learned coefficients, intercept and test-set score."""
    X_train, X_test, y_train, y_test = data
    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' % (model.coef_, model.intercept_))
    print('Score: %.2f' % model.score(X_test, y_test))
def rdm_ldt(data, noise=0.):
    """Build an RDM of cross-validated linear-discriminant t-values.

    :param data: array of shape (m samples, n states, k activations).
    :param noise: std-dev of Gaussian noise added to a COPY of the data.
    :return: symmetric (n, n) matrix of |t| separability distances, zero
        on the diagonal.
    """
    # Our data is an mxnxk matrix. m = samples, n = states, k = activation.
    m, n, k = data.shape
    # BUG FIX: `data += noise` mutated the caller's array in place; adding
    # the noise out-of-place leaves the input untouched.
    data = data + np.random.normal(loc=0., scale=noise, size=data.shape)
    if m % 2 != 0:
        # Discard the last sample: the split below needs an even count.
        data = data[:-1]
        m = m - 1
        warnings.warn("for ldt we need an even number of samples. Discarding one sample")

    # Divide the data into two independent train/test halves.
    set1 = data[:m//2]
    set2 = data[m//2:]

    # Run linear discriminant analysis for each pair of states.
    rdm = np.zeros((n, n))
    for i in range(n):
        for j in range(i+1, n):
            # Train an LDA separating state i (label 0) from state j
            # (label 1) on the first half.
            train_state1 = set1[:, i, :].reshape(m//2, k)
            train_state2 = set1[:, j, :].reshape(m//2, k)
            X = np.concatenate((train_state1, train_state2), axis=0)
            y = np.hstack((np.zeros(m//2), np.ones(m//2)))
            lda = sklda.LinearDiscriminantAnalysis(solver='svd')
            lda.fit(X, y)
            coeffs = lda.coef_
            intercept = lda.intercept_

            # Signed distances of held-out samples to the decision plane;
            # sign depends on which side of the boundary a sample falls.
            test_state1 = set2[:, i, :].reshape(m//2, k)
            test_state2 = set2[:, j, :].reshape(m//2, k)
            distances_state1 = (np.dot(coeffs, np.transpose(test_state1)).reshape(-1) + intercept) / np.sqrt(np.sum(coeffs**2))
            distances_state2 = (np.dot(coeffs, np.transpose(test_state2)).reshape(-1) + intercept) / np.sqrt(np.sum(coeffs**2))

            # |t| of the two distance samples is the RDM distance.
            tvalue, p_value = stats.ttest_ind(distances_state1, distances_state2)
            distance = np.abs(tvalue)
            rdm[i, j] = distance
            rdm[j, i] = distance  # the RDM is symmetric

    # Zero the diagonal (a state has zero distance to itself).
    np.fill_diagonal(rdm, 0.)
    return rdm
def LDA(self, train_X, test_X, train_y, dims):
    """Reduce both splits to *dims* dimensions with supervised LDA.

    :param train_X: training features used to fit the projection.
    :param test_X: test features, transformed with the fitted projection.
    :param train_y: training labels supervising the projection.
    :param dims: number of output components.
    :return: (transformed train_X, train_y, transformed test_X)
    """
    reducer = sk_discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=dims)
    reducer.fit(train_X, train_y)
    print('LDA:')
    print('LDA的数据中心点:', reducer.means_)
    print('LDA分类的正确率:', reducer.score(train_X, train_y))
    return reducer.transform(train_X), train_y, reducer.transform(test_X)
def test_LinearDiscriminantAnalysis(*data):
    '''
    Demonstrate LinearDiscriminantAnalysis usage.
    :param data: tuple of (X_train, X_test, y_train, y_test).
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    estimator = discriminant_analysis.LinearDiscriminantAnalysis()
    estimator.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' %
          (estimator.coef_, estimator.intercept_))
    print('Score: %.2f' % estimator.score(X_test, y_test))
def _train(self):
    """Fit a degree-2 polynomial-expansion + LDA pipeline on the stored
    training data and keep its predict function as the model."""
    features = self._train_features
    outputs = self._train_outputs
    estimator = pipeline.Pipeline([
        ('expand', preprocessing.PolynomialFeatures(degree=2)),
        ('estim', discriminant_analysis.LinearDiscriminantAnalysis()),
    ])
    estimator.fit(features, outputs)
    self._model = estimator.predict
def test_LinearDiscriminantAnalysis(*data):
    '''
    test of LDA
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    model = discriminant_analysis.LinearDiscriminantAnalysis().fit(
        X_train, y_train)
    print('Coefficients: {0}, intercept {1}'.format(model.coef_,
                                                    model.intercept_))
    print('Score: {0}'.format(model.score(X_test, y_test)))
def lda(X, y, nr_components=2):
    """
    Linear discriminant analysis
    :param X: Input vectors
    :param y: Input classes
    :param nr_components: Dimension of output co-ordinates
    :return: Output co-ordinates
    """
    print("Computing Linear Discriminant Analysis projection")
    jittered = X.copy()
    jittered.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    model = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=nr_components)
    return model.fit_transform(jittered, y)
def lda_predict(self, unlabelled):
    """Classify one unlabelled sample with LDA trained on self.dataset.

    WARNING: Will only work when we have multiple data samples for each
    dataset (i.e., two for left, two for right, etc.)

    :param unlabelled: a single unlabelled sample (any nested shape).
    :return: the entry of self.positions keyed by the predicted target.
    """
    self.clf = discriminant_analysis.LinearDiscriminantAnalysis()
    self.clf.fit(self.dataset, self.targets)
    # BUG FIX: predict() requires a 2-D (n_samples, n_features) array;
    # passing the flattened 1-D vector raises ValueError on modern sklearn.
    sample = np.array(unlabelled).flatten().reshape(1, -1)
    target = self.clf.predict(sample)
    return self.positions[target[0]]
def LDA_proposed(data, target, n):
    """Score an LDA trained on MCPCA-reduced features.

    :param data: raw feature matrix fed to the MCPCA reducer.
    :param target: class labels.
    :param n: number of components for the reduction.
    :return: held-out accuracy of the LDA on a 35%/65% train/test split.
    """
    reducer = My_pca(n)
    results = reducer.MCPCA_fit_transform(data, target)
    X_train, X_test, Y_train, Y_test = train_test_split(
        results['new_data'], target, train_size=0.35, random_state=0)
    classifier = discriminant_analysis.LinearDiscriminantAnalysis()
    classifier.fit(X_train, Y_train)
    return classifier.score(X_test, Y_test)
def LDA_pca(data, target, n):
    """Score an LDA trained on PCA-reduced features.

    :param data: raw feature matrix.
    :param target: class labels.
    :param n: number of principal components to keep.
    :return: held-out accuracy of the LDA on a 35%/65% train/test split.
    """
    reduced = PCA(n).fit_transform(data)
    X_train, X_test, Y_train, Y_test = train_test_split(
        reduced, target, train_size=0.35, random_state=0)
    classifier = discriminant_analysis.LinearDiscriminantAnalysis()
    classifier.fit(X_train, Y_train)
    return classifier.score(X_test, Y_test)
def using_lda(X, s=None):
    """Project DataFrame *X* to 2-D with LDA (against the global *y*) and
    plot the embedding.

    :param X: pandas DataFrame of features.
    :param s: optional styling argument forwarded to plot_our_embedding.
    """
    # BUG FIX: DataFrame.as_matrix() was removed in pandas 1.0; to_numpy()
    # (available since 0.24) is the supported replacement.
    X = X.to_numpy()
    print("Computing Linear Discriminant Analysis projection")
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    X_lda = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=2).fit_transform(X2, y)
    plot_our_embedding(
        X_lda,
        "Linear Discriminant projection of the results (time %.2fs)" %
        (time() - t0), s)