def classify_using_lda(feat1, feat2, num_comp=2):
    """Fit a two-class LDA on two feature sets and plot the projected samples.

    Args:
        feat1: 2-D array of samples for the first class (labelled 1).
        feat2: 2-D array of samples for the second class (labelled 2).
        num_comp: Requested number of discriminant components. It is clamped
            to ``min(n_features, n_classes - 1)``, which is the most LDA can
            produce; with two classes that is always 1.

    Side effects:
        Prints shapes and the training accuracy, then shows a matplotlib plot.
    """
    n_plus = len(feat1)
    n_minus = len(feat2)
    X = np.concatenate((feat1, feat2), axis=0)
    # Labels are 1 for feat1 rows and 2 for feat2 rows.
    y = np.concatenate((np.zeros(n_plus), np.ones(n_minus)), axis=0)
    y += 1
    print(X.shape, y.shape, n_plus, n_minus, feat1.shape, feat2.shape)

    # LDA projects onto at most (n_classes - 1) axes, so a two-class problem
    # only ever has ONE discriminant component. That is why transform()
    # returns n_samples x 1; asking for more is rejected by recent
    # scikit-learn releases, so clamp the request here.
    max_comp = min(X.shape[1], len(np.unique(y)) - 1)
    lda = LDA(n_components=min(num_comp, max_comp))
    lda.fit(X, y)
    X_tr = lda.transform(X)
    print(X_tr.shape, lda.score(X, y))

    # There is no second LDA axis, so use a constant per-class y offset
    # purely to separate the two groups visually.
    X1 = np.concatenate((X_tr[0:n_plus], np.zeros((n_plus, 1))), axis=1)
    X2 = np.concatenate((X_tr[-n_minus:], np.ones((n_minus, 1))), axis=1)
    plt.plot(X1[:, 0], X1[:, 1], 'ro')
    plt.plot(X2[:, 0], X2[:, 1], 'g+')
    plt.ylim(-1, 3)
    plt.show()
def run_ldatest(features, assignment):
    """Return the LDA training accuracy for clustered features, balancing class sizes.

    When the clusters differ in size, the largest cluster is randomly
    subsampled (dropping as many samples as the gap to the smallest cluster),
    an LDA is fit and scored on the retained samples, and the median score
    over several repeats is returned. With equal sizes a single fit is used.

    Args:
        features: Array of per-sample feature vectors, indexable by a boolean mask.
        assignment: 1-D array of cluster labels, one per sample.

    Returns:
        The (median) accuracy reported by ``LDA.score``.
    """
    # determine which one has more spikes
    units, n_spikes = np.unique(assignment, return_counts=True)
    id_big = np.argmax(n_spikes)
    id_small = np.argmin(n_spikes)
    n_diff = n_spikes[id_big] - n_spikes[id_small]

    if n_diff > 0:
        n_repeat = int(np.ceil(n_spikes[id_big] / n_spikes[id_small]))
        # id_big indexes the *unique label array*; compare against the label
        # itself so this also works when labels are not exactly 0..k-1.
        idx_big = np.where(assignment == units[id_big])[0]
        lda_probs = np.zeros(n_repeat)
        for j in range(n_repeat):
            idx_remove = np.random.choice(idx_big, n_diff, replace=False)
            idx_in = np.ones(len(assignment), 'bool')
            idx_in[idx_remove] = False
            # fit lda on the balanced subset and record its accuracy
            lda = LDA(n_components=1)
            lda.fit(features[idx_in], assignment[idx_in])
            lda_probs[j] = lda.score(features[idx_in], assignment[idx_in])
        lda_prob = np.median(lda_probs)
    else:
        lda = LDA(n_components=1)
        lda.fit(features, assignment)
        lda_prob = lda.score(features, assignment)
    return lda_prob
def test_LinearDiscriminantAnalysis(self):
    """Check that ``Helper.getBuiltModel('lda')`` behaves like a default LDA.

    The same comparison is run twice: once on the iris arrays from the
    sklearn Bunch and once on the pandas DataFrame variant. In both cases the
    first 120 rows are used for training and the remainder for testing.
    """
    helper = Helper()

    def compare_with_default(X_train, y_train, X_test, y_test):
        # Model produced by the helper under test.
        built = helper.getBuiltModel('lda')
        # Model we expect the helper to have produced.
        reference = LinearDiscriminantAnalysis()

        # Make sure the models are the same type before continuing.
        self.assertEqual(type(built), type(reference))

        built.fit(X_train, y_train)
        built_accuracy = built.score(X_test, y_test)

        reference.fit(X_train, y_train)
        reference_accuracy = reference.score(X_test, y_test)

        # Both models must report the same test accuracy.
        self.assertEqual(built_accuracy, reference_accuracy)

    # Dataset type: sklearn Bunch (plain arrays).
    iris = load_iris()
    compare_with_default(iris.data[:120], iris.target[:120],
                         iris.data[120:], iris.target[120:])

    # Dataset type: pandas DataFrame.
    iris_df = load_iris(as_frame=True).frame
    X = iris_df.loc[:, iris_df.columns != 'target']
    y = iris_df['target']
    compare_with_default(X.iloc[:120], y.iloc[:120],
                         X.iloc[120:], y.iloc[120:])
def run_lda_2(X_train, X_test, y_train, y_test, dataset):
    """Score LDA for each component count and plot its explained variance.

    ``y_train`` / ``y_test`` are DataFrames; their first column is used as the
    label. ``dataset`` is accepted but not used in the visible code.
    The score table is printed; the plot is drawn but not shown or saved here.
    """
    label_col = y_train.columns[0]
    train_labels = y_train[label_col]
    # Number of distinct labels in the training set.
    k_max = y_train.groupby(label_col).count().shape[0]

    model = LinearDiscriminantAnalysis(n_components=k_max - 1)
    score_df = pd.DataFrame()
    for k in range(1, k_max):
        LOGGER.info('lda: k={}'.format(k))
        model.set_params(n_components=k)
        model.fit(X_train, train_labels)
        score_df.loc[k, 'test_score'] = model.score(
            X_test, y_test[y_test.columns[0]])
        score_df.loc[k, 'train_score'] = model.score(X_train, train_labels)
    print(score_df)

    # Refit with the maximum number of components to read the variance ratios.
    model = LinearDiscriminantAnalysis(n_components=k_max - 1)
    model.fit(X_train, train_labels)
    ratios = model.explained_variance_ratio_
    result_df = pd.DataFrame(data=ratios,
                             columns=['ex_variance'],
                             index=range(len(ratios)))

    title = 'lda_explained_variance'
    x = 'components'
    y = 'variance contributed'
    LOGGER.info('plotting {}'.format(title))
    plt.clf()
    plt.title(title)
    plt.xlabel(x)
    plt.ylabel(y)
    plt.grid()
    positions = range(0, result_df.shape[0])
    plt.step(positions,
             np.cumsum(result_df['ex_variance']),
             label='cumulative explained variance')
    plt.bar(positions,
            result_df['ex_variance'],
            align='center',
            label='explained variance')
def train(self, save_path, name, cross_validation=True, reshape=None):
    """Run PCA followed by LDA on each fold and record the accuracies.

    Args:
        save_path: Passed to ``History``.
        name: Passed to ``History``.
        cross_validation: When true, ``self.n_splits`` folds are taken from
            ``self.feed``; otherwise only one.
        reshape: Unused in this method.
    """
    history = History(save_path, name)
    n_folds = self.n_splits if cross_validation else 1

    # Each fold is a (train, test) pair; element [0] holds the data
    # (described upstream as a 4-D array) and [1] the labels.
    for fold_index, (train_dataset, test_dataset) in enumerate(
            self.feed.take(n_folds), start=1):
        pca_model = PCA(n_components=self.pca_components)
        lda_model = LinearDiscriminantAnalysis()

        train_data = numpy.array(train_dataset[0])
        train_label = numpy.array(train_dataset[1])
        test_data = numpy.array(test_dataset[0])
        test_label = numpy.array(test_dataset[1])

        # Flatten every sample to one row before PCA.
        train_flat = train_data.reshape((train_data.shape[0], -1))
        pca_model.fit(train_flat)
        train_pc = pca_model.transform(train_flat)
        lda_model.fit(train_pc, train_label)

        r = {}
        r['acc'] = lda_model.score(train_pc, train_label)
        print('acc = ' + str(r['acc']))

        test_flat = test_data.reshape((test_data.shape[0], -1))
        test_pc = pca_model.transform(test_flat)
        r['val_acc'] = lda_model.score(test_pc, test_label)
        print('val_acc = ' + str(r['val_acc']))

        history.add(str(fold_index), r)

    history.save()
def problem2():
    """Train LDA on rows 0-39, 50-89 and 100-139 and score it on the held-out rows.

    Each row from ``readFileFunction()`` is treated as features followed by
    the label in the last position. Prints the array shapes and then the
    training and test accuracy.
    """
    data = readFileFunction()
    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]

    training_set1 = np.array(features[0:40] + features[50:90] + features[100:140])
    testing_set1 = np.array(features[40:50] + features[90:100] + features[140:150])
    labels_1 = np.array(labels[0:40] + labels[50:90] + labels[100:140])
    correct_labs_1 = np.array(labels[40:50] + labels[90:100] + labels[140:150])
    print(training_set1.shape)
    print(testing_set1.shape)
    print(labels_1.shape)

    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    lda.fit(training_set1, labels_1)
    # Training accuracy must be scored against the training labels; the
    # test labels have a different length and would raise.
    score1_1 = lda.score(training_set1, labels_1)
    score1_2 = lda.score(testing_set1, correct_labs_1)
    print(score1_1, score1_2)
class LDA_Agent(AgentMET4FOF):
    """Agent that trains and evaluates an LDA model from incoming messages.

    Messages on the 'train' channel (re)fit the model; messages on the 'test'
    channel produce predictions, which are sent on together with the
    reformatted ground truth.
    """

    def init_parameters(self, incremental = True):
        """Create the LDA model and remember whether training is incremental."""
        self.ml_model = LinearDiscriminantAnalysis(n_components=3,priors=None, shrinkage=None, solver='eigen')
        self.incremental = incremental

    def reformat_target(self, target_vector):
        """Ceil column 0 of ``target_vector`` and map zeros to class 1."""
        classes = np.ceil(target_vector[0])
        for label in classes.index:
            # Fixing the zero element.
            if classes[label] == 0:
                classes[label] = 1
        return np.array(classes)

    def on_received_message(self, message):
        """Dispatch a message to training or testing based on its channel."""
        channel = message['channel']
        self.log_info("MODE : "+ channel)
        payload = message['data']

        if channel == 'train':
            if self.incremental:
                # Store the reformatted batch, then train on the buffer's
                # first entry.
                payload['target'] = self.reformat_target(payload['target'])
                self.buffer_store(agent_from=message['from'], data=payload)
                first_key = list(self.buffer.keys())[0]
                y_true = self.buffer[first_key]['target']
                x = np.array(self.buffer[first_key]['quantities'])
            else:
                y_true = self.reformat_target(payload['target'])
                x = payload['quantities']
            self.ml_model = self.ml_model.fit(x, y_true)
            train_score = self.ml_model.score(x, y_true)
            self.log_info("Overall Train Score: " + str(train_score))
        elif channel == 'test':
            y_true = self.reformat_target(payload['target'])
            y_pred = self.ml_model.predict(payload['quantities'])
            self.send_output({'y_pred':y_pred, 'y_true': y_true})
            test_score = self.ml_model.score(payload['quantities'], y_true)
            self.log_info("Overall Test Score: " + str(test_score))
            self.lda_test_score = test_score
def batch():
    """Cross-validate a filter-bank CSP + shrinkage-LDA pipeline and print fold scores.

    For every fold produced by ``BCI.gen_kv_idx(y, 10)`` the feature with
    the highest ``fb_mibif`` score on the training data is selected, an LDA
    is fit on the selected training features, and its accuracy on the
    matching test features is printed.

    NOTE(review): the temporal-spectral (``ts_*``) branch had its inputs
    commented out while its consumers were left in place (including a
    ``ts_lad`` typo), so the function raised NameError on the first fold.
    The branch is disabled consistently here; restore it as a whole
    (``ts_mibif`` / ``CSP.temporal_spectral_CSP``) if it is still wanted.
    """
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    x, y = CSP.load_data()
    kv = BCI.gen_kv_idx(y, 10)
    for train_idx, test_idx in kv:
        x_train, y_train = x[train_idx], y[train_idx]
        x_test, y_test = x[test_idx], y[test_idx]

        fb_mi = fb_mibif(x_train, y_train)
        fb_idx = np.argmax(fb_mi)
        fb_csp = CSP.filterbank_CSP(x_train)
        fb_x = mi_selector(fb_csp, fb_idx)

        fb_lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        fb_lda.fit(fb_x, y_train)

        # Select test features with the same index chosen on the training
        # fold (the original passed the training feature matrix instead).
        fb_x_t = mi_selector(CSP.filterbank_CSP(x_test), fb_idx)
        fb_score = fb_lda.score(fb_x_t, y_test)
        print(fb_score)
def plot_lda():
    """Plot test accuracy of three LDA variants against the feature/sample ratio.

    The variants are LDA with automatic shrinkage, plain LDA, and LDA using
    an OAS covariance estimator, all with the 'lsqr' solver. Data comes from
    ``generate_data``; each point is averaged over ``n_averages`` runs.
    """
    n_train = 20  # samples for training
    n_test = 200  # samples for testing
    n_averages = 50  # how often to repeat classification
    n_features_max = 75  # maximum number of features
    step = 4  # step size for the calculation

    def fit_all(X, y):
        # Fit the three estimators in a fixed order: shrinkage, plain, OAS.
        shrunk = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
        plain = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y)
        oa = OAS(store_precision=False, assume_centered=False)
        with_oas = LinearDiscriminantAnalysis(solver='lsqr',
                                              covariance_estimator=oa).fit(X, y)
        return shrunk, plain, with_oas

    n_features_range = range(1, n_features_max + 1, step)
    mean_accuracy = ([], [], [])
    for n_features in n_features_range:
        totals = [0, 0, 0]
        for _ in range(n_averages):
            X, y = generate_data(n_train, n_features)
            fitted = fit_all(X, y)
            X, y = generate_data(n_test, n_features)
            for slot, clf in enumerate(fitted):
                totals[slot] += clf.score(X, y)
        for slot, total in enumerate(totals):
            mean_accuracy[slot].append(total / n_averages)

    features_samples_ratio = np.array(n_features_range) / n_train
    curves = (
        (mean_accuracy[0], "Linear Discriminant Analysis with Ledoit Wolf", 'navy'),
        (mean_accuracy[1], "Linear Discriminant Analysis", 'gold'),
        (mean_accuracy[2], "Linear Discriminant Analysis with OAS", 'red'),
    )
    for accuracy, label, color in curves:
        plt.plot(features_samples_ratio, accuracy, linewidth=2,
                 label=label, color=color)

    plt.xlabel('n_features / n_samples')
    plt.ylabel('Classification accuracy')
    plt.legend(loc=3, prop={'size': 12})
    plt.suptitle('Linear Discriminant Analysis vs. ' + '\n' +
                 'Shrinkage Linear Discriminant Analysis vs. ' + '\n' +
                 'OAS Linear Discriminant Analysis (1 discriminative feature)')
    plt.show()
def linear_discriminant(train_set, test_set):
    """Fit a default LDA on the selected features and evaluate it.

    Returns:
        Tuple ``(test accuracy, training accuracy, test predictions, test labels)``.
    """
    train_feature, train_label, test_feature, test_label = feature_selection(
        train_set, test_set)

    lda = LinearDiscriminantAnalysis()
    lda.fit(train_feature, train_label)
    y_pred = lda.predict(test_feature)

    training_score = lda.score(train_feature, train_label)
    accuracy = lda.score(test_feature, test_label)
    return accuracy, training_score, y_pred, test_label
def Lineal(self):
    """Fit a default LDA on the training split and evaluate it.

    Returns:
        Tuple ``(classification report on the eval split, training accuracy,
        eval accuracy)``.
    """
    lda = LinearDiscriminantAnalysis()
    # Labels are flattened for fitting only; scoring uses them as stored.
    lda.fit(self.x_train, self.y_train.ravel())
    eval_predictions = lda.predict(self.x_eval)

    train_accuracy = lda.score(self.x_train, self.y_train)
    eval_accuracy = lda.score(self.x_eval, self.y_eval)
    report = classification_report(self.y_eval, eval_predictions, digits=3)
    return report, train_accuracy, eval_accuracy
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Plot cross-validated and training LDA scores for each ``n_components``.

    Features are scaled with ``RobustScaler`` fitted on the training data.
    ``n_components`` runs from 1 up to the number of features. The plot is
    written under ``self.out_dir`` through ``plot_helper.plot_series``.

    NOTE(review): ``KFold(n_samples, 3, shuffle=True)`` and iterating the
    object directly look like the legacy ``sklearn.cross_validation`` API;
    confirm against this file's imports before upgrading scikit-learn.
    """
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ph = plot_helper()

    scores = []
    train_scores = []
    rng = range(1, X_train_scl.shape[1]+1)
    for i in rng:
        # Cross-validated score: refit the same estimator on every fold.
        lda = LinearDiscriminantAnalysis(n_components=i)
        cv = KFold(X_train_scl.shape[0], 3, shuffle=True)
        cv_scores = [
            lda.fit(X_train_scl[train], y_train[train]).score(
                X_train_scl[test], y_train[test])
            for (train, test) in cv
        ]
        mean_score = np.mean(cv_scores)
        scores.append(mean_score)

        # Training score on the full training set with a fresh estimator.
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_scores.append(lda.score(X_train_scl, y_train))

        print(i, mean_score)

    # Score plot
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'

    ph.plot_series(rng,
                   [scores, train_scores],
                   [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'n_components',
                   'Score',
                   filename)
def load_csp(count):
    """Score LDA on four precomputed CSP feature sets and append results to CSV.

    Reads ``csp_data/A0<count>T.npz.pic`` and, for each of its first five
    entries, fits a shrinkage LDA on the plain CSP features and on the
    MIBIF-selected filter-bank (fb), temporal-spectral (ts) and 'st' CSP
    features. One row ``i,score,fb_score,ts_score,st_score`` is appended to
    ``csp_res_<count>.csv`` per entry.

    Labels are converted with ``argmax(axis=1)`` before fitting and scoring.
    """
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    # NOTE: pickle can execute arbitrary code; only load trusted result files.
    with open('csp_data/A0' + str(count) + 'T.npz.pic', 'rb') as f:
        res = pickle.load(f)

    for i in range(5):
        print(i)
        fold_train = res[i]['train']
        fold_test = res[i]['test']
        x_train = fold_train['x']
        y_train = fold_train['y']
        y_test = fold_test['y']
        train_labels = y_train.argmax(axis=1)
        test_labels = y_test.argmax(axis=1)

        def fit_and_score(train_feat, test_feat):
            # Each feature set gets its own model, scored on its own test features.
            model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
            model.fit(train_feat, train_labels)
            return model.score(test_feat, test_labels)

        def selected_score(key):
            # NOTE(review): the feature index is chosen with argmin here,
            # as in the original code; confirm that is the intended criterion.
            train_csp = fold_train[key]
            mi = MIBIF.fb_mibif_with_csp(x_train, y_train, train_csp)
            idx = np.argmin(mi)
            return fit_and_score(MIBIF.mi_selector(train_csp, idx),
                                 MIBIF.mi_selector(fold_test[key], idx))

        fb_score = selected_score('fbcsp')
        # Previously scored with fb_lda, a model fit on different features.
        score = fit_and_score(fold_train['csp'], fold_test['csp'])
        ts_score = selected_score('tscsp')
        # Previously fit on fb_x and scored with fb_lda, duplicating fb_score.
        st_score = selected_score('stcsp')

        with open('csp_res_' + str(count) + '.csv', 'a') as pen:
            pen.write(
                str(i) + ',' + str(score) + ',' + str(fb_score) + ',' +
                str(ts_score) + ',' + str(st_score) + '\n')
class LinearDiscriminantAnalysiscls(object):
    """Best-effort wrapper around a ``LinearDiscriminantAnalysis`` classifier.

    Errors raised by the underlying model are printed as a traceback rather
    than propagated; the affected method then returns ``None``.
    """

    def __init__(self):
        self.lda_cls = LinearDiscriminantAnalysis()
        self.prediction = None
        self.train_x = None
        self.train_y = None
        # Set by predict(); accuracy_score() relies on it.
        self.test_x = None

    def train_model(self, train_x, train_y):
        """Store the training data and fit the classifier on it."""
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.lda_cls.fit(train_x, train_y)
        except Exception:
            # Narrower than a bare except: lets KeyboardInterrupt/SystemExit through.
            print(traceback.format_exc())

    def predict(self, test_x):
        """Store ``test_x``, predict its labels and return them."""
        try:
            self.test_x = test_x
            self.prediction = self.lda_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        """Return the accuracy on the features last passed to ``predict``.

        Returns ``None`` (after printing a message) when ``predict`` has not
        been called yet, or when scoring fails.
        """
        if getattr(self, 'test_x', None) is None:
            print('accuracy_score() called before predict(); no test features stored')
            return None
        try:
            return self.lda_cls.score(self.test_x, test_y)
        except Exception:
            print(traceback.format_exc())
def intersubjective_shallow(data, model_name):
    """Train a shallow classifier (linear SVM or LDA) and compute its metrics.

    Args:
        data: Tuple ``(x_train, y_train, x_test, y_test, o_t_test, o_tr_test)``.
            The last two items are stored in the module globals ``t_test``
            and ``tr_test``.
        model_name: Selects the classifier. A name containing ``'svm'`` gives
            ``LinearSVC``; one containing ``'lda'`` gives LDA with the 'lsqr'
            solver, with automatic shrinkage if it also contains
            ``'shrinkage'``.

    Returns:
        Tuple ``(metrics, history, cnf_matrix, clf)``; ``history`` is always
        ``False``.

    Raises:
        ValueError: If ``model_name`` names neither an SVM nor an LDA model.
    """
    global t_test, tr_test

    x_train, y_train, x_test, y_test, o_t_test, o_tr_test = data
    x_train, y_train, x_test, y_test = resample_transform(
        (x_train, y_train, x_test, y_test), resample=False)
    t_test = o_t_test
    tr_test = o_tr_test

    # Flatten every sample to a single feature vector.
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)

    m = [
        'acc', 'val_acc', 'val_precisions', 'val_recalls', 'val_f1s',
        'val_aucs', 'val_balanced_acc', 'val_recognition_acc', 'val_bpm'
    ]
    metrics = {key: [] for key in m}
    history = False

    if 'svm' in model_name:
        clf = svm.LinearSVC(random_state=0)
    elif 'lda' in model_name:
        shrinkage = 'auto' if 'shrinkage' in model_name else None
        clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=shrinkage)
    else:
        # Previously fell through and failed later with an unbound `clf`.
        raise ValueError(
            "model_name must contain 'svm' or 'lda', got {!r}".format(model_name))

    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    probs = clf.decision_function(x_test)
    metrics['acc'].append(clf.score(x_train, y_train))
    metrics = compute_metrics(metrics, probs, y_predict, y_test)
    cnf_matrix = confusion_matrix(y_test, y_predict)
    return metrics, history, cnf_matrix, clf
def LDA(X, y, r):
    """Cross-validate a default LDA (5 folds), then fit it on all the data.

    ``r`` is accepted but not used. Prints the mean CV score and the training
    score, and returns ``(mean CV score, fitted model)``.
    """
    model = LinearDiscriminantAnalysis()
    fold_scores = cross_val_score(model, X, y, cv=5)
    model.fit(X, y)
    mean_cv = np.mean(fold_scores)
    print('CV', mean_cv, model.score(X, y))
    return mean_cv, model
def discriminant_analysis_models(x_train, y_train):
    """Fit a default LDA, print its training accuracy and return the model."""
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    lda_model = LinearDiscriminantAnalysis()
    lda_model.fit(x_train, y_train)
    training_accuracy = lda_model.score(x_train, y_train)
    print('LinearDiscriminantAnalysis training accuracy: ', training_accuracy)
    return lda_model
def SDE(filePath, nfold, numLearn, numWave):
    """Evaluate LDA learners trained on random band subsets with K-fold CV.

    The ``afterSGSmooth`` variable of the .mat file is read; rows 0-255 are
    used as the band features and row 256 as the label. In every fold,
    ``numLearn`` learners are trained, each on ``numWave`` bands sampled
    without replacement from all 256.

    Returns:
        Tuple ``(mean test accuracy over all learners and folds, list of
        confusion matrices)``.
    """
    variableName = "afterSGSmooth"
    mat = sio.loadmat(filePath)
    data = mat[variableName]
    allBands = data[:256, :].T
    label = data[256, :].T

    bandIndices = list(range(256))
    matrix = []
    acc = 0
    kf = KFold(n_splits=nfold, shuffle=True)
    for trainIdx, testIdx in kf.split(label):
        for _ in range(numLearn):  # one weak learner per iteration
            ramList = random.sample(bandIndices, numWave)  # bands for this learner
            # Always sample from the full matrix. The original overwrote it
            # with its own subset, so later learners indexed a shrunken array.
            subset = allBands[:, ramList]
            X_train, X_test = subset[trainIdx, :], subset[testIdx, :]
            y_train, y_test = label[trainIdx], label[testIdx]

            lda = LinearDiscriminantAnalysis()
            lda.fit(X_train, y_train)
            matrix.append(confusion_matrix(y_test, lda.predict(X_test)))
            acc += lda.score(X_test, y_test, sample_weight=None)
    return acc / (nfold * numLearn), matrix
def get_LDA(Xtrain, Ytrain, Xtest = None , Ytest = None, verbose = 0):
    """Fit an LDA on the training data and return it.

    With ``verbose == 1`` the training accuracy is printed, followed by the
    test accuracy when ``Xtest`` is provided.
    """
    lda = LDA()
    lda.fit(Xtrain,Ytrain)

    if verbose != 1:
        return lda

    scores = np.empty((2))
    scores[0] = lda.score(Xtrain,Ytrain)
    print('LDA, train: {0:.02f}% '.format(scores[0]*100))
    if Xtest is not None:
        scores[1] = lda.score(Xtest,Ytest)
        print('LDA, test: {0:.02f}% '.format(scores[1]*100))
    return lda
def lda_classifier(data):
    """Project three features onto one LDA axis and classify in that space.

    The 'student' and 'default' columns are recoded from Yes/No to 1/0 in
    place on ``data``. Column 0 is the label and columns 1-3 are the
    features; the last 30 rows form the test split.

    Returns:
        Tuple ``(projected test features, predicted labels, true test labels)``.
    """
    for column in ('student', 'default'):
        data.loc[data[column] == 'Yes', [column]] = 1
        data.loc[data[column] == 'No', [column]] = 0

    values = data.values
    feature_cols = [1, 2, 3]
    x_train = values[:-30, feature_cols]
    x_test = values[-30:, feature_cols]
    y_train = values[:-30, 0].astype('int')
    y_test = values[-30:, 0].astype('int')

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    lda = LDA(n_components=1)
    x_train_lda = lda.fit_transform(x_train, y_train)
    x_test_lda = lda.transform(x_test)
    print(x_train_lda)

    # The same estimator is refit on the 1-D projection so that prediction
    # happens in the projected space.
    lda.fit(x_train_lda, y_train)
    y_pre = lda.predict(x_test_lda)
    score = lda.score(x_test_lda, y_test)
    return x_test_lda, y_pre, y_test
def LDA(targetData, featureData):
    """Fit an LDA, print its training accuracy and scatter-plot the first two axes.

    The feature data is expected to be standardised before calling this
    function. The projection is placed in a frame with columns
    'LDA1' and 'LDA2', so it must have exactly two components.
    """
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

    # Fit on the (already standardised) data and project it.
    model = LDA()
    projected = model.fit_transform(featureData, targetData)
    lda_df = pd.DataFrame(data=projected, columns=['LDA1', 'LDA2'])
    lda_df['Cluster'] = targetData

    # Report the classification accuracy on the training data.
    training_accuracy = model.score(featureData, targetData)
    print('Accuracy of LDA classifier on training set: {:.2f}'.format(
        training_accuracy))
    lda_df.head()
    model.predict(featureData)

    # Scatter plot of the first and second LDA components.
    sns.lmplot(
        x="LDA1",
        y="LDA2",
        data=lda_df,
        fit_reg=False,
        hue='Cluster',  # colour by cluster
        legend=True,
        scatter_kws={"s": 80})  # marker size
def run_thingy(X, y, name):
    """Fit a one-component LDA on a stratified split and print its test accuracy.

    ``name`` is accepted but not used.
    """
    X_train, X_test, y_train, y_test = pre.train_test_split(X, y, stratify=y)
    for n_components in range(1, 2):
        reducer = LinearDiscriminantAnalysis(n_components=n_components)
        reducer.fit_transform(X_train, y_train)
        print(reducer.score(X_test, y_test))
def discriminator(self, IQ_012_data):
    """Train an LDA that separates the 0, 1 and 2 states from IQ data.

    ``IQ_012_data`` holds one entry per state. Half of the shuffled data is
    used for training and half for testing; the test accuracy is printed and
    the fitted model is returned.
    """
    # Build the IQ vectors (made up of real and imaginary parts).
    reshaped = tuple(
        self.reshape_complex_vec(IQ_012_data[state]) for state in (0, 1, 2))
    IQ_012_data_copy = np.concatenate(reshaped)

    # Build the label vector: `shots` zeros, then ones, then twos.
    shots = self.shots  # number of shots in the experiment
    state_012 = np.concatenate(
        (np.zeros(shots), np.ones(shots), 2 * np.ones(shots)))

    # Shuffle the data and split it into training and test sets.
    IQ_012_train, IQ_012_test, state_012_train, state_012_test = train_test_split(
        IQ_012_data_copy, state_012, test_size=0.5)

    # Set up and fit the LDA.
    LDA_012 = LinearDiscriminantAnalysis()
    LDA_012.fit(IQ_012_train, state_012_train)

    # Compute the accuracy.
    score_012 = LDA_012.score(IQ_012_test, state_012_test)
    print(score_012)

    return LDA_012
def lda(encoder, x_train, y_train, train_label):
    '''
    Total LDA analysis: outputs plots and returns a feature-importance table.

    lda(encoder, x_train, y_train, train_label)

    The encoder's fourth output (``z_pred``) is used as the LDA input. The
    returned DataFrame holds the absolute LDA scalings, one row per latent
    feature, plus a final row with the explained-variance ratios. Its
    ``totals`` column is the variance-weighted sum of each row, and the rows
    are sorted by it in descending order.
    '''
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    z_mean_pred, z_sig, z_label_pred, z_pred = encoder.predict(
        [x_train, y_train], batch_size=16)

    sklearn_lda = LinearDiscriminantAnalysis()
    y = np.array(train_label)
    z_pred = pd.DataFrame(z_pred)
    sklearn_lda = sklearn_lda.fit(z_pred, y)
    X_lda = sklearn_lda.transform(z_pred)
    print(len(X_lda[0]))
    score = sklearn_lda.score(z_pred, y)
    print('accruacy', score)

    label_dict = {1: 'Healthy', 2: 'At risk of SCZ', 3: 'Depression', 4: 'SCZ'}

    from CVAE_3Dplots import lda_densityplot
    lda_densityplot(X_lda, y, 'STUDYGROUP', sklearn_lda)
    from CVAE_3Dplots import plot_lda_cluster
    plot_lda_cluster(X_lda, y, '', label_dict, sklearn_lda)

    importance = pd.DataFrame(sklearn_lda.scalings_)
    print(sklearn_lda.explained_variance_ratio_)
    print(importance.shape)

    exp_var = sklearn_lda.explained_variance_ratio_.tolist()
    # Append the variance ratios as the last row; they act as the weights.
    importance.loc[len(importance)] = exp_var
    importance = importance.abs()  # removing all negative numbers

    # Weight every component column by its explained-variance ratio. The
    # weights are read from the appended last row rather than from a fixed
    # row 50, so any latent size and any number of components work; with 50
    # features and 3 components this equals the original formula.
    n_components = len(exp_var)
    totals = 0
    for comp in range(n_components):
        totals = totals + importance[comp] * importance.iloc[-1, comp]
    importance['totals'] = totals

    importance = importance.sort_values(by=['totals'], ascending=False)
    return importance
def lda(X_train, y_train, X_test, y_test):
    """Fit a default LDA, print its test accuracy and return that accuracy."""
    classifier = LinearDiscriminantAnalysis()
    classifier.fit(X_train, y_train)
    test_accuracy = classifier.score(X_test, y_test)
    print('LDA: ' + str(test_accuracy))
    return test_accuracy
def main():
    """Visualise LDA on classes 4/5 of the shuttle data and compare LDA with QDA.

    Reads ``shuttle.csv`` (no header, last column is the class). The first
    60% of the rows are the training split and the rest the test split.
    Columns 3 and 6 of the class-4/5 rows feed the 2-D visualisation; the
    LDA/QDA comparison uses all feature columns.
    """
    dataset = pd.read_csv("shuttle.csv",
                          header=None).values.astype(np.int32, copy=False)
    # Split at a single index; the original started the test split one row
    # later and silently dropped a sample.
    split = int(len(dataset) * 0.6)
    data_train = dataset[:split]
    data_test = dataset[split:]

    # Keep only classes 4 and 5, relabelled as 0 and 1. Selected with a mask
    # instead of growing the arrays row by row.
    classes = dataset[:, -1]
    keep = (classes == 4) | (classes == 5)
    x = dataset[keep][:, [3, 6]]
    y = (classes[keep] - 4).astype(float)

    lda = LDA(solver="svd", store_covariance=True)
    visualization(dataset[:, 3], dataset[:, 6], dataset[:, -1])
    plot_data(lda, x, y, lda.fit(x, y).predict(x))
    plt.axis('tight')
    plt.show()

    lda.fit(data_train[:, :-1], data_train[:, -1])
    lda_score = lda.score(data_test[:, :-1], data_test[:, -1])

    # `store_covariances` was renamed to `store_covariance` in scikit-learn.
    qda = QDA(store_covariance=True)
    qda.fit(data_train[:, :-1], data_train[:, -1])
    qda_score = qda.score(data_test[:, :-1], data_test[:, -1])

    print("Linear Discriminant Analysis: ", lda_score)
    print("Quadratic Discriminant Analysis: ", qda_score)
def linearDiscriminantAnalysis_model(X_train, X_test, y_train, y_test):
    """Train a default LDA, print timing and accuracy, and return test predictions."""
    model = LinearDiscriminantAnalysis()

    # Fit the classifier on the training features and labels.
    fit_started = time()
    model.fit(X_train, y_train)
    print('\nPerfomance Report:\n')
    print("Training time:", round(time() - fit_started, 3), "s")

    # Predict on the test features.
    predict_started = time()
    y_pred = model.predict(X_test)
    print("Prediction time:", round(time() - predict_started, 3), "s\n")

    # Accuracy on the test split.
    result = model.score(X_test, y_test)
    print("Accuracy: {:.2%}".format(result), '\n')

    # Display performance metrics.
    model_evaluation(y_test, y_pred)
    n_total = X_test.shape[0]
    n_wrong = (y_test != y_pred).sum()
    print("\nNumber of mislabeled points out of a total %d points : %d" %
          (n_total, n_wrong))
    return y_pred
def main():
    """Compare the project's Fisher discriminant with sklearn's LDA on PCA-reduced MNIST.

    Loads digits in the range 5-8 (400 training and 100 test samples),
    reduces them to 10 PCA dimensions, and prints training and test accuracy
    for both the ``FLD`` implementation and ``LinearDiscriminantAnalysis``.
    """
    # Load the data via the external load_mnist.py script.
    train_X, train_y, test_X, test_y = mnist(noTrSamples=400,
                                             noTsSamples=100,
                                             digit_range=[5, 8],
                                             noTrPerClass=200,
                                             noTsPerClass=50)

    # PCA projections of both splits.
    pca_train_X, e_pca_train_X = pca(train_X, dim=10)
    pca_test_X, e_pca_test_X = pca(test_X, dim=10)

    # Fisher's Linear Discriminant on the PCA-projected training data.
    pca_mnist_fld = FLD(pca_train_X, train_y, dim=1)
    pca_mnist_fld.fit()
    train_acc = pca_mnist_fld.train_accuracy()
    test_acc = pca_mnist_fld.test_accuracy(pca_test_X, test_y)
    print(f'Training accuracy : {train_acc}')
    print(f'Test accuracy : {test_acc}')

    # sklearn expects samples in rows and a flat label vector:
    # shapes are (400, 10) and (400,) for the training split.
    sk_train_X = pca_train_X.T
    sk_train_y = train_y.T.flatten()
    clf = LinearDiscriminantAnalysis()
    clf.fit(sk_train_X, sk_train_y)
    print('From sklearn')
    print('Training accuracy : ', clf.score(sk_train_X, sk_train_y))
    print('Test accuracy : ', clf.score(pca_test_X.T, test_y.T.flatten()))
def classifiers(trainingRatingmatrix, testingRatingMatrix, trainLabel,
                testLabel):
    """Train several classifiers on the rating matrices and print their metrics.

    Models, in order: multinomial logistic regression, LDA, logistic
    regression on 1000 PCA components, an MLP and an RBF-based ELM. Each
    reports accuracy, RMSE and NMAE on the test split.
    """

    def report(tag, accuracy, predicted):
        # Shared print layout for every model except the ELM.
        print(tag, accuracy, "RMSE", sklearnRMSE(predicted, testLabel),
              "NMAE:", NMAE(predicted, testLabel))

    # Logistic regression
    log_reg = LogisticRegression(solver='lbfgs',
                                 multi_class='multinomial',
                                 class_weight='balanced').fit(
                                     trainingRatingmatrix, trainLabel)
    acc = log_reg.score(testingRatingMatrix, testLabel)
    prediction = log_reg.predict(testingRatingMatrix)
    report("LOgictic Regression Accuracy", acc, prediction)

    # LDA
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    lda_clf = LinearDiscriminantAnalysis(solver='svd')
    lda_clf.fit(trainingRatingmatrix, trainLabel)
    acc = lda_clf.score(testingRatingMatrix, testLabel)
    LDA_prediction = lda_clf.predict(testingRatingMatrix)
    report("LDA::Accuracy", acc, LDA_prediction)

    # PCA followed by logistic regression
    pca = PCA(n_components=1000)
    pca.fit(trainingRatingmatrix)
    PCA_train = pca.transform(trainingRatingmatrix)
    PCA_test = pca.transform(testingRatingMatrix)
    pca_log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    pca_log_reg.fit(PCA_train, trainLabel)
    acc = pca_log_reg.score(PCA_test, testLabel)
    prediction = pca_log_reg.predict(PCA_test)
    report("PCA: Accuracy", acc, prediction)

    # MLP classifier
    clf_mlp = MLPClassifier(solver='lbfgs',
                            alpha=1e-5,
                            hidden_layer_sizes=(1000, 100),
                            random_state=1)
    clf_mlp.fit(trainingRatingmatrix, trainLabel)
    acc = clf_mlp.score(testingRatingMatrix, testLabel)
    prediction = clf_mlp.predict(testingRatingMatrix)
    report("MLP: Accuracy", acc, prediction)

    # ELM
    nh = 100
    srhl_rbf = RBFRandomLayer(n_hidden=nh * 2, rbf_width=0.1, random_state=0)
    elm_names = ["rbf(0.1))"]
    elm_models = [GenELMClassifier(hidden_layer=srhl_rbf)]
    for elm_name, elm in zip(elm_names, elm_models):
        elm.fit(trainingRatingmatrix, trainLabel)
        prediction = elm.predict(testingRatingMatrix)
        score = elm.score(testingRatingMatrix, testLabel)
        print('ELM Model %s Accuracy: %s' % (elm_name, score), "RMSE",
              sklearnRMSE(prediction, testLabel), "NMAE",
              NMAE(prediction, testLabel))

    print(
        "==========================================================================="
    )
def classification_temporal(sub):
    """Evaluate a two-stage LDA on five train/test splits of one subject.

    For split ``i`` in 1..5 the ``.mat`` files under ``RCNN2/twist_rev_mv/``
    are loaded, a PCA plot is produced, the features are projected onto two
    LDA components, and a shrinkage LDA is trained on the projection. One
    line ``LDA,<sub>,<i>,<score>`` is appended to ``LDA_seg_mv_res_L.csv``
    per split.
    """
    import RCNN
    import CNN

    for i in range(1, 6):
        prefix = 'RCNN2/twist_rev_mv/' + sub + '_' + str(i)
        train_data = scipy.io.loadmat(prefix + '_train.mat')
        test_data = scipy.io.loadmat(prefix + '_test.mat')

        train_x = x_translator2(train_data['train'][0][0][0])
        train_y = np.transpose(train_data['train'][0][0][1])
        test_x = x_translator2(test_data['test'][0][0][0])
        test_y = np.transpose(test_data['test'][0][0][1])
        # Labels are converted to class indices with argmax.
        train_labels = train_y.argmax(axis=1)
        test_labels = test_y.argmax(axis=1)

        show_pca(train_x, test_x, train_y, test_y, sub + '_' + str(i))
        train_x = tans(train_x)
        test_x = tans(test_x)

        # Stage 1: supervised projection onto two discriminant axes.
        lda = LinearDiscriminantAnalysis(n_components=2)
        lda.fit(train_x, train_labels)
        train_x = lda.transform(train_x)
        test_x = lda.transform(test_x)

        # Stage 2: shrinkage LDA classifier on the projection. fit() takes
        # no `shuffle` argument; that keyword was a leftover and raised
        # TypeError.
        model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        model.fit(train_x, train_labels)
        score = model.score(test_x, test_labels)

        with open('LDA_seg_mv_res_L.csv', 'a') as pen:
            pen.write('LDA,' + sub + ',' + str(i) + ',' + str(score) + '\n')
def getBoardAccuracySetting2(Bn, step):
    """Train LDA on one repeat of a board and test it on the next repeat.

    Boards 'B4' and 'B5' only use the R1 -> R2 pair and have their result
    padded with two zeros; all other boards use R1 -> R2, R2 -> R3 and
    R3 -> R4.

    Returns:
        List of accuracies, one per train/test repeat pair (plus padding).
    """
    RList = ['R1', 'R2', 'R3', 'R4']
    short_board = Bn in ['B4', 'B5']
    loop_Rn = ['R1'] if short_board else ['R1', 'R2', 'R3']

    def load_repeat(repeat):
        # Collect samples and labels for every file of the given repeat.
        samples, targets = [], []
        for item in dic[Bn][repeat]:
            x, y = getDataFromFile(item, 400, step)
            samples.append(x)
            targets.append(y)
        return samples, targets

    acc = []
    # Pair every training repeat with the repeat that follows it.
    for Rn, next_Rn in zip(loop_Rn, RList[1:]):
        print("{} Repeat:".format(Bn) + Rn)
        train_data, train_label = load_repeat(Rn)
        clf = LinearDiscriminantAnalysis()
        clf.fit(train_data, train_label)

        test_data, test_label = load_repeat(next_Rn)
        acc.append(clf.score(test_data, test_label))

    if short_board:
        acc = acc + [0, 0]
    return acc
def linear_discriminant_analysis_none(x_train, y_train, x_test, y_test):
    """Return the test accuracy of an unshrunk 'lsqr' LDA as a 2-decimal string."""
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    classifier = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None)
    classifier.fit(x_train, y_train)
    accuracy = classifier.score(x_test, y_test)
    return "{0:.2f}".format(accuracy)
def lda_performance(train,
                    test,
                    standardize=False,
                    shrinkage=False,
                    interact_terms=1):
    '''Return the test accuracy of an 'lsqr' LDA on polynomial features.

    Note that train and test are lists of Creatures because we need to choose
    whether or not to standardize their position and orientation.

    Args:
        train: Training creatures.
        test: Test creatures.
        standardize: When true, ``standardize()`` is called on every creature
            in both lists (in place) first.
        shrinkage: When true, automatic shrinkage is used; otherwise none.
        interact_terms: Degree passed to ``PolynomialFeatures``.
    '''
    if standardize:
        for c in train:
            c.standardize()
        for c in test:
            c.standardize()

    trainb = make_vectors.make_bunch(train)
    testb = make_vectors.make_bunch(test)
    (x_train, x_test, y_train, y_test) = (trainb.data, testb.data,
                                          trainb.target, testb.target)

    # Expand both splits with ONE transformer fitted on the training data so
    # train and test are guaranteed to share the same feature layout.
    poly = PolynomialFeatures(interact_terms)
    x_train_poly = poly.fit_transform(x_train)
    x_test_poly = poly.transform(x_test)

    # scikit-learn documents None (not False) as the "no shrinkage" value.
    lda_shrinkage = 'auto' if shrinkage else None
    lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=lda_shrinkage)
    lda.fit(x_train_poly, y_train)
    return lda.score(x_test_poly, y_test)
def assess_embedding(to_vec):
    """
    Fit a two-component LDA on the matrices derived from ``to_vec``.

    Returns a tuple ``(projection, labels, score)`` where ``projection`` is
    the LDA-transformed data reshaped to a flat vector of length 26,
    ``labels`` is the label array from ``get_x_y_matrices`` and ``score`` is
    the classifier's score on the same data it was fitted on.
    """
    features, labels = get_x_y_matrices(to_vec)
    model = LDA(n_components=2)
    projected = model.fit_transform(features, labels)
    accuracy = model.score(features, labels)
    # The projection is expected to hold exactly 26 values in total.
    flat_projection = projected.reshape(26)
    return (flat_projection, labels, accuracy)
# Cross-validated sliding-window classification: for every fold, fit CSP and
# the LDA on the training epochs, then score the LDA on successive time
# windows of the held-out epochs.
scores_windows = []

for train_idx, test_idx in cv_split:
    y_train, y_test = labels[train_idx], labels[test_idx]

    # CSP is fitted on this fold's training epochs only.
    X_train = csp.fit_transform(epochs_data_train[train_idx], y_train)
    # NOTE(review): this X_test is overwritten inside the window loop below
    # before it is ever read.
    X_test = csp.transform(epochs_data_train[test_idx])

    # fit classifier
    lda.fit(X_train, y_train)

    # running classifier: test classifier on sliding window
    score_this_window = []
    for n in w_start:
        # Slice samples [n, n + w_length) along the last axis of the test epochs.
        X_test = csp.transform(epochs_data[test_idx][:, :, n:(n + w_length)])
        score_this_window.append(lda.score(X_test, y_test))
    scores_windows.append(score_this_window)

# Plot scores over time
# Window centres converted from samples to seconds relative to epochs.tmin.
w_times = (w_start + w_length / 2.) / sfreq + epochs.tmin

plt.figure()
# Mean over folds (axis 0) gives one score per window position.
plt.plot(w_times, np.mean(scores_windows, 0), label='Score')
plt.axvline(0, linestyle='--', color='k', label='Onset')
plt.axhline(0.5, linestyle='-', color='k', label='Chance')
plt.xlabel('time (s)')
plt.ylabel('classification accuracy')
plt.title('Classification score over time')
plt.legend(loc='lower right')
plt.show()
#models.append(('SVC', SVC(probability=True))) # Evaluate each model in turn results = [] names = [] for name, model in models: cv_results = cross_val_score(model, X, Y, cv=kfold, n_jobs=processors) results.append(cv_results) names.append(name) print("{0}: ({1:.3f}) +/- ({2:.3f})".format(name, cv_results.mean(), cv_results.std())) clf = LinearDiscriminantAnalysis() clf.fit(X,Y) clf.score(X,Y) # In[58]: # clf = LogisticRegression() # clf.fit(X,Y) # clf.score(X,Y) # In[73]: get_ipython().magic('pinfo cross_val_score') # In[ ]:
# Each classifier below is fitted with default settings on
# (X_train, y_train) and its score is printed for both the training and the
# test split.

# K-nearest neighbours
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
print('Accuracy of K-NN classifier on training set: {:.2f}'
      .format(knn.score(X_train, y_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
      .format(knn.score(X_test, y_test)))

# Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
print('Accuracy of LDA classifier on training set: {:.2f}'
      .format(lda.score(X_train, y_train)))
print('Accuracy of LDA classifier on test set: {:.2f}'
      .format(lda.score(X_test, y_test)))

# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
print('Accuracy of GNB classifier on training set: {:.2f}'
      .format(gnb.score(X_train, y_train)))
print('Accuracy of GNB classifier on test set: {:.2f}'
      .format(gnb.score(X_test, y_test)))

# Support Vector Machine
from sklearn.svm import SVC
def fit(self, X, y): # validate X, y = check_X_y(X, y, allow_nd=True) X = sklearn.utils.validation.check_array(X, allow_nd=True) # set internal vars self.classes_ = unique_labels(y) self.X_ = X self.y_ = y ################################################## # split X into train and test sets, so that # grid search can be performed on train set only seed = 7 np.random.seed(seed) #X_TRAIN, X_TEST, y_TRAIN, y_TEST = train_test_split(X, y, test_size=0.25, random_state=seed) for epoch_trim in self.epoch_bounds: for bandpass in self.bandpass_filters: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed) # X_train = np.copy(X_TRAIN) # X_test = np.copy(X_TEST) # y_train = np.copy(y_TRAIN) # y_test = np.copy(y_TEST) # separate out inputs that are tuples bandpass_start,bandpass_end = bandpass epoch_trim_start,epoch_trim_end = epoch_trim # bandpass filter coefficients b, a = butter(5, np.array([bandpass_start, bandpass_end])/(self.sfreq*0.5), 'bandpass') # filter and crop TRAINING SET X_train = self.preprocess_X(X_train, b, a, epoch_trim_start, epoch_trim_end) # validate X_train, y_train = check_X_y(X_train, y_train, allow_nd=True) X_train = sklearn.utils.validation.check_array(X_train, allow_nd=True) # filter and crop TEST SET X_test = self.preprocess_X(X_test, b, a, epoch_trim_start, epoch_trim_end) # validate X_test, y_test = check_X_y(X_test, y_test, allow_nd=True) X_test = sklearn.utils.validation.check_array(X_test, allow_nd=True) ########################################################################### # self-tune CSP to find optimal number of filters to use at these settings [best_num_filters, best_num_filters_score] = self.self_tune(X_train, y_train) # as an option, we could tune optimal CSP filter num against complete train set #X_tune = self.preprocess_X(X, b, a, epoch_trim_start, epoch_trim_end) #[best_num_filters, best_num_filters_score] = self.self_tune(X_tune, y) # now use this insight to really fit with optimal CSP 
spatial filters """ reg : float | str | None (default None) if not None, allow regularization for covariance estimation if float, shrinkage covariance is used (0 <= shrinkage <= 1). if str, optimal shrinkage using Ledoit-Wolf Shrinkage ('ledoit_wolf') or Oracle Approximating Shrinkage ('oas'). """ transformer = CSP(n_components=best_num_filters, reg='ledoit_wolf') transformer.fit(X_train, y_train) # use these CSP spatial filters to transform train and test spatial_filters_train = transformer.transform(X_train) spatial_filters_test = transformer.transform(X_test) # put this back in as failsafe if NaN or inf starts cropping up # spatial_filters_train = np.nan_to_num(spatial_filters_train) # check_X_y(spatial_filters_train, y_train) # spatial_filters_test = np.nan_to_num(spatial_filters_test) # check_X_y(spatial_filters_test, y_test) # train LDA classifier = LinearDiscriminantAnalysis() classifier.fit(spatial_filters_train, y_train) score = classifier.score(spatial_filters_test, y_test) print "current score",score print "bandpass:"******"epoch window:",epoch_trim_start,epoch_trim_end print best_num_filters,"filters chosen" # put in ranked order Top 10 list idx = bisect(self.ranked_scores, score) self.ranked_scores.insert(idx, score) self.ranked_scores_opts.insert(idx, dict(bandpass=bandpass,epoch_trim=epoch_trim,filters=best_num_filters)) self.ranked_classifiers.insert(idx,classifier) self.ranked_transformers.insert(idx,transformer) if len(self.ranked_scores) > self.num_votes: self.ranked_scores.pop(0) if len(self.ranked_scores_opts) > self.num_votes: self.ranked_scores_opts.pop(0) if len(self.ranked_classifiers) > self.num_votes: self.ranked_classifiers.pop(0) if len(self.ranked_transformers) > self.num_votes: self.ranked_transformers.pop(0) print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" print " T O P ", self.num_votes, " C L A S S I F I E R S" print #j=1 for i in 
xrange(len(self.ranked_scores)): print i,",",round(self.ranked_scores[i],4),",", print self.ranked_scores_opts[i] # finish up, set the flag to indicate "fitted" state self.fit_ = True # Return the classifier return self
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# NOTE(review): sklearn.cross_validation only exists in old scikit-learn
# releases; newer ones provide train_test_split in sklearn.model_selection.
from sklearn.cross_validation import train_test_split

solver = 'svd'
total_score = 0
stop = 100  # number of random train/test repetitions to average over

# Multi-class run: predict each student's `spec` from their `grades`.
for x in range(stop):
    clf = LinearDiscriminantAnalysis(solver=solver)
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    # Labels must be extracted before the data lists are replaced by grades.
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
# Average score over all repetitions.
total_score = total_score / stop
print('all')
print(total_score)

# Per-spec accumulators; the loop body continues beyond this excerpt.
specs = ['FK', 'FM', 'MN', 'OE']
for sp in specs:
    total_score = 0
    total_sensitivity = 0
    total_specificity = 0
    total_precision = 0
    total_npv = 0
    total_prevalence = 0
    for x in range(stop):
        sensitivity = 0
        specificity = 0
# Persist the palatability results under the /ancillary_analysis node.
hf5.create_array('/ancillary_analysis', 'r_spearman', r_spearman)
hf5.create_array('/ancillary_analysis', 'p_spearman', p_spearman)
hf5.create_array('/ancillary_analysis', 'lda_palatability', lda_palatability)
hf5.flush()

# --------End palatability calculation----------------------------------------------------------------------------

#---------Isotonic (ordinal) regression of firing against palatability--------------------------------------------
# One score per (laser condition, first axis of palatability, second axis of palatability).
r_isotonic = np.zeros((unique_lasers.shape[0], palatability.shape[0], palatability.shape[1]))

for i in range(unique_lasers.shape[0]):
    for j in range(palatability.shape[0]):
        for k in range(palatability.shape[1]):
            # Fit and score on the same trials: those selected by trials[i].
            model = IsotonicRegression(increasing = "auto")
            model.fit(palatability[j, k, trials[i]], response[j, k, trials[i]])
            r_isotonic[i, j, k] = model.score(palatability[j, k, trials[i]], response[j, k, trials[i]])

# Save this array to file
hf5.create_array('/ancillary_analysis', 'r_isotonic', r_isotonic)
hf5.flush()

#---------End Isotonic regression of firing against palatability--------------------------------------------------

#---------Multiple regression of firing rate against palatability and identity------------------------------------
# Set up an array to store the results of multiple regression using both identity and palatability - on the last axis, first element is the identity coeff and the second is the palatability coeff
id_pal_regress = np.zeros((unique_lasers.shape[0], identity.shape[0], identity.shape[1], 2))

# The body of this loop nest continues beyond this excerpt.
for i in range(unique_lasers.shape[0]):
    for j in range(identity.shape[0]):
        for k in range(identity.shape[1]):
            #model = LinearRegression()
            # Standardize the identity and palatability arrays for this time bin
def selfEvaluation(self): eval_start = time.clock() print colors.GOLD print "--------------------------" print "Self Evaluation" # extract features from collected epochs by transforming with spatial filters print "Training..." self.X = self.extractFeatures(self.epochs, self.spatial_filters) lda = LinearDiscriminantAnalysis() lda = lda.fit(self.X, self.y) cross_validation_folds = 10 xval = cross_val_score(lda, self.X, self.y, cv=cross_validation_folds) self.tuneSpatialFilters() # print cross validation report on training LDA print print colors.BOLD_YELLOW print "cross-validation with k=",cross_validation_folds,"folds" print xval print "mean:", xval.mean() print colors.SILVER print "--------------------------" print "Self Evaluation" print "Testing..." start = time.clock() test_epochs, test_y = BCIFileToEpochs( filename=self.test_file, num_channels=self.num_channels, max_epochs_per_class=1000, #self.calculation_threshold, filter_class_labels=[-1,1], #self.class_labels, epoch_size=self.epoch_size, include_electrodes=self.include_electrodes ) end = time.clock() print "loaded test file in ", str(end - start),"seconds" # apply IIR filters to each channel row test_epochs = np.apply_along_axis( self.filterChannelData, axis=1, arr=test_epochs ) test_X = self.extractFeatures(epochs=test_epochs, spatial_filters=self.spatial_filters) #chart_file_name="test_filters.pdf", y=test_y) print "-----------------------------------------------------------------" print "Metrics & Score" print colors.ORANGE predicted_y = lda.predict(test_X) cm = confusion_matrix(test_y, predicted_y) np.set_printoptions(precision=2) print('Confusion matrix, without normalization') print(cm) cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] print('Normalized confusion matrix') print(cm_normalized) print colors.DARK_GREEN print "test",self.test_file print "bandpass filter", self.bandpass_filter_range print "trained with", self.calculation_threshold, "epochs per class" print 
(self.calculation_threshold*2*self.epoch_size)/self.sampling_rate, "sec trained" print "epoch_size", self.epoch_size print "CSP filters:", self.num_spatial_filters print colors.BOLD_GREEN print "percent correct:", lda.score(test_X, test_y) print colors.ENDC end = time.clock() print "evaluation stage completed in ", str(end - eval_start),"seconds" print "########################################" print "########################################" print "########################################" print "########################################" print "EXITING NOW" os._exit(1) thread.interrupt_main() exit() exit() return True
# NOTE(review): the first two statements are the tail of a data-generating
# helper (presumably generate_data, which is called below) whose `def` line
# is not part of this excerpt.
    if n_features > 1:
        # Pad X with standard-normal columns up to n_features columns.
        X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
    return X, y


# Compare LDA with automatic shrinkage against plain LDA as the number of
# features grows while the training-set size stays fixed.
acc_clf1, acc_clf2 = [], []
n_features_range = range(1, n_features_max + 1, step)
for n_features in n_features_range:
    score_clf1, score_clf2 = 0, 0
    for _ in range(n_averages):
        # Fresh training sample for every repetition.
        X, y = generate_data(n_train, n_features)

        clf1 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
        clf2 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y)

        # Independent test sample; X and y are deliberately rebound.
        X, y = generate_data(n_test, n_features)
        score_clf1 += clf1.score(X, y)
        score_clf2 += clf2.score(X, y)

    # Mean test score over the repetitions for this feature count.
    acc_clf1.append(score_clf1 / n_averages)
    acc_clf2.append(score_clf2 / n_averages)

features_samples_ratio = np.array(n_features_range) / n_train

plt.plot(features_samples_ratio, acc_clf1, linewidth=2,
         label="Linear Discriminant Analysis with shrinkage", color='r')
plt.plot(features_samples_ratio, acc_clf2, linewidth=2,
         label="Linear Discriminant Analysis", color='g')

plt.xlabel('n_features / n_samples')
plt.ylabel('Classification accuracy')
# Cross-validate an LDA on features reduced by `estimator`.
# NOTE(review): `i`, `shortname`, `estimator`, `scores` and `t0` come from
# code outside this excerpt (presumably an enclosing loop) - confirm.
for train_index,test_index in rs:
    X_train = data[train_index,:]
    y_train = labels[train_index]
    X_test = data[test_index,:]
    y_test = labels[test_index]

    # The reducer is fitted on the training rows of this split only.
    X_train_reduced = estimator.fit_transform(X_train)
    if shortname == 'pca':
        print "----- noise variance : ",estimator.noise_variance_
        print "----- percentage of explained variance for each component : ",estimator.explained_variance_ratio_
        print "----- percentage total of explained variance : ",np.sum(estimator.explained_variance_ratio_)
    if shortname == 'nmf':
        print "----- error reconstruction : ",estimator.reconstruction_err_
    X_test_reduced = estimator.transform(X_test)

    clf_lda = LinearDiscriminantAnalysis()
    clf_lda.fit(X_train_reduced,y_train)
    scores.append(clf_lda.score(X_test_reduced,y_test))

mean_score = np.mean(np.asarray(scores))
print "--- Cross validation scores : ",scores
print "--- Cross validation mean score : %f" % mean_score
# Anything other than 'pca' is recorded as an NMF result.
if shortname == 'pca':
    scores_pca[i] = mean_score
else:
    scores_nmf[i] = mean_score
train_time = (time() - t0)
print "--- done in %0.3fs" % train_time
# Components of the estimator fitted on the last split.
components_ = estimator.components_
#plot_gallery('%s - Train time %.1fs' % (name, train_time),components_[:n_components],n_components/5,5)
# plt.show()
plt.clf()
def LinearDA(X_train, y_train, X_test, y_test):
    """Fit a default LDA on the training data and return its score on the test data."""
    model = LDA()
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
class P300EasyClassifier(object):
    '''Easy and modular P300 classifier

    attributes:
    fname - classifier save filename
    epoch_buffor - current epoch buffor
    max_avr - maximum epochs to average
    decision_buffor - last decisions buffor, when full of identical
        decisions final decision is made
    clf - core classifier from sklearn
    feature_s - feature length (set by calibrate)'''

    def __init__(self, fname='./class.joblib.pkl', max_avr=10,
                 decision_stop=3, targetFs=30, clf=None,
                 feature_reduction = None):
        '''fname - classifier file to save or load classifier on disk
        while classifying produce decision after max_avr epochs averaged,
        or after decision_stop successful same decisions
        targetFs - on feature extraction downsample to this Hz
        clf - sklearn type classifier to use as core; when None a
            shrinkage LDA (solver 'lsqr', shrinkage 'auto') is created
        feature_reduction - 'auto', int, None. If 'auto' - features are
            reduced, features left are those which have statistically
            significant (p<0.05) difference in target and nontarget,
            if int - use feature_reduction most significant features,
            if None don't use reduction
        '''
        self.targetFs = targetFs
        self.fname = fname
        self.epoch_buffor = []
        self.max_avr = max_avr
        # Bounded deque: holds only the last decision_stop decisions.
        self.decision_buffor = deque([], decision_stop)
        self.feature_reduction = feature_reduction
        if clf is None:
            self.clf = LinearDiscriminantAnalysis(solver = 'lsqr', shrinkage='auto')
        else:
            # Previously a caller-supplied classifier was silently dropped,
            # leaving self.clf undefined.
            self.clf = clf

    def load_classifier(self, fname=None):
        '''loads classifier from disk,
        provide fname - path to joblib pickle with classifier,
        or will be used from init'''
        if fname is None:
            # Fall back to the path given to __init__, as documented.
            fname = self.fname
        self.clf = joblib.load(fname)

    def calibrate(self, targets, nontargets, bas=-0.1, window=0.4, Fs=None):
        '''targets, nontargets - 3D arrays (epoch x channel x time)
        or list of OBCI smart tags
        if arrays - need to provide Fs (sampling frequency) in Hz
        bas - baseline in seconds(negative), in other words start offset
        window - forwarded to feature extraction

        Fits self.clf, saves it to self.fname and returns its score on
        the calibration features themselves.'''
        if Fs is None:
            Fs = float(targets[0].get_param('sampling_frequency'))
        # NOTE(review): assumes _tags_to_array accepts whatever input kind
        # calibrate is given (tags, and arrays when Fs is supplied) - confirm.
        target_data = _tags_to_array(targets)
        nontarget_data = _tags_to_array(nontargets)
        data = np.vstack((target_data, nontarget_data))
        # Epoch length along the last axis; run() trims tag epochs to it.
        self.epoch_l = data.shape[2]
        # Targets come first in `data`, so they get label 1, the rest 0.
        labels = np.zeros(len(data))
        labels[:len(target_data)] = 1
        data, labels = _remove_artifact_epochs(data, labels)
        features = _feature_extraction(data, Fs, bas, window, self.targetFs)

        if self.feature_reduction:
            mask = _feature_reduction_mask(features, labels, self.feature_reduction)
            # Remembered so run() can apply the same selection.
            self.feature_reduction_mask = mask
            features = features[:, mask]

        self.feature_s = features.shape[1]
        self.bas = bas
        self.window = window

        self.clf.fit(features, labels)
        joblib.dump(self.clf, self.fname, compress=9)
        return self.clf.score(features, labels)

    def run(self, epoch, Fs=None):
        '''epoch - array (channels x time) or smarttag/readmanager object,
        bas - baseline in seconds (negative),
        Fs - sampling frequency Hz, leave None if epoch is smart tag,
        returns decision - 1 for target, 0 for nontarget,
        None - for no decision

        Must be called after calibrate(), which sets the baseline, window
        and epoch length used here.'''
        bas = self.bas
        window = self.window
        if Fs is None:
            # Tag-like input: read the sampling rate and raw samples from it.
            Fs = float(epoch.get_param('sampling_frequency'))
            epoch = epoch.get_samples()[:,:self.epoch_l]
        if len(self.epoch_buffor)< self.max_avr:
            self.epoch_buffor.append(epoch)
        # Classify the running average of all buffered epochs.
        avr_epoch = np.mean(self.epoch_buffor, axis=0)

        features = _feature_extraction_singular(avr_epoch,
                                                Fs,
                                                bas,
                                                window,
                                                self.targetFs)[None, :]
        if self.feature_reduction:
            mask = self.feature_reduction_mask
            features = features[:, mask]
        decision = self.clf.predict(features)[0]
        self.decision_buffor.append(decision)

        # Early stop: the last decision_stop decisions all agree.
        if len(self.decision_buffor) == self.decision_buffor.maxlen:
            if len(set(self.decision_buffor))==1:
                self.decision_buffor.clear()
                self.epoch_buffor = []
                return decision
        # Forced stop: the maximum number of epochs has been averaged.
        if len(self.epoch_buffor) == self.max_avr:
            self.decision_buffor.clear()
            self.epoch_buffor = []
            return decision
        return None
# Single-trial fitting and feature extraction features = np.zeros((len(triggers), 32)) for t in range(len(triggers)): print('Fold {:2d}/{:2d}, trial: {:d} '.format(fold, nfolds, t), end='\r') ws.set_data(data[t, :, :]) ws.fit_var() con = ws.get_connectivity('ffPDC') alpha = np.mean(con[:, :, np.logical_and(7 < freq, freq < 13)], axis=2) beta = np.mean(con[:, :, np.logical_and(15 < freq, freq < 25)], axis=2) features[t, :] = np.array([alpha, beta]).flatten() lda.fit(features[train, :], classids[train]) acc_train = lda.score(features[train, :], classids[train]) acc_test = lda.score(features[test, :], classids[test]) print('Fold {:2d}/{:2d}, ' 'acc train: {:.3f}, ' 'acc test: {:.3f}'.format(fold, nfolds, acc_train, acc_test)) pred = lda.predict(features[test, :]) cm += confusion_matrix(classids[test], pred) print('\nConfusion Matrix:\n', cm) print('\nTotal Accuracy: {:.3f}'.format(np.sum(np.diag(cm))/np.sum(cm)))
def _confusion_rates(predicted, actual, positive):
    '''Return (sensitivity, specificity, precision, npv) for one-vs-rest labels.

    predicted, actual - equal-length sequences of labels; every label other
        than `positive` counts as negative
    A rate whose denominator is zero is reported as 0.'''
    predicted = list(predicted)
    actual = list(actual)
    true_pos = sum(1 for p, a in zip(predicted, actual) if p == a == positive)
    true_neg = sum(1 for p, a in zip(predicted, actual)
                   if p == a and a != positive)
    actual_pos = sum(1 for a in actual if a == positive)
    predicted_pos = sum(1 for p in predicted if p == positive)

    def ratio(count, total):
        return count / total if total else 0

    return (ratio(true_pos, actual_pos),
            ratio(true_neg, len(actual) - actual_pos),
            ratio(true_pos, predicted_pos),
            ratio(true_neg, len(predicted) - predicted_pos))


def classify(data=None, clf=None, repeat=10, test_size=0.2, leave=False):
    '''applies classification method based on a classification object clf

    data must be list of objects-students (each with .spec and .grades);
        defaults to win.getStudents()
    clf is the classifier to fit; defaults to an LDA with solver 'lsqr'
    repeat should be an integer and it makes the classification happen
        'repeat' number of times; results are averaged over all repeats
    test_size is forwarded to train_test_split (random-split mode only)
    leave - if False use random train/test splits, otherwise leave-one-out

    returns an OrderedDict with the keys 'method', 'ACC for all specs' and,
    for every spec, '<spec> acc - prevalence: ' (accuracy of the
    one-vs-rest problem minus the share of its majority class)'''
    if data is None:
        data = win.getStudents()
    if clf is None:
        clf = LinearDiscriminantAnalysis(solver='lsqr')
    total_score = 0
    stop = repeat
    specs = ['FK', 'FM', 'MN', 'OE']
    results = OrderedDict()
    results['method'] = str(clf)

    if leave is False:
        # Multi-class accuracy over random splits.
        for _ in range(stop):
            data_train, data_test = train_test_split(data,
                                                     test_size=test_size)
            data_train_labels = [s.spec for s in data_train]
            data_test_labels = [s.spec for s in data_test]
            data_train = [s.grades for s in data_train]
            data_test = [s.grades for s in data_test]
            clf.fit(data_train, data_train_labels)
            total_score += clf.score(data_test, data_test_labels)
        total_score = total_score / stop
        results['ACC for all specs'] = round(total_score, 2)

        # One-vs-rest accuracy per spec.
        for sp in specs:
            total_score = 0
            total_sensitivity = 0
            total_specificity = 0
            total_precision = 0
            total_npv = 0
            total_prevalence = 0
            for _ in range(stop):
                data_train, data_test = train_test_split(
                    data, test_size=test_size)
                data_train_labels = [s.spec if s.spec == sp else 'NOT ' + sp
                                     for s in data_train]
                data_test_labels = [s.spec if s.spec == sp else 'NOT ' + sp
                                    for s in data_test]
                data_train = [s.grades for s in data_train]
                data_test = [s.grades for s in data_test]
                clf.fit(data_train, data_train_labels)
                total_score += clf.score(data_test, data_test_labels)
                prediction = clf.predict(data_test)
                # Precision and NPV are now derived from raw counts; before
                # they were computed from already-normalised rates.
                sensitivity, specificity, precision, npv = _confusion_rates(
                    prediction, data_test_labels, sp)
                prevalence = data_test_labels.count(sp) / len(data_test_labels)
                total_sensitivity += sensitivity
                total_specificity += specificity
                total_precision += precision
                total_npv += npv
                total_prevalence += prevalence
            total_score = total_score / stop
            total_sensitivity = total_sensitivity / stop
            total_specificity = total_specificity / stop
            total_precision = total_precision / stop
            total_npv = total_npv / stop
            total_prevalence = total_prevalence / stop
            # Optional extra report entries, disabled as in the original:
            # results[sp + ' accuracy: '] = total_score
            # results[sp + ' sensitivity: '] = total_sensitivity
            # results[sp + ' specificity: '] = total_specificity
            # results[sp + ' precision: '] = total_precision
            # results[sp + ' negative predictive value: '] = total_npv
            results[sp + ' acc - prevalence: '] = round(
                total_score - max(total_prevalence, 1 - total_prevalence), 2)
    else:
        # NOTE(review): LeaveOneOut(n=...) is the old
        # sklearn.cross_validation interface; kept to match the rest of
        # the file.
        for _ in range(stop):
            loo = LeaveOneOut(n=len(data))
            for train_index, test_index in loo:
                data_train = [data[ii] for ii in train_index]
                data_test = data[test_index[0]]
                data_train_labels = [s.spec for s in data_train]
                data_test_labels = data_test.spec
                data_train = [s.grades for s in data_train]
                data_test = data_test.grades
                clf.fit(data_train, data_train_labels)
                # Wrap the single sample so predict receives a 2-D input.
                if clf.predict([data_test])[0] == data_test_labels:
                    total_score += 1
        total_score = total_score / stop / len(loo)
        results['ACC for all specs'] = round(total_score, 2)

        for sp in specs:
            total_score = 0
            total_prevalence = 0
            for _ in range(stop):
                loo = LeaveOneOut(n=len(data))
                for train_index, test_index in loo:
                    data_train = [data[ii] for ii in train_index]
                    data_test = data[test_index[0]]
                    data_train_labels = [
                        s.spec if s.spec == sp else 'NOT ' + sp
                        for s in data_train]
                    data_test_labels = (data_test.spec
                                        if data_test.spec == sp
                                        else 'NOT ' + sp)
                    data_train = [s.grades for s in data_train]
                    data_test = data_test.grades
                    # Fit BEFORE predicting: the original predicted with the
                    # model left over from the previous iteration, which had
                    # been trained on data containing this very sample.
                    clf.fit(data_train, data_train_labels)
                    prediction = clf.predict([data_test])
                    if prediction[0] == data_test_labels:
                        total_score += 1
                    if data_test_labels == sp:
                        total_prevalence += 1
            total_score = total_score / stop / len(loo)
            total_prevalence = total_prevalence / stop / len(loo)
            # results[sp + ' accuracy: '] = round(total_score, 2)
            results[sp + ' acc - prevalence: '] = round(
                total_score - max(total_prevalence, 1 - total_prevalence), 2)
    return results
import sklearn.metrics as metrics
# Metrics for the predictions of an earlier model (`pred` is defined above
# this excerpt); the trailing comments record previously observed values.
metrics.accuracy_score(test.label,pred) #0.6097560975609756
metrics.roc_auc_score(test.label,pred) #0.60621768080159055

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Model 1: a single predictor (column 1); column 0 of train_data is used as
# the label. reshape(-1,1) turns the 1-D column into a one-feature matrix.
clf = LinearDiscriminantAnalysis()
clf=clf.fit(train_data[0:,1].reshape(-1,1), train_data[0:,0])
pred = clf.predict(test_data[0:,1].reshape(-1,1))
print("lda: label ~ count accuracy:")
# NOTE(review): bare expression - the score is only shown in an interactive
# session, not when run as a script.
clf.score(test_data[0:,1].reshape(-1,1),test.label) #0.57513768686073963

# Model 2: three predictors (columns 1, 19 and 20).
clf = LinearDiscriminantAnalysis()
clf=clf.fit(train_data[0:,[1,19,20]], train_data[0:,0])
pred = clf.predict(test_data[0:,[1,19,20]])
print("lda: label ~ count + callcount + crimecount accuracy:")
# NOTE(review): the model is fitted and used for prediction on columns
# [1,19,20] but scored on columns [2,13,14] of the same test_data array.
# One of the two selections looks wrong; confirm the column layout before
# trusting the recorded accuracy.
clf.score(test_data[0:,[2,13,14]],test.label) #0.75735590487706572
def _probaToRGBA(yPred, cClasses):
    """Turn class probabilities into winner-takes-all RGBA colors.

    yPred is an (n points x n classes) array of class probabilities and
    cClasses an (n classes x 4) array of RGBA colors. Every point receives
    the color of its most probable class (the colors of tied classes are
    summed) and an alpha equal to its winning probability divided by the
    largest probability found anywhere in yPred.
    """
    pWinner = yPred.max(axis=1)
    cWinner = (yPred == pWinner[:, np.newaxis]).astype('float')
    rgba = np.dot(cWinner, cClasses)
    rgba[:, 3] = pWinner / yPred.max()
    return rgba


def _plotDecisionSurface(subplotCode, rgba, meshShape, Xrr, cValGood, title):
    """Draw one panel: the colored decision surface with the data on top."""
    plt.subplot(subplotCode)
    Zplot = rgba.reshape(meshShape[0], meshShape[1], 4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower',
               interpolation='none', aspect='auto')
    if Xrr.shape[1] > 1:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        # Only one discriminant function: spread the points vertically with
        # random jitter so that they do not all fall on one line.
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood, s=40, zorder=1)
    plt.title(title)
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))


def discriminatePlot(X, y, cVal, titleStr=''):
    """Robust wrapper for discriminant analysis with plots.

    Runs LDA, QDA and a random forest (RF) on a PCA projection of the data
    after error checking, estimates their performance by stratified
    cross-validation, and plots the decision surface of each classifier in
    the space of the first two discriminant functions.

    Parameters
    ----------
    X : np array, n rows x p parameters.
    y : np array of group labels, n rows.
    cVal : np array with one color per data point (3 values per row, an
        alpha of 1.0 is appended); should be the same for all points that
        belong to the same group.
    titleStr : title prefix for the plots and the printed summary.

    Returns
    -------
    ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
        Scores are the mean fold accuracy in percent; the "SE" values are
        the standard deviation of the fold accuracies in percent. Seven
        -1 values are returned when fewer than two classes have at least
        MINCOUNT samples, or when no cross-validation fold could be used.
    """
    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5

    # Initialize Variables and clean up data: keep only the classes that
    # have at least MINCOUNT samples.
    classes, classesCount = np.unique(y, return_counts=True)
    goodIndClasses = classesCount >= MINCOUNT
    goodInd = np.array([b in classes[goodIndClasses] for b in y], dtype=bool)
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]

    # Classes to be discriminated; should be the same as ldaMod.classes_
    classes, classesCount = np.unique(yGood, return_counts=True)
    nClasses = classes.size

    # Do we have enough data?
    if nClasses < 2:
        print('Error in ldaPLot: Insufficient classes with minimun data (%d) for discrimination analysis' % (MINCOUNT))
        return -1, -1, -1, -1, -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if cvFolds < CVFOLDS:
        print('Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS))

    # Data size and color values
    nD = XGood.shape[1]    # number of features in X
    nX = XGood.shape[0]    # number of data points in X
    cClasses = []          # RGBA color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl], 1.0))
    cClasses = np.asarray(cClasses)
    myPrior = np.ones(nClasses) * (1.0 / nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance
    # matrix can be fitted. Floor division keeps the Python 2 result on
    # Python 3 as well.
    nDmax = int(np.fix(np.sqrt(nX // 5)))
    if nDmax < nD:
        print('Warning: Insufficient data for %s parameters. PCA projection to %s dimensions.' % (nD, nDmax))
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_) * 100.0))

    # Initialise Classifiers
    ldaMod = LDA(n_components=min(nDmax, nClasses - 1), priors=myPrior,
                 shrinkage=None, solver='svd')
    qdaMod = QDA(priors=myPrior)
    rfMod = RF()   # by default assumes equal weights

    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0

    for train, test in skf:
        # Enforce the MINCOUNTTRAINING in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = trainCount >= MINCOUNTTRAINING
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]], dtype=bool)

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]
        trainClasses, trainCount = np.unique(yTrain, return_counts=True)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        # Only test on samples whose class was seen during training. The
        # mask always has the length of the test fold, so emptiness must be
        # checked with any() and not with size.
        goodInd = np.array([b in trainClasses for b in yGood[test]], dtype=bool)
        if not goodInd.any():
            continue

        # Fit the data
        trainPriors = np.ones(ntrainClasses) * (1.0 / ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)

        XrTest = Xr[test[goodInd]]
        yTest = yGood[test[goodInd]]
        ldaScores[iskf] = ldaMod.score(XrTest, yTest)
        qdaScores[iskf] = qdaMod.score(XrTest, yTest)
        rfScores[iskf] = rfMod.score(XrTest, yTest)

        iskf += 1

    if iskf == 0:
        # Every fold was skipped: there is no score to report.
        print('Error in ldaPlot: No cross-validation fold had enough data for training and testing')
        return -1, -1, -1, -1, -1, -1, -1

    if iskf != cvFolds:
        # Some folds were skipped: drop the unused trailing entries so that
        # they do not bias the mean and std. (reshape cannot change the size
        # of an array and its result was discarded, so slice instead.)
        cvFolds = iskf
        ldaScores = ldaScores[:cvFolds]
        qdaScores = qdaScores[:cvFolds]
        rfScores = rfScores[:cvFolds]

    # Refit with all the data for the plots
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)

    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print('Error in ldaPlot: labels do not match')

    # Print the coefficients of first 3 DFA
    print('LDA Weights:')
    print('DFA1: %s' % (ldaMod.coef_[0, :],))
    if nClasses > 2:
        print('DFA2: %s' % (ldaMod.coef_[1, :],))
    if nClasses > 3:
        print('DFA3: %s' % (ldaMod.coef_[2, :],))

    # Obtain fits in this rotated space for display purposes
    ldaMod.fit(Xrr, yGood)
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)

    XrrMean = Xrr.mean(0)

    # Make a mesh for plotting: the first two discriminant functions span
    # the grid, any further ones are held at their mean value.
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:, 0] = xm1
    if Xrr.shape[1] > 1:
        Xm[:, 1] = xm2
    for ix in range(2, Xrr.shape[1]):
        Xm[:, ix] = XrrMean[ix]

    # Predict values on mesh for plotting based on the first two DFs
    yPredLDA = ldaMod.predict_proba(Xm)
    yPredQDA = qdaMod.predict_proba(Xm)
    yPredRF = rfMod.predict_proba(Xm)

    # Plot the surface of probability for each classifier
    plt.figure(facecolor='white', figsize=(10, 3))
    panels = ((131, 'LDA', yPredLDA, ldaScores),
              (132, 'QDA', yPredQDA, qdaScores),
              (133, 'RF', yPredRF, rfScores))
    for subplotCode, modName, yPred, scores in panels:
        # Transform the predictions in color codes (winner takes all;
        # weighted colors does not work)
        rgba = _probaToRGBA(yPred, cClasses)
        title = '%s: %s pC %.0f %%' % (titleStr, modName, (scores.mean() * 100.0))
        _plotDecisionSurface(subplotCode, rgba, np.shape(x1), Xrr, cValGood, title)

    plt.show()

    # Results
    ldaScore = ldaScores.mean() * 100.0
    qdaScore = qdaScores.mean() * 100.0
    rfScore = rfScores.mean() * 100.0
    ldaScoreSE = ldaScores.std() * 100.0
    qdaScoreSE = qdaScores.std() * 100.0
    rfScoreSE = rfScores.std() * 100.0

    print('Number of classes %d. Chance level %.2f %%' % (nClasses, 100.0 / nClasses))
    print('%s LDA: %.2f (+/- %0.2f) %%' % (titleStr, ldaScore, ldaScoreSE))
    print('%s QDA: %.2f (+/- %0.2f) %%' % (titleStr, qdaScore, qdaScoreSE))
    print('%s RF: %.2f (+/- %0.2f) %%' % (titleStr, rfScore, rfScoreSE))
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses