class LinearDiscriminantAnalysisImpl:
    def __init__(self, solver='svd', shrinkage=None, priors=None,
                 n_components=None, store_covariance=False, tol=0.0001):
        self._hyperparams = {
            'solver': solver,
            'shrinkage': shrinkage,
            'priors': priors,
            'n_components': n_components,
            'store_covariance': store_covariance,
            'tol': tol}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
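# A minimal usage sketch for the wrapper above (my addition, not part of the
# original module). It assumes SKLModel aliases sklearn's estimator, e.g.:
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as SKLModel
import numpy as np

X_demo = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y_demo = np.array([0, 0, 1, 1])

impl = LinearDiscriminantAnalysisImpl(solver='svd')
impl.fit(X_demo, y_demo)
print(impl.predict(X_demo))            # class labels
print(impl.decision_function(X_demo))  # signed distances to the separating hyperplane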
def intersubjective_shallow(data, model_name):
    x_train, y_train, x_test, y_test, o_t_test, o_tr_test = data
    x_train, y_train, x_test, y_test = resample_transform(
        (x_train, y_train, x_test, y_test), resample=False)
    global t_test
    t_test = o_t_test
    global tr_test
    tr_test = o_tr_test
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    m = [
        'acc', 'val_acc', 'val_precisions', 'val_recalls', 'val_f1s',
        'val_aucs', 'val_balanced_acc', 'val_recognition_acc', 'val_bpm'
    ]
    metrics = {key: [] for key in m}
    history = False
    if 'svm' in model_name:
        clf = svm.LinearSVC(random_state=0)
    elif 'lda' in model_name:
        if 'shrinkage' in model_name:
            clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        else:
            clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None)
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    probs = clf.decision_function(x_test)
    metrics['acc'].append(clf.score(x_train, y_train))
    metrics = compute_metrics(metrics, probs, y_predict, y_test)
    cnf_matrix = confusion_matrix(y_test, y_predict)
    return metrics, history, cnf_matrix, clf
class LDADecoder(ERPDecoder):
    """
    A basic LDA decoder. Not to be used in online experiments;
    there the OnlineUnsupervsedEM decoder should be used.
    """

    def __init__(self, n_stimuli, x, y):
        self.n_stimuli = n_stimuli
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
        # The [:, :-1] drops the bias column; LDA fits its own intercept
        self.preprocess = lambda x: flatten_normalise_bias(x)[:, :-1]
        self.clf = LDA(solver='lsqr', shrinkage='auto')
        self.clf.fit(self.preprocess(x), y)
        self.eeg = []
        self.stimuli = []

    def add_trial(self, x, s):
        """Store a preprocessed trial and its stimulus sequence."""
        self.eeg.append(self.preprocess(x))
        self.stimuli.append(s)

    def predict_all_trials(self):
        output = []
        for x, s in zip(self.eeg, self.stimuli):
            outputs = np.zeros(self.n_stimuli)
            for idx in range(self.n_stimuli):
                eeg = np.array([xx for xx, ss in zip(x, s) if idx in ss])
                if eeg.shape[0] == 0:  # no epochs for this stimulus
                    outputs[idx] = -np.inf
                else:
                    outputs[idx] = self.apply_single_stimulus(eeg).mean()
            output.append(np.argmax(outputs))
        return np.array(output)

    def apply_single_stimulus(self, x):
        return self.clf.decision_function(flatten_normalise_bias(x)[:, :-1])
class LinearDiscriminantAnalysisImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
if k < n_samples_integrate - 1:
    # beginning of data stream (not yet enough samples)
    signal_tmp = test_filt_data[:, :k + 1]
else:
    # enough samples collected (n_samples_integrate)
    signal_tmp = test_filt_data[:, k - (n_samples_integrate - 1):k + 1]

# format data: dimensions for CSP are epochs x channels x samples
# (1, 24, n_samples_integrate)
signal_tmp = np.expand_dims(signal_tmp, 0)

# apply CSP filters + LDA
fea_tmp = csp.transform(signal_tmp)
# pred_tmp = lda.predict(fea_tmp)
pred_tmp = lda.decision_function(fea_tmp)

# put in array for prediction values
# pred_cont.append(list(pred_tmp))
# pred_cont = pred_cont[-n_output_integrate:]  # only keep last values in buffer
pred_buffer[:n_output_integrate] = pred_buffer[-n_output_integrate:]  # shift to the left by one
pred_buffer[n_output_integrate] = pred_tmp  # add prediction of this loop

# alternative: low-pass filter for buffer, lfilter with zi // initialize with
# lfilter_zi (a possible initialization sketch follows below)
cl_out_cont[k], zi_previous = lfilter(b, a, pred_tmp, axis=-1, zi=zi_previous)
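# A possible initialization for zi_previous used above; this is only a sketch of
# the "initialize with lfilter_zi" idea from the comment, not code from the
# original script. scipy's lfilter_zi returns steady-state initial conditions,
# which are conventionally scaled by the first input value so the filter starts
# without a transient. pred_buffer[0] holding the first decision value is an
# assumption here.
from scipy.signal import lfilter_zi

zi_previous = lfilter_zi(b, a) * pred_buffer[0]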
def precision(f_pos, t_pos):
    return t_pos / (t_pos + f_pos)


def cobertura(f_neg, t_pos):
    # cobertura = recall: true positives over all actual positives
    return t_pos / (t_pos + f_neg)


# In[22]:

# sklearn.discriminant_analysis.LinearDiscriminantAnalysis(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=True, tol=0.0001)
clf = LinearDiscriminantAnalysis()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf.fit(X, y)
print(clf.predict([[-0.8, -1]]))
clf.decision_function(X)

# In[11]:

def f1(f_pos, f_neg, t_pos, t_neg):
    return 2 * precision(f_pos, t_pos) * cobertura(f_neg, t_pos) / (
        precision(f_pos, t_pos) + cobertura(f_neg, t_pos))


# In[ ]:

matriz = [[t_pos, f_neg], [f_pos, t_neg]]
plt.imshow(matriz)
plt.savefig("matriz.png")
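# In[ ]:

# A quick numeric check of the metric helpers above with made-up counts (not
# from the original notebook): t_pos=8, f_pos=2, f_neg=4 gives precision 0.8,
# cobertura (recall) 2/3, and F1 about 0.727.
print(precision(f_pos=2, t_pos=8))              # 0.8
print(cobertura(f_neg=4, t_pos=8))              # 0.666...
print(f1(f_pos=2, f_neg=4, t_pos=8, t_neg=10))  # 0.727...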
model = RandomForestClassifier(n_estimators=10, max_depth=3)
print "Random Forest"
test_model(model)

model_lda = LinearDiscriminantAnalysis()
print "LDA"
test_model(model_lda)

use_prediction = False
raw_test_data, test_labels = readDataMultipleFiles([3])
test_data_matrix, test_data_matrices, test_labels, test_labels_binary = buildMatricesAndLabels(raw_test_data, test_labels, scaling_functions)

test_predictions = []
for features in test_data_matrix:
    if not use_prediction:
        test_predictions.append(model_lda.decision_function([features])[0])  # score for classes_[1]
    else:
        test_predictions.append(model_lda.predict_proba([features])[0])

for i in range(target_count):
    print sum(test_labels_binary[i])

thresholds_for_bci = multiclassRoc(test_predictions, test_labels_binary)

# model = SVC(C=1000, kernel="poly", degree=2)
# print "SVM"
# test_model(model)

# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0.pkl", "w")).dump(model_lda)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_mm.pkl", "w")).dump(min_max)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_thresh.pkl", "w")).dump(thresholds_for_bci)
# comparing results of prediction
np.testing.assert_equal(predicted_training_online, predicted_training_offline)
np.testing.assert_equal(predicted_training_online, predicted_training_sklearn)
np.testing.assert_equal(predicted_testing_online, predicted_testing_offline)
np.testing.assert_equal(predicted_testing_online, predicted_testing_sklearn)

# comparing prior class probabilities
np.testing.assert_equal(classifier_online.prob_classes, classifier_offline.prob_classes)
np.testing.assert_equal(classifier_online.prob_classes, classifier_sklearn.priors_)

# comparing total covariance matrix
np.testing.assert_almost_equal(classifier_online.total_covariance(), classifier_offline.total_covariance())
np.testing.assert_almost_equal(classifier_online.total_covariance(), classifier_sklearn.covariance_)
np.testing.assert_almost_equal(classifier_offline.total_covariance(), classifier_sklearn.covariance_)

# comparing means
np.testing.assert_almost_equal(classifier_online.means_by_class(), classifier_offline.means_by_class())
np.testing.assert_almost_equal(classifier_online.means_by_class(), classifier_sklearn.means_)

# compare coefficients and intercepts only in the multiclass case, due to
# specifics of sklearn's binary LDA (it collapses the two per-class
# discriminants into a single hyperplane)
if len(np.unique(y)) > 2:
    np.testing.assert_almost_equal(classifier_online.coef(), classifier_offline.coef())
    np.testing.assert_almost_equal(classifier_online.coef(), classifier_sklearn.coef_)
    np.testing.assert_almost_equal(classifier_online.intercept(), classifier_offline.intercept())
    np.testing.assert_almost_equal(classifier_online.intercept(), classifier_sklearn.intercept_, decimal=5)
    np.testing.assert_almost_equal(classifier_online.scores, classifier_offline.scores)
    np.testing.assert_almost_equal(classifier_online.scores, classifier_sklearn.decision_function(X_test), decimal=5)
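# A self-contained sketch (not part of the original test) of the identity the
# coefficient comparison above relies on: with a shared within-class covariance
# Sigma, binary LDA's hyperplane is coef = Sigma^{-1} (mu_1 - mu_0).
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.randn(50, 3), rng.randn(50, 3) + 1.0])
y_demo = np.array([0] * 50 + [1] * 50)

clf_demo = LinearDiscriminantAnalysis(solver='lsqr').fit(X_demo, y_demo)
mu0, mu1 = clf_demo.means_
coef_manual = np.linalg.solve(clf_demo.covariance_, mu1 - mu0)
np.testing.assert_almost_equal(clf_demo.coef_.ravel(), coef_manual, decimal=5)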
# Create classifier object and train
# Add code here to include other classifiers (MLP, BDT, ...)
clf = LDA()
clf.fit(X_train, y_train)

# Evaluate accuracy using the test data.
# If available, use the decision function, else (e.g. for MLP) use predict_proba.
# Adjust threshold value tCut or pMin as appropriate.
X_bkg_test = X_test[y_test == 0]
X_sig_test = X_test[y_test == 1]
y_bkg_test = y_test[y_test == 0]
y_sig_test = y_test[y_test == 1]
if hasattr(clf, "decision_function"):
    tCut = 0.
    y_bkg_pred = (clf.decision_function(X_bkg_test) >= tCut).astype(bool)
    y_sig_pred = (clf.decision_function(X_sig_test) >= tCut).astype(bool)
else:
    pMin = 0.9
    y_bkg_pred = (clf.predict_proba(X_bkg_test)[:, 1] >= pMin).astype(bool)
    y_sig_pred = (clf.predict_proba(X_sig_test)[:, 1] >= pMin).astype(bool)

power = metrics.accuracy_score(y_sig_test, y_sig_pred)  # = Prob(t >= tCut | sig)
print('power of test with respect to signal = ', power)

# Add code here to obtain the background efficiency (one possible sketch follows below)
# = size of test alpha = Prob(t >= tCut | bkg)

# make a scatter plot
fig, ax = plt.subplots(1, 1)
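# One way to fill in the background-efficiency TODO above (a sketch of my own,
# not the exercise's official solution): the size of the test is simply the
# fraction of background events passing the cut.
alpha = np.mean(y_bkg_pred)  # = Prob(t >= tCut | bkg)
print('size of test with respect to background = ', alpha)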
class LDA(CtrlNode):
    """Linear Discriminant Analysis, uses sklearn"""
    nodeName = "LDA"
    uiTemplate = [('train_data', 'list_widget',
                   {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                    'toolTip': 'Column containing the training data'}),
                  ('train_labels', 'combo',
                   {'toolTip': 'Column containing training labels'}),
                  ('solver', 'combo', {'items': ['svd', 'lsqr', 'eigen']}),
                  ('shrinkage', 'combo', {'items': ['None', 'auto', 'value']}),
                  ('shrinkage_val', 'doubleSpin', {'min': 0.0, 'max': 1.0, 'step': 0.1, 'value': 0.5}),
                  ('n_components', 'intSpin', {'min': 2, 'max': 1000, 'step': 1, 'value': 2}),
                  ('tol', 'intSpin', {'min': -50, 'max': 0, 'step': 1, 'value': -4}),
                  ('score', 'lineEdit', {}),
                  ('predict_on', 'list_widget',
                   {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                    'toolTip': 'Data column of the input "predict" Transmission\n'
                               'that is used for predicting from the model'}),
                  ('Apply', 'check', {'applyBox': True, 'checked': False})
                  ]

    def __init__(self, name, **kwargs):
        CtrlNode.__init__(self, name,
                          terminals={'train': {'io': 'in'},
                                     'predict': {'io': 'in'},
                                     'T': {'io': 'out'},
                                     'coef': {'io': 'out'},
                                     'means': {'io': 'out'},
                                     'predicted': {'io': 'out'}
                                     },
                          **kwargs)
        self.ctrls['score'].setReadOnly(True)

    def process(self, **kwargs):
        return self.processData(**kwargs)

    def processData(self, train: Transmission, predict: Transmission):
        self.t = train.copy()  #: Transmission instance containing the training data with the labels

        if predict is not None:
            self.to_predict = predict.copy()  #: Transmission instance containing the data to predict after fitting on the training data

        dcols, ccols, ucols = organize_dataframe_columns(self.t.df.columns)
        self.ctrls['train_data'].setItems(dcols)
        self.ctrls['train_labels'].setItems(ccols)

        if predict is not None:
            pdcols, ccols, ucols = organize_dataframe_columns(self.to_predict.df.columns)
            self.ctrls['predict_on'].setItems(pdcols)

        if not self.apply_checked():
            return

        train_columns = self.ctrls['train_data'].getSelectedItems()
        labels = self.ctrls['train_labels'].currentText()

        solver = self.ctrls['solver'].currentText()

        shrinkage = self.ctrls['shrinkage'].currentText()
        if shrinkage == 'value':
            shrinkage = self.ctrls['shrinkage_val'].value()
        elif shrinkage == 'None':
            shrinkage = None

        n_components = self.ctrls['n_components'].value()
        tol = 10 ** self.ctrls['tol'].value()

        store_covariance = True if solver == 'svd' else False

        params = {'train_data': train_columns,
                  'train_labels': labels,
                  'solver': solver,
                  'shrinkage': shrinkage,
                  'n_components': n_components,
                  'tol': tol,
                  'store_covariance': store_covariance
                  }

        kwargs = params.copy()
        kwargs.pop('train_data')
        kwargs.pop('train_labels')
        self.lda = LinearDiscriminantAnalysis(**kwargs)

        # Make an array of all the data from the selected columns
        self.X = np.hstack([np.vstack(self.t.df[train_column]) for train_column in train_columns])
        self.y = self.t.df[labels]

        self.X_ = self.lda.fit_transform(self.X, self.y)

        self.t.df['_LDA_TRANSFORM'] = self.X_.tolist()
        self.t.df['_LDA_TRANSFORM'] = self.t.df['_LDA_TRANSFORM'].apply(np.array)

        params.update({'score': self.lda.score(self.X, self.y),
                       'classes': self.lda.classes_.tolist()
                       })

        self.ctrls['score'].setText(f"{params['score']:.4f}")

        self.t.history_trace.add_operation('all', 'lda', params)

        self.t.df['_LDA_DFUNC'] = self.lda.decision_function(self.X).tolist()

        coef_df = pd.DataFrame({'classes': self.lda.classes_,
                                '_COEF': self.lda.coef_.tolist()})
        t_coef = Transmission(df=coef_df, history_trace=self.t.history_trace)

        means_df = pd.DataFrame({'classes': self.lda.classes_,
                                 '_MEANS': self.lda.means_.tolist()})
        t_means = Transmission(df=means_df, history_trace=self.t.history_trace)

        out = {'T': self.t, 'coef': t_coef, 'means': t_means, 'predicted': None}

        # Predict using the trained model
        predict_columns = self.ctrls['predict_on'].getSelectedItems()

        if not predict_columns:
            return out

        if predict_columns != train_columns:
            QtWidgets.QMessageBox.warning(None, 'Predict and Train columns do not match',
                                          'The selected train and predict columns are different')

        predict_data = np.hstack([np.vstack(self.to_predict.df[predict_column]) for predict_column in predict_columns])
        self.to_predict.df['LDA_PREDICTED_LABELS'] = self.lda.predict(predict_data)
        self.to_predict.df['_LDA_TRANSFORM'] = self.lda.transform(predict_data).tolist()
        self.to_predict.df['_LDA_TRANSFORM'] = self.to_predict.df['_LDA_TRANSFORM'].apply(np.array)

        params_predict = params.copy()
        params_predict.update({'predict_columns': predict_columns})

        self.to_predict.history_trace.add_operation('all', 'lda-predict', params_predict)

        out.update({'predicted': self.to_predict})

        return out
def cross_validator(data,
                    subject,
                    n_splits=5,
                    epochs=10,
                    lr=0.0003,
                    batch_size=64,
                    model_name="",
                    model_config={
                        'bn': True,
                        'dropout': True,
                        'branched': True,
                        'nonlinear': 'tanh'
                    },
                    early_stopping=True,
                    use_deep_features=False,
                    patience=10):
    # global declarations must precede any assignment to these names in this
    # scope, so they are hoisted here
    global y_test, t_test, tr_test, x_test

    if model_name.startswith('deep'):
        metrics = []
        histories = []
    else:
        m = [
            'acc', 'val_acc', 'val_precisions', 'val_recalls', 'val_f1s',
            'val_aucs', 'val_balanced_acc', 'val_recognition_acc', 'val_bpm'
        ]
        metrics = {key: [] for key in m}
        histories = False
    cnf_matrices = []

    x = data[0]
    y = data[1]
    t = data[2]
    tr = data[3]

    if use_deep_features:
        path = './models/subjects/'
        load_model_name = 'deep_subjective_branched_250_thesis1'
        files = [
            f for f in listdir(join(path, subject))
            if isfile(join(path, subject, f))
        ]
        myfiles = [file for file in files if load_model_name in file]
        myfiles.sort()

    # skf = StratifiedKFold(n_splits=5)
    # for train, test in skf.split(x, y):
    # sss = StratifiedShuffleSplit(n_splits=1, test_size=.1, random_state=0)
    # for train, test in sss.split(x, y):
    for i, (train, test) in enumerate(cv_splitter(x, n_splits=n_splits)):
        if use_deep_features:
            base_model = load_model(join(path, subject, myfiles[i]))
            model = Model(inputs=base_model.input,
                          outputs=base_model.layers[-2].output)
        # print train
        # print test
        # continue
        x_tv, x_test, y_tv, y_test = x[train], x[test], y[train], y[test]
        t_test = t[test]
        tr_test = tr[test]

        if model_name.startswith('deep') and early_stopping:
            x_train, x_valid, y_train, y_valid = train_test_split(
                x_tv, y_tv, stratify=y_tv, random_state=42, test_size=0.2)
        else:
            x_train = x_tv
            y_train = y_tv

        if use_deep_features:
            x_train, y_train, x_test, y_test = resample_transform(
                (x_train, y_train, x_test, y_test), resample=False)
            x_train = model.predict(x_train)
            x_test = model.predict(x_test)

        # standardization of the data
        # computing the mean and std on the training data
        # scalar = StandardScaler(with_mean=False)
        # mus = []
        # stds = []
        # trials_no = x_train.shape[0]
        # for i in range(trials_no):
        #     scalar.fit(x_train[i])
        #     mu = scalar.mean_
        #     std = scalar.scale_
        #     mus.append(mu)
        #     stds.append(std)
        # # scalar.fit(x_train.reshape((x_train.shape[0]*x_train.shape[1], x_train.shape[2])))
        #
        # # transforming the training data
        # scalar.mean_ = np.mean(mus, axis=0)
        # scalar.scale_ = np.mean(stds, axis=0)
        # normalized_x_train = np.empty_like(x_train)
        # for i in range(trials_no):
        #     normalized_x_train[i] = scalar.transform(x_train[i])
        #
        # # transforming the test data
        # normalized_x_test = np.empty_like(x_test)
        # trials_no = x_test.shape[0]
        # for i in range(trials_no):
        #     normalized_x_test[i] = scalar.transform(x_test[i])
        # normalized_x_train = x_train
        # normalized_x_test = x_test

        # standardization
        scalar = StandardScaler(with_mean=True)
        scalar.fit(x_train.reshape(x_train.shape[0], -1))
        x_train = scalar.transform(x_train.reshape(x_train.shape[0], -1)).reshape(x_train.shape)
        x_test = scalar.transform(x_test.reshape(x_test.shape[0], -1)).reshape(x_test.shape)
        if model_name.startswith('deep') and early_stopping:
            x_valid = scalar.transform(x_valid.reshape(x_valid.shape[0], -1)).reshape(x_valid.shape)

        # x_train_reshaped = x_train.reshape(x_train.shape[0], -1)
        # x_test_reshaped = x_test.reshape(x_test.shape[0], -1)
        # mins = np.min(x_train_reshaped, axis=0)
        # maxs = np.max(x_train_reshaped, axis=0)
        # normalized_x_train = 2*(x_train_reshaped-mins)/(maxs-mins)-1
        # normalized_x_test = 2*(x_test_reshaped-mins)/(maxs-mins)-1
        # normalized_x_train = np.reshape(normalized_x_train, x_train.shape)
        # normalized_x_test = np.reshape(normalized_x_test, x_test.shape)

        # resampling the data
        if model_name.startswith('deep'):
            n_samples, timepoints, channels = x_train.shape
            x_train = np.reshape(x_train, (n_samples, timepoints * channels))
            ros = RandomOverSampler(random_state=0)
            x_res, y_res = ros.fit_sample(x_train, y_train)
            x_train = np.reshape(x_res, (x_res.shape[0], timepoints, channels))
            y_train = y_res

            x_train = np.expand_dims(x_train, axis=3)
            x_test = np.expand_dims(x_test, axis=3)
            if model_name.startswith('deep') and early_stopping:
                x_valid = np.expand_dims(x_valid, axis=3)

        # c = compute_class_weight('balanced', [0, 1], y)
        # class_weight = {0: c[0], 1: c[1]}
        # print class_weight
        # pdb.set_trace()

        # compiling the model
        if model_name.startswith('deep'):
            if 'branched' in model_name:
                if '250' in model_name:
                    path = './models/subjects/'
                    load_model_name = 'deep_intersubjective_branched_250_thesis2_2'
                    files = [
                        f for f in listdir(join(path, subject))
                        if isfile(join(path, subject, f))
                    ]
                    myfiles = [file for file in files if load_model_name in file]
                    model = load_model(join(path, subject, myfiles[0]))
                    # model = branched2(x.shape, model_config=model_config, f=5)
                else:
                    path = './models/subjects/'
                    load_model_name = 'deep_intersubjective_branched_50_avg_thesis2_2'
                    files = [
                        f for f in listdir(join(path, subject))
                        if isfile(join(path, subject, f))
                    ]
                    myfiles = [file for file in files if load_model_name in file]
                    model = load_model(join(path, subject, myfiles[0]))
                    # model = branched2(x.shape, model_config=model_config, f=1)
            elif 'eegnet' in model_name:
                if '250' in model_name:
                    model = create_eegnet(x.shape, f=4)
                else:
                    model = create_eegnet(x.shape, f=1)
            elif 'cnn' in model_name:
                if '250' in model_name:
                    model = create_cnn(x.shape, f=5)
                else:
                    model = create_cnn(x.shape, f=1)

            # opt = Adam(lr=lr)
            opt = SGD(lr=1e-4, momentum=0.9)
            lrate = LearningRateScheduler(step_decay)
            model.compile(loss='binary_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])
            m = Metrics()
            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.5,
                                          patience=int(patience / 2),
                                          min_lr=0)
            early_stop = EarlyStopping(monitor='val_loss',
                                       min_delta=0.0001,
                                       patience=patience,
                                       verbose=0,
                                       mode='auto')
            mod_path = './models/subjects/' + subject
            timestr = time.strftime("%Y%m%d-%H%M")
            checkpointer = ModelCheckpoint(filepath=mod_path + '/best_' + model_name + '_' + timestr,
                                           monitor='val_loss',
                                           verbose=1,
                                           save_best_only=True)
            if early_stopping:
                history = model.fit(
                    x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,
                    validation_data=(x_valid, y_valid),
                    callbacks=[m, early_stop, checkpointer, reduce_lr],
                )
            else:
                history = model.fit(
                    x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,
                    validation_data=(x_test, y_test),
                    callbacks=[m],
                )
            metrics.append(m)
            histories.append(history)
            probabilities = model.predict(x_test, batch_size=batch_size, verbose=0)
            y_predict = [round(k) for k in probabilities]
        else:
            x_train = np.reshape(x_train, (x_train.shape[0], -1))
            x_test = np.reshape(x_test, (x_test.shape[0], -1))
            if 'svm' in model_name:
                clf = svm.LinearSVC(random_state=4)
            elif 'lda' in model_name:
                if 'shrinkage' in model_name:
                    clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
                else:
                    clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None)
            clf.fit(x_train, y_train)
            y_predict = clf.predict(x_test)
            probs = clf.decision_function(x_test)  # same call for both LinearSVC and LDA

            metrics['acc'].append(clf.score(x_train, y_train))
            metrics = compute_metrics(metrics, probs, y_predict, y_test)

        cnf_matrix = confusion_matrix(y_test, y_predict)
        cnf_matrices.append(cnf_matrix)

    return metrics, histories, cnf_matrices
import numpy as np
import pandas as pd

# Import dataset
df = pd.read_csv('data.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Split into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Apply LDA
# By default, it reduces to k-1 components,
# where k is the number of classes
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=None)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# LDA can also classify data using the class posteriors
y_pred = lda.predict(X_test)
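# A short check (my addition, not part of the original walkthrough) that the
# predictions above really are the argmax of the class posteriors:
posteriors = lda.predict_proba(X_test)
assert (lda.classes_[np.argmax(posteriors, axis=1)] == y_pred).all()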
spectraCal = np.delete(spectraCal, np.where(bbl == 0)[0], 1)  # remove bad bands

# Read in validation spectral files
# libSpecValFile = libLocation + dateTag + '_transformed_spectral_library_validation_spectra.csv'
libSpecValFile = libLocation + dateTag + '_spectral_library_validation_spectra.csv'
spectraVal = np.loadtxt(libSpecValFile, dtype=object, delimiter=',')  # Load in spectra - skips first line
metaSpecVal = spectraVal[:, 0:5]  # save first 5 columns of spectra separately
spectraVal = np.delete(spectraVal, [0, 1, 2, 3, 4], 1)  # remove the 5 columns of metadata in spectra
spectraVal = spectraVal.astype(np.double)  # convert from string array to double array
spectraVal = np.nan_to_num(spectraVal)  # there are values that are not finite, change them to zero
spectraVal = np.delete(spectraVal, np.where(bbl == 0)[0], 1)  # remove bad bands

# Develop canonical discriminant variables
clf = LinearDiscriminantAnalysis()  # http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html#sklearn.discriminant_analysis.LinearDiscriminantAnalysis
clf.fit(spectraCal, metaCal[:, 15].astype(int))
cdaDecision = clf.decision_function(spectraVal)
cdaScore = clf.scalings_
cdaPredict = clf.predict(spectraVal)
calCDA = clf.transform(spectraCal)
valCDA = clf.transform(spectraVal)

# Calculate results from CDA development
regr = linear_model.LinearRegression()
x = metaVal[:, 15].astype(int).reshape(len(metaVal[:, 15]), 1)
y = cdaPredict.reshape(len(cdaPredict), 1)
linearResults = regr.fit(x, y)  # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
plt.scatter(metaVal[:, 15].astype(int), cdaPredict)
plt.plot(metaVal[:, 15].astype(int), regr.predict(x))
plt.ylabel('Predicted')
plt.xlabel('Observed')
plt.title(dateTag)
print(stats.f_oneway(compra_sim['durabilid'], compra_nao['durabilid']))
print(stats.f_oneway(compra_sim['estilo'], compra_nao['estilo']))

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = compra_xls[['durabilid', 'desempenh', 'estilo']]
y = compra_xls['compra'] == 'sim'
print(y)

clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
print(clf.decision_function(X))
print(clf.score(X, y))
y_ = clf.predict(X)
print(clf)
print(clf.score(X, y))
print(clf.coef_, clf.intercept_)

comprapredic = pd.read_csv("comprapredic.csv", header=0, sep=";")
X2 = comprapredic[['durabilid', 'desempenh', 'estilo']]
clf.predict(X2)

from sklearn.feature_selection import RFE
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
i += 1

# iterate over classifiers
for name, clf in zip(names, classifiers):
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    if hasattr(clf, "decision_function"):
        Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    else:
        Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

    # Plot also the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
def main():
    # global file_exist, file1, file2, channelNum
    Data_path = "C:\\Users\\user\\Desktop\\Drone\\LDA\\Data\\"
    eegData_txt = Data_path + 'eegData.out'
    stims_txt = Data_path + 'stims.out'
    start_txt = Data_path + 'start.out'
    moveData_eeg = 'C:\\Users\\user\\Desktop\\Drone\\LDA\\Online\\eegData\\'
    moveData_stims = 'C:\\Users\\user\\Desktop\\Drone\\LDA\\Online\\stims\\'
    Classifier_path = "C:\\Users\\user\\Desktop\\Drone\\LDA\\Model\\"

    current_list2 = sorted(glob.glob(Classifier_path + '*.pickle'),
                           key=os.path.getmtime,
                           reverse=True)
    Classifier_real = current_list2[0]
    # the freshly constructed LDA below is immediately replaced by the most
    # recently trained classifier loaded from disk
    lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    lda = joblib.load(Classifier_real)

    serverSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    serverSock.bind(('', 12240))
    serverSock.listen(0)
    connectionSock, addr = serverSock.accept()
    print('Connection confirmed from', str(addr))

    for i in range(0, 12):
        # load text file
        while True:
            if os.path.isfile(start_txt):
                break
        start_time = time.time()
        while (time.time() - start_time < 25):
            pass
        while True:
            if os.path.isfile(eegData_txt) & os.path.isfile(stims_txt):
                processing_time = time.time()
                os.remove(start_txt)
                eegData = np.loadtxt(eegData_txt, delimiter=",")
                stims = np.loadtxt(stims_txt, delimiter=",")
                ctime = datetime.today().strftime("%m%d_%H%M%S")
                moveData_e = moveData_eeg + ctime + 'eegData.out'
                moveData_s = moveData_stims + ctime + 'stims.out'
                shutil.move(eegData_txt, moveData_e)
                shutil.move(stims_txt, moveData_s)
                break
        print("got process")

        channelNum = 7
        samplingFreq = 300
        buttonNum = 7

        ### Preprocessing process
        # Bandpass filter
        eegData = butter_bandpass_filter(eegData, 0.1, 30, samplingFreq, 4)
        # Epoching
        epochSampleNum = int(np.floor(1.0 * samplingFreq))
        offset = int(np.floor(0.0 * samplingFreq))
        baseline = int(np.floor(1.0 * samplingFreq))
        Epochs_Aver = np.zeros((buttonNum, channelNum, epochSampleNum))
        resampleRate = 100
        featureNum = channelNum * resampleRate
        Epochs_final = np.zeros((buttonNum, channelNum, resampleRate))
        for b in range(buttonNum):  # renamed from i to avoid shadowing the outer loop variable
            Epochs_Aver[b] = Epoching(eegData, stims, (b + 1), samplingFreq,
                                      channelNum, epochSampleNum, offset, baseline)
            Epochs_final[b] = resampling(Epochs_Aver[b], resampleRate, channelNum)

        Features = Convert_to_FeatureVector(Epochs_Aver, buttonNum, featureNum)
        Answers = lda.decision_function(Features)
        answer = np.argmax(Answers) + 1
        # np.savetxt(result_txt, answer)
        print("Process time: ", time.time() - processing_time)
        print("Result: ", answer)
        connectionSock.send(str(answer).encode("utf-8"))
# This call to fit using data is what really does the learning
wc.fit(breast.data, breast.target)

#%%
print('')
print('')
print('And test the classifier')

# and the call to predict is what gives the outputs as labels (as in the target field)
pred = wc.predict(breast.data)
print('')
print('Prediction:\n{}'.format(pred[some]))

# but there are other ways to obtain other outputs. In particular, a
# continuous output can be obtained using decision_function.
# But in general you need to have a look at the documentation for each predictor.
scores = wc.decision_function(breast.data)
print('')
print('Scores (distances to the classifier)\n{}'.format(scores[some]))

# in some cases, probabilities for each class can also be obtained
probs = wc.predict_proba(breast.data)
print('')
print('Probabilities of being in each class:\n{}'.format(probs[some]))

#%%
# Accuracy
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
# Overall accuracy: ratio of properly classified samples
Acc = metrics.accuracy_score(breast.target, pred)
print('Overall accuracy is {}'.format(Acc))
y = esmFeatures[e][~np.isnan(esmFeatures[e])].values
y = y == 3
if np.sum(y) / len(y) < 0.2 or np.sum(y) / len(y) > 0.8:
    continue
print(np.sum(y) / len(y))

kf = KFold(n_splits=folds)
weights = np.zeros((folds, x.shape[1]))
scores = np.zeros((y.shape[0], 2))
aucs = []
for fold, (train, test) in enumerate(kf.split(x)):
    est.fit(x[train, :], y[train])
    # scores[test,:] = est.predict_proba(x[test,:])
    if hasattr(est, "predict_proba"):
        prob_pos = est.predict_proba(x[test, :])  # [:, 0]
    else:  # use decision function
        prob_pos = est.decision_function(x[test, :])
        # prob_pos = \
        #     (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    scores[test, :] = prob_pos
    # importances[fold,:] = est.feature_importances_
    weights[fold, :] = est.coef_
    aucs.append(roc_auc_score(y[test] == 1, scores[test, 0]))
# scores = cross_val_score(est, x, y, cv=KFold(10))
auc = roc_auc_score(y == 1, scores[:, 0])
(fpr, tpr, treshs) = roc_curve(y == 1, scores[:, 0])
print('Participant %s has auc %f' % (sub, auc))
print(np.mean(aucs), np.std(aucs))

# Figure
sns.set_context('paper')
fig, ax = plt.subplots(figsize=(4, 4))
class LDA(object):
    def __init__(self, solver="svd", shrinkage=None, priors=None,
                 n_components=None, store_covariance=False, tol=1e-4):
        """
        :param solver: string, one of "svd", "lsqr", "eigen". Defaults to "svd",
            which does not compute the covariance matrix and therefore suits
            data with many features. "lsqr" is least squares, used together
            with shrinkage; "eigen" is eigenvalue decomposition, also used
            together with shrinkage.
        :param shrinkage: str or float, optional. Defaults to None; "auto" for
            automatic shrinkage, or a float in (0, 1) for a fixed shrinkage
            parameter.
        :param priors: array, optional, shape (n_classes,). Class priors.
        :param n_components: number of components, int, optional. Defaults to None.
        :param store_covariance: bool, optional. Only used with "svd";
            additionally computes the class covariance matrix.
        :param tol: float, defaults to 1e-4. Threshold used for rank estimation
            in the "svd" solver.
        """
        self.model = LinearDiscriminantAnalysis(
            solver=solver, shrinkage=shrinkage, priors=priors,
            n_components=n_components, store_covariance=store_covariance, tol=tol)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)

    def set_params(self, **params):
        self.model.set_params(**params)

    def decision_function(self, x):
        return self.model.decision_function(X=x)

    def predict(self, x):
        return self.model.predict(X=x)

    def predict_log_proba(self, x):
        return self.model.predict_log_proba(X=x)

    def predict_proba(self, x):
        return self.model.predict_proba(X=x)

    def score(self, x, y, sample_weight=None):
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def get_attributes(self):
        # attributes are only available after the model has been fit
        coef = self.model.coef_  # weight vectors
        intercept = self.model.intercept_  # intercept terms
        covariance = self.model.covariance_  # covariance matrix
        explained_variance_ratio = self.model.explained_variance_ratio_
        means = self.model.means_
        priors = self.model.priors_  # class priors, sum to 1, shape (n_classes,)
        scalings = self.model.scalings_  # shape (rank, n_classes - 1), feature scaling
        xbar = self.model.xbar_  # overall mean
        classes = self.model.classes_  # class labels
        return coef, intercept, covariance, explained_variance_ratio, means, priors, scalings, xbar, classes
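# A minimal usage sketch for the wrapper above (toy data of my own; the wrapper
# only forwards to sklearn's LinearDiscriminantAnalysis):
import numpy as np

x_toy = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y_toy = np.array([1, 1, 1, 2, 2, 2])

model = LDA(solver='lsqr', shrinkage='auto')
model.fit(x_toy, y_toy)
print(model.predict(np.array([[-0.8, -1]])))  # -> [1]
print(model.score(x_toy, y_toy))              # mean accuracy on the training data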
def compute(
    self,
    sample_df: pd.DataFrame,
    data_column: str,
    stimulus_type: str,
    method: str,
    method_kwargs: dict = None,
    use_scaler: bool = False,
    scaler_method: str = None,
):
    self.params = {
        'data_column': data_column,
        'stimulus_type': stimulus_type,
        'method': method,
        'method_kwargs': method_kwargs,
        'use_scaler': use_scaler,
        'scaler_method': scaler_method,
    }

    sample_df = sample_df.reset_index(drop=True)

    # Stimulus mapping dataframe
    stim_dataframe = sample_df.iloc[0]['stim_maps'][0][0][stimulus_type]

    # create an array where each frame is labelled with the stimulus name
    self.labels = np.empty(
        shape=(sample_df[data_column].iloc[0].shape),  # shape is (n_frames,)
        dtype=f'<U{stim_dataframe.name.apply(len).max()}'  # just unicode with length of longest stimulus name str
    )

    # fill the array so each frame is labelled with the stimulus name for that frame
    for i, s in stim_dataframe.sort_values(by=['start'], ascending=True).iterrows():
        self.labels[int(s['start']):int(s['end'])] = s['name']

    self.input_data = np.vstack(sample_df[data_column].values).T

    if use_scaler:
        scaler = getattr(preprocessing, scaler_method)()
        X = scaler.fit_transform(self.input_data)
    else:
        X = self.input_data

    if method == 'PCA':
        self.pca = PCA(**method_kwargs)
        self.low_dim_data = self.pca.fit_transform(X)

    elif method == 'LDA':
        if 'store_covariance' in self.params['method_kwargs'].keys():
            store_covariance = self.params['method_kwargs'].pop('store_covariance')
        elif 'solver' in self.params['method_kwargs'].keys():
            store_covariance = True if self.params['method_kwargs']['solver'] not in ['lsqr', 'eigen'] else False
        else:
            store_covariance = False
        lda = LinearDiscriminantAnalysis(**method_kwargs, store_covariance=store_covariance)
        self.low_dim_data = lda.fit_transform(X, self.labels)
        self.lda_means = lda.means_
        if hasattr(lda, 'covariance_'):
            self.lda_covariance = lda.covariance_
        self.lda_decision_function = lda.decision_function(X)

    return self
print "The misclassified item:", i Zx = [[5, 5, 5, 5], [3, 3, 3, 3]] # This is the item that I have made up with 4 features Z = np.array(Zx) # I have changed it as a numpy array print Z print lda.predict_log_proba( Z ) # This function returns posterior log-probabilities of classification according to each class on an array of test vectors X. print lda.predict_proba( Z ) # This function returns posterior probabilities of classification according to each class on an array of test vectors X. print lda.predict( Z) # This function does classification on an array of test vectors X. print lda.decision_function( Z ) # This function returns the decision function values related to each class on an array of test vectors X. print confusion_matrix(pred, y) # # print fit.score(X, y) # 96% of accuracy print accuracy_score( y, pred ) # the use of another function for calculating the accuracy (correct_predictions / all_predictions) # print accuracy_score(y, pred, normalize=False) # the number of correct predictions colors = ['navy', 'turquoise', 'darkorange'] lw = 2 plt.figure() for color, i, target_name in zip(colors, [0, 1, 2], target_names): plt.scatter(X_lda[y == i, 0], X_lda[y == i, 1],
lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
lda.fit(X, y)

# Quadratic Discriminant Analysis
qda = QuadraticDiscriminantAnalysis(store_covariance=True)
qda.fit(X, y)

# class 0 and 1: areas
nx, ny = 200, 100
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                     np.linspace(y_min, y_max, ny))

Z_LDA = lda.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_LDA.shape = xx.shape
ax = plt.subplot(2, 2, 2)
ax.contourf(xx, yy, Z_LDA, cmap=cm_bright)
ax.scatter(X0[:, 0], X0[:, 1], marker='.', color='red')
ax.scatter(X1[:, 0], X1[:, 1], marker='.', color='blue')
plt.title('LDA')

Z_QDA = qda.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_QDA.shape = xx.shape
ax = plt.subplot(2, 2, 3)
ax.contourf(xx, yy, Z_QDA, cmap=cm_bright)
ax.scatter(X0[:, 0], X0[:, 1], marker='.', color='red')
ax.scatter(X1[:, 0], X1[:, 1], marker='.', color='blue')
print('LDA prediction')

# test accuracy on the train set
train_set_df = pd.DataFrame(train_y_values)
train_set_df['predicted'] = clf.predict(train_x_data.values)
train_set_df['wrong'] = train_set_df['predicted'] != train_set_df['age_group']
stats['lda']['train']['wrong'].append(train_set_df['wrong'].sum())
stats['lda']['train']['size'].append(train_set_df['wrong'].size)
stats['lda']['train']['score'].append(clf.score(train_x_data, train_y_values))
# coefficent_df['svm']['coef'].append(clf.coef_)
# coefficent_df['svm']['class'].append(clf.classes_)

false_positive_rate, true_positive_rate, thresholds = roc_curve(
    train_y_values.values, clf.decision_function(train_x_data))
roc_auc = auc(false_positive_rate, true_positive_rate)
stats['lda']['train']['roc_auc'].append(roc_auc)

plt.subplot(2, 1, 1)
plt.plot(false_positive_rate, true_positive_rate, 'b',
         label='Train AUC = %0.2f' % roc_auc)
plt.title('Train set')

print('LDA-Train - Total wrong predictions: {}, out of: {}, accuracy: {}, auc: {}'
      .format(train_set_df['wrong'].sum(), train_set_df['wrong'].size,
              clf.score(train_x_data, train_y_values), roc_auc))

# test accuracy on the test set
test_set_df = pd.DataFrame(test_y_values)
test_set_df['predicted'] = clf.predict(test_x_data.values)
print "done generating" X = np.array(arr1) y = np.array(arr2) del arr1 del arr2 print "done deleting arr1 arr2" start = timer() clf = LinearDiscriminantAnalysis() clf.fit(X, y) end = timer() del X del y print "Time it took to train:" print(end - start) print "Time it took to Predict:" test.append(randomarr()) test.append(randomarr()) test.append(randomarr()) test.append(randomarr()) start = timer() print "class prediction"; print(clf.predict(test)) end = timer() print(end - start) print "decision function value" #print test; print (clf.decision_function(test))
def lda_classify_datasets_demo(datasets):
    '''Show LDA results on several datasets'''
    figure = plt.figure(figsize=(8, 6))
    i = 1
    for ds in datasets:
        mesh_step = 0.2
        x, y = ds  # get dataset points
        # Split current dataset into training and test sets
        x = StandardScaler().fit_transform(x)  # set mean=0 and var=1 for input values
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.4)
        x_min, x_max = x_test[:, 0].min() - .5, x_test[:, 0].max() + .5
        y_min, y_max = x_test[:, 1].min() - .5, x_test[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step),
                             np.arange(y_min, y_max, mesh_step))

        # On top, show dataset only
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(len(datasets), 2, i)
        ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
        ax.set_xticks(())
        ax.set_yticks(())

        # Below that, show results of classifier on dataset
        classifier = LDA()
        classifier.fit(x_train, y_train)
        if hasattr(classifier, "decision_function"):
            Z = classifier.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = classifier.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        i += 1
        ax = plt.subplot(len(datasets), 2, i)
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
        ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
        ax.set_xticks(())
        ax.set_yticks(())
        i += 1

    # Plot datasets
    plt.show()
    eyeglass_idx = i
    print(pant_val)
    print(eyeglass_val)
    return pant_idx, eyeglass_idx


def plot_image(img):
    plt.imshow(img.reshape(row, col), cmap='gray')
    plt.show()
    return


# Find and plot the most incorrectly classified object in the test set
test_pred_val = lda.decision_function(X_pca_test)
pant_idx, eyeglass_idx = find_most_incorrectly_classified(test_pred_val, test_label_2c)
print(test_label_2c[pant_idx])
print(test_label_2c[eyeglass_idx])
plot_image(x_test[pant_idx])
plot_image(x_test[eyeglass_idx])
class FBCSP(object):
    def __init__(self, sample_rate, feat_sel_proportion=0.8,
                 low_cut_hz=4, high_cut_hz=36, step=4, csp_components=4):
        self.low_cut_hz = low_cut_hz
        self.high_cut_hz = high_cut_hz
        self.step = step
        self.sample_rate = sample_rate
        self.csp_component = csp_components
        self.feat_proportion = feat_sel_proportion
        self.csp_bank = dict()
        self.low = dict()
        self.high = dict()
        self.n_bank = (self.high_cut_hz - self.low_cut_hz) // self.step
        self.n_feat = int(self.n_bank * self.csp_component * self.feat_proportion)
        for i in range(self.n_bank):
            self.low[i] = self.low_cut_hz + i * self.step
            self.high[i] = self.low_cut_hz + i * self.step + self.step
            if (self.high_cut_hz - self.high[i]) < self.step:
                self.high[i] = self.high_cut_hz

    def fit(self, data, label):
        data_bank = dict()
        for i in range(self.n_bank):
            # get each freq filter bank
            data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
            # extract csp feature for each bank
            self.csp_bank[i] = CSP(n_components=self.csp_component, reg=None, log=True, norm_trace=False)
            self.csp_bank[i].fit(data_bank[i], label)

    def transform(self, data):
        data_bank = dict()
        csp_feat = dict()
        feature = None
        for i in range(self.n_bank):
            # get each freq filter bank
            data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
            # extract csp feature for each bank and stack horizontally
            csp_feat[i] = self.csp_bank[i].transform(data_bank[i])
            feature = csp_feat[i] if feature is None else np.hstack([feature, csp_feat[i]])
        return feature

    def fit_transform(self, data, label):
        data_bank = dict()
        csp_feat = dict()
        feature = None
        for i in range(self.n_bank):
            # get each freq filter bank
            data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
            # extract csp feature for each bank and stack horizontally
            self.csp_bank[i] = CSP(n_components=self.csp_component, reg=None, log=True, norm_trace=False)
            self.csp_bank[i].fit(data_bank[i], label)
            csp_feat[i] = self.csp_bank[i].transform(data_bank[i])
            feature = csp_feat[i] if feature is None else np.hstack([feature, csp_feat[i]])
        return feature

    def bank_filter(self, data, low_cut_hz, high_cut_hz, sample_rate):
        n_trial = data.shape[0]
        n_channel = data.shape[1]
        data_bank = []
        for i in range(n_trial):
            data_bank += [np.array([butter_bandpass_filter(data[i, j, :], low_cut_hz, high_cut_hz,
                                                           sample_rate, pass_type='band', order=6)
                                    for j in range(n_channel)])]
        return np.array(data_bank)

    def classifier_fit(self, feature, label):
        # feature selection
        self.MI_sel = SelectPercentile(mutual_info_classif, percentile=self.feat_proportion * 100)
        self.MI_sel.fit(feature, label)
        new_feat = self.MI_sel.transform(feature)
        # classification
        self.clf = LinearDiscriminantAnalysis()
        self.clf.fit(new_feat, label)

    def classifier_transform(self, feature):
        # feature selection
        new_feat = self.MI_sel.transform(feature)
        # classification
        return self.clf.transform(new_feat)

    def evaluation(self, feature, label):
        # feature selection
        new_feat = self.MI_sel.transform(feature)
        # accuracy
        accuracy = self.clf.score(new_feat, label)
        # f1
        f1 = dict()
        pred = self.clf.predict(new_feat)
        f1["micro"] = f1_score(y_true=label, y_pred=pred, average='micro')
        f1["macro"] = f1_score(y_true=label, y_pred=pred, average='macro')
        # auc
        pred_posi = self.clf.decision_function(new_feat)
        lb = LabelBinarizer()
        test_y = lb.fit_transform(label)
        roc_auc = self.multiclass_roc_auc_score(y_true=test_y, y_score=pred_posi)
        return accuracy, f1, roc_auc

    def multiclass_roc_auc_score(self, y_true, y_score):
        assert y_true.shape == y_score.shape
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        n_classes = y_true.shape[1]
        # compute ROC curve and ROC area for each class
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        # compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        # compute macro-average ROC curve and ROC area:
        # first aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        # then interpolate all ROC curves at these points
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
        # finally average it and compute AUC
        mean_tpr /= n_classes
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
        return roc_auc
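# A minimal usage sketch for the FBCSP class above, on synthetic data (my
# addition). It assumes the surrounding module provides CSP (e.g.
# mne.decoding.CSP) and the butter_bandpass_filter helper that bank_filter
# calls; the shapes and the 250 Hz sample rate are made up for illustration.
import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(60, 8, 500)        # 60 trials x 8 channels x 500 samples
label = rng.randint(0, 3, size=60)  # three classes, so decision_function is (n, 3)

fbcsp = FBCSP(sample_rate=250)
feature = fbcsp.fit_transform(data, label)  # filter-bank CSP features
fbcsp.classifier_fit(feature, label)        # mutual-information selection + LDA
accuracy, f1, roc_auc = fbcsp.evaluation(feature, label)
print(accuracy, f1["macro"], roc_auc["macro"])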
# train the model
clf.fit(x_train, y_train)

# looking at the attributes
coef = clf.coef_
intercept = clf.intercept_
# covariance_mat = clf.covariance_  # covariance matrix; with the 'svd' solver it is only stored when store_covariance=True
perc_vari = clf.explained_variance_ratio_
means = clf.means_
priors = clf.priors_
scalings = clf.scalings_
overall_mean = clf.xbar_
classes = clf.classes_

# looking at the methods
decision_function = clf.decision_function(x_test)
fit_transform = clf.fit_transform(x_test, y_test)
get_params = clf.get_params()
prediction = clf.predict(x_test)
predict_log_proba = clf.predict_log_proba(x_test)
predict_proba = clf.predict_proba(x_test)
mean_accuracy_train = clf.score(x_train, y_train)
mean_accuracy_test = clf.score(x_test, y_test)
transform = clf.transform(x_test)

print('The mean accuracy of the train dataset is: %.3f and the mean accuracy of the test dataset is: %.3f'
      % (mean_accuracy_train, mean_accuracy_test))

pdb.set_trace()