Example #1
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as SKLModel  # assumed import for the wrapped scikit-learn estimator


class LinearDiscriminantAnalysisImpl():

    def __init__(self, solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=0.0001):
        self._hyperparams = {
            'solver': solver,
            'shrinkage': shrinkage,
            'priors': priors,
            'n_components': n_components,
            'store_covariance': store_covariance,
            'tol': tol}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
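# A minimal usage sketch of the wrapper above (synthetic two-class data, mirroring the
# demo further down in this file; an illustration only, not part of the original snippet):
import numpy as np
X_demo = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y_demo = np.array([1, 1, 1, 2, 2, 2])
impl = LinearDiscriminantAnalysisImpl().fit(X_demo, y_demo)
print(impl.predict([[-0.8, -1]]))          # predicted class label
print(impl.decision_function(X_demo))      # signed distances to the separating hyperplane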
Example #2
def intersubjective_shallow(data, model_name):

    x_train, y_train, x_test, y_test, o_t_test, o_tr_test = data

    x_train, y_train, x_test, y_test = resample_transform(
        (x_train, y_train, x_test, y_test), resample=False)

    global t_test
    t_test = o_t_test
    global tr_test
    tr_test = o_tr_test
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    m = [
        'acc', 'val_acc', 'val_precisions', 'val_recalls', 'val_f1s',
        'val_aucs', 'val_balanced_acc', 'val_recognition_acc', 'val_bpm'
    ]
    metrics = {key: [] for key in m}
    history = False
    if 'svm' in model_name:
        clf = svm.LinearSVC(random_state=0)
    elif 'lda' in model_name:
        if 'shrinkage' in model_name:
            clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
        else:
            clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None)
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    probs = clf.decision_function(x_test)
    metrics['acc'].append(clf.score(x_train, y_train))
    metrics = compute_metrics(metrics, probs, y_predict, y_test)
    cnf_matrix = confusion_matrix(y_test, y_predict)

    return metrics, history, cnf_matrix, clf
Example #3
class LDADecoder(ERPDecoder):
    """
    A basic EM decoder. Not to be used in online experiments;
    there, the OnlineUnsupervisedEM decoder should be used.
    """
    def __init__(self, n_stimuli, x, y):
        self.n_stimuli = n_stimuli
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
        # The -1 drops the bias column; LDA handles the intercept for us
        self.preprocess = lambda x: flatten_normalise_bias(x)[:, :-1]
        self.clf = LDA(solver='lsqr', shrinkage='auto')
        self.clf.fit(self.preprocess(x), y)
        self.eeg = []
        self.stimuli = []

    def add_trial(self, x, s):
        """

        :return:
        """
        self.eeg.append(self.preprocess(x))
        self.stimuli.append(s)

    def predict_all_trials(self):
        output = []
        for x, s in zip(self.eeg, self.stimuli):
            outputs = np.zeros(self.n_stimuli)
            for idx in range(self.n_stimuli):
                eeg = np.array([xx for xx, ss in zip(x, s) if idx in ss])
                if eeg.shape[0] == 0:  # no epochs for this stimulus
                    outputs[idx] = -np.inf
                else:
                    outputs[idx] = self.apply_single_stimulus(eeg).mean()
            output.append(np.argmax(outputs))
        return np.array(output)

    def apply_single_stimulus(self, x):
        return self.clf.decision_function(flatten_normalise_bias(x)[:, :-1])
class LinearDiscriminantAnalysisImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
    if k < n_samples_integrate - 1:  # beginning of data stream (not yet enough samples)
        signal_tmp = test_filt_data[:, :k + 1]

    else:  # enough samples collected (n_samples_integrate)
        signal_tmp = test_filt_data[:, k - (n_samples_integrate - 1):k + 1]

    # format data
    signal_tmp = np.expand_dims(
        signal_tmp, 0
    )  # dimensions for CSP: epochs x channels x samples (1,24,n_samples_integrate)

    # apply CSP filters + LDA
    fea_tmp = csp.transform(signal_tmp)
    #pred_tmp = lda.predict(fea_tmp)
    pred_tmp = lda.decision_function(fea_tmp)

    # put in array for prediction values
    #pred_cont.append(list(pred_tmp))
    #pred_cont = pred_cont[-n_output_integrate:] # only keep last values in buffer
    pred_buffer[:n_output_integrate] = pred_buffer[
        -n_output_integrate:]  # shift to the left by one
    pred_buffer[n_output_integrate] = pred_tmp  # add prediction of this loop

    # alternative: low-pass filter the buffer with lfilter and zi; initialize with lfilter_zi
    cl_out_cont[k], zi_previous = lfilter(b,
                                          a,
                                          pred_tmp,
                                          axis=-1,
                                          zi=zi_previous)
Example #6
def precision(f_pos, t_pos):
    return t_pos / (t_pos + f_pos)


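# The call below uses cobertura() (recall), which this fragment does not define; a plausible
# definition consistent with its use in f1() further down -- an assumption, not from the original:
def cobertura(f_neg, t_pos):
    return t_pos / (t_pos + f_neg)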
cobertura(f_neg, t_pos)

# In[22]:

#sklearn.discriminant_analysis.LinearDiscriminantAnalysis(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=True, tol=0.0001)
clf = LinearDiscriminantAnalysis()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf.fit(X, y)
print(clf.predict([[-0.8, -1]]))
clf.decision_function(X)

# In[11]:


def f1(f_pos, f_neg, t_pos, t_neg):
    return 2 * precision(f_pos, t_pos) * cobertura(
        f_neg, t_pos) / (precision(f_pos, t_pos) + cobertura(f_neg, t_pos))


# In[ ]:

matriz = [[t_pos, f_neg], [f_pos, t_neg]]
plt.imshow(matriz)
plt.savefig("matriz.png")
Example #7
model = RandomForestClassifier(n_estimators=10, max_depth=3)
print "Random Forest"
test_model(model)

model_lda = LinearDiscriminantAnalysis()
print "LDA"
test_model(model_lda)

use_prediction = False
raw_test_data, test_labels = readDataMultipleFiles([3])
test_data_matrix, test_data_matrices, test_labels, test_labels_binary = buildMatricesAndLabels(raw_test_data, test_labels, scaling_functions)
test_predictions = []
for features in test_data_matrix:
    if not use_prediction:
        test_predictions.append(model_lda.decision_function([features])[0])  # score for classes_[1]
    else:
        test_predictions.append(model_lda.predict_proba([features])[0])

for i in range(target_count):
    print sum(test_labels_binary[i])

thresholds_for_bci = multiclassRoc(test_predictions, test_labels_binary)

# model = SVC(C=1000, kernel="poly", degree=2)
# print "SVM"
# test_model(model)

# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0.pkl", "w")).dump(model_lda)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_mm.pkl", "w")).dump(min_max)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_thresh.pkl", "w")).dump(thresholds_for_bci)
    # comparing results of prediction
    np.testing.assert_equal(predicted_training_online, predicted_training_offline)
    np.testing.assert_equal(predicted_training_online, predicted_training_sklearn)
    np.testing.assert_equal(predicted_testing_online, predicted_testing_offline)
    np.testing.assert_equal(predicted_testing_online, predicted_testing_sklearn)
    # comparing class prior probabilities
    np.testing.assert_equal(classifier_online.prob_classes, classifier_offline.prob_classes)
    np.testing.assert_equal(classifier_online.prob_classes, classifier_sklearn.priors_)
    # comparing total covariance matrix
    np.testing.assert_almost_equal(classifier_online.total_covariance(), classifier_offline.total_covariance())
    np.testing.assert_almost_equal(classifier_online.total_covariance(), classifier_sklearn.covariance_)
    np.testing.assert_almost_equal(classifier_offline.total_covariance(), classifier_sklearn.covariance_)
    # comparing means
    np.testing.assert_almost_equal(classifier_online.means_by_class(), classifier_offline.means_by_class())
    np.testing.assert_almost_equal(classifier_online.means_by_class(), classifier_sklearn.means_)

    # simplification due to sklearn specifics
    if len(np.unique(y)) > 2:
        np.testing.assert_almost_equal(classifier_online.coef(), classifier_offline.coef())
        np.testing.assert_almost_equal(classifier_online.coef(), classifier_sklearn.coef_)
        np.testing.assert_almost_equal(classifier_online.intercept(), classifier_offline.intercept())
        np.testing.assert_almost_equal(classifier_online.intercept(), classifier_sklearn.intercept_, decimal=5)
        np.testing.assert_almost_equal(classifier_online.scores, classifier_offline.scores)
        np.testing.assert_almost_equal(classifier_online.scores, classifier_sklearn.decision_function(X_test), decimal=5)






# Create classifier object and train
# Add code here to include other classifiers (MLP, BDT,...)
clf = LDA()
clf.fit(X_train, y_train)
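# One possible alternative classifier, as the comment above suggests; a sketch only, assuming
# scikit-learn's MLPClassifier is acceptable here (not part of the original code):
from sklearn.neural_network import MLPClassifier
clf_mlp = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000)
clf_mlp.fit(X_train, y_train)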

# Evaluate accuracy using the test data.
# If available, use the decision function, else (e.g. for MLP) use predict_proba
# Adjust threshold value tCut or pMin as appropriate

X_bkg_test = X_test[y_test == 0]
X_sig_test = X_test[y_test == 1]
y_bkg_test = y_test[y_test == 0]
y_sig_test = y_test[y_test == 1]
if hasattr(clf, "decision_function"):
    tCut = 0.
    y_bkg_pred = (clf.decision_function(X_bkg_test) >= tCut).astype(bool)
    y_sig_pred = (clf.decision_function(X_sig_test) >= tCut).astype(bool)
else:
    pMin = 0.9
    y_bkg_pred = (clf.predict_proba(X_bkg_test)[:, 1] >= pMin).astype(bool)
    y_sig_pred = (clf.predict_proba(X_sig_test)[:, 1] >= pMin).astype(bool)

power = metrics.accuracy_score(y_sig_test,
                               y_sig_pred)  # = Prob(t >= tCut | sig)
print('power of test with respect to signal = ', power)

# Add code here to obtain the background efficiency
# = size of test, alpha = Prob(t >= tCut | bkg)
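# A sketch of the background efficiency the comment above asks for, using the predictions
# already computed for the background test sample (an illustration, not the original solution):
alpha = y_bkg_pred.sum() / len(y_bkg_pred)   # fraction of background events with t >= tCut
print('background efficiency (size of test) = ', alpha)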

# make a scatter plot
fig, ax = plt.subplots(1, 1)
Example #10
class LDA(CtrlNode):
    """Linear Discriminant Analysis, uses sklearn"""
    nodeName = "LDA"
    uiTemplate = [('train_data', 'list_widget', {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                                                 'toolTip': 'Column containing the training data'}),
                  ('train_labels', 'combo', {'toolTip': 'Column containing training labels'}),
                  ('solver', 'combo', {'items': ['svd', 'lsqr', 'eigen']}),
                  ('shrinkage', 'combo', {'items': ['None', 'auto', 'value']}),
                  ('shrinkage_val', 'doubleSpin', {'min': 0.0, 'max': 1.0, 'step': 0.1, 'value': 0.5}),
                  ('n_components', 'intSpin', {'min': 2, 'max': 1000, 'step': 1, 'value': 2}),
                  ('tol', 'intSpin', {'min': -50, 'max': 0, 'step': 1, 'value': -4}),
                  ('score', 'lineEdit', {}),
                  ('predict_on', 'list_widget', {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                                                 'toolTip': 'Data column of the input "predict" Transmission\n'
                                                            'that is used for predicting from the model'}),
                  ('Apply', 'check', {'applyBox': True, 'checked': False})
                  ]

    def __init__(self, name, **kwargs):
        CtrlNode.__init__(self, name, terminals={'train': {'io': 'in'},
                                                 'predict': {'io': 'in'},

                                                 'T': {'io': 'out'},
                                                 'coef': {'io': 'out'},
                                                 'means': {'io': 'out'},
                                                 'predicted': {'io': 'out'}
                                                 },
                          **kwargs)
        self.ctrls['score'].setReadOnly(True)

    def process(self, **kwargs):
        return self.processData(**kwargs)

    def processData(self, train: Transmission, predict: Transmission):
        self.t = train.copy()  #: Transmission instance containing the training data with the labels
        if predict is not None:
            self.to_predict = predict.copy()  #: Transmission instance containing the data to predict after fitting on the training data

        dcols, ccols, ucols = organize_dataframe_columns(self.t.df.columns)

        self.ctrls['train_data'].setItems(dcols)
        self.ctrls['train_labels'].setItems(ccols)

        if predict is not None:
            pdcols, ccols, ucols = organize_dataframe_columns(self.to_predict.df.columns)
            self.ctrls['predict_on'].setItems(pdcols)

        if not self.apply_checked():
            return

        train_columns = self.ctrls['train_data'].getSelectedItems()
        labels = self.ctrls['train_labels'].currentText()

        solver = self.ctrls['solver'].currentText()

        shrinkage = self.ctrls['shrinkage'].currentText()
        if shrinkage == 'value':
            shrinkage = self.ctrls['shrinkage_val'].value()
        elif shrinkage == 'None':
            shrinkage = None

        n_components = self.ctrls['n_components'].value()
        tol = 10 ** self.ctrls['tol'].value()

        store_covariance = True if solver == 'svd' else False

        params = {'train_data': train_columns,
                  'train_labels': labels,
                  'solver': solver,
                  'shrinkage': shrinkage,
                  'n_components': n_components,
                  'tol': tol,
                  'store_covariance': store_covariance
                  }

        kwargs = params.copy()
        kwargs.pop('train_data')
        kwargs.pop('train_labels')
        self.lda = LinearDiscriminantAnalysis(**kwargs)

        # Make an array of all the data from the selected columns
        self.X = np.hstack([np.vstack(self.t.df[train_column]) for train_column in train_columns])
        self.y = self.t.df[labels]

        self.X_ = self.lda.fit_transform(self.X, self.y)

        self.t.df['_LDA_TRANSFORM'] = self.X_.tolist()
        self.t.df['_LDA_TRANSFORM'] = self.t.df['_LDA_TRANSFORM'].apply(np.array)

        params.update({'score': self.lda.score(self.X, self.y),
                       'classes': self.lda.classes_.tolist()
                       })

        self.ctrls['score'].setText(f"{params['score']:.4f}")

        self.t.history_trace.add_operation('all', 'lda', params)

        self.t.df['_LDA_DFUNC'] = self.lda.decision_function(self.X).tolist()

        coef_df = pd.DataFrame({'classes': self.lda.classes_, '_COEF': self.lda.coef_.tolist()})
        t_coef = Transmission(df=coef_df, history_trace=self.t.history_trace)

        means_df = pd.DataFrame({'classes': self.lda.classes_, '_MEANS': self.lda.means_.tolist()})
        t_means = Transmission(df=means_df, history_trace=self.t.history_trace)

        out = {'T': self.t, 'coef': t_coef, 'means': t_means, 'predicted': None}

        # Predict using the trained model
        predict_columns = self.ctrls['predict_on'].getSelectedItems()

        if not predict_columns:
            return out

        if predict_columns != train_columns:
            QtWidgets.QMessageBox.warning(None, 'Predict and Train columns do not match',
                                          'The selected train and predict columns are different')

        predict_data = np.hstack([np.vstack(self.to_predict.df[predict_column]) for predict_column in predict_columns])
        self.to_predict.df['LDA_PREDICTED_LABELS'] = self.lda.predict(predict_data)
        self.to_predict.df['_LDA_TRANSFORM'] = self.lda.transform(predict_data).tolist()
        self.to_predict.df['_LDA_TRANSFORM'] = self.to_predict.df['_LDA_TRANSFORM'].apply(np.array)

        params_predict = params.copy()
        params_predict.update({'predict_columns': predict_columns})

        self.to_predict.history_trace.add_operation('all', 'lda-predict', params_predict)

        out.update({'predicted': self.to_predict})

        return out
Example #11
def cross_validator(data,
                    subject,
                    n_splits=5,
                    epochs=10,
                    lr=0.0003,
                    batch_size=64,
                    model_name="",
                    model_config={
                        'bn': True,
                        'dropout': True,
                        'branched': True,
                        'nonlinear': 'tanh'
                    },
                    early_stopping=True,
                    use_deep_features=False,
                    patience=10):

    if model_name.startswith('deep'):

        metrics = []
        histories = []
    else:
        m = [
            'acc', 'val_acc', 'val_precisions', 'val_recalls', 'val_f1s',
            'val_aucs', 'val_balanced_acc', 'val_recognition_acc', 'val_bpm'
        ]
        metrics = {key: [] for key in m}
        histories = False

    cnf_matrices = []
    x = data[0]
    y = data[1]
    t = data[2]
    tr = data[3]

    if use_deep_features:
        path = './models/subjects/'
        load_model_name = 'deep_subjective_branched_250_thesis1'
        files = [
            f for f in listdir(join(path, subject))
            if isfile(join(path, subject, f))
        ]
        myfiles = [file for file in files if load_model_name in file]
        myfiles.sort()
#    skf = StratifiedKFold(n_splits=5)
#    for train, test in skf.split(x, y):

#    sss = StratifiedShuffleSplit(n_splits=1, test_size=.1, random_state=0)
#    for train, test in sss.split(x, y):

    for i, (train, test) in enumerate(cv_splitter(x, n_splits=n_splits)):

        if use_deep_features:
            base_model = load_model(join(path, subject, myfiles[i]))
            model = Model(inputs=base_model.input,
                          outputs=base_model.layers[-2].output)

#        print train
#        print test
#        continue
        global y_test
        global x_test
        x_tv, x_test, y_tv, y_test = x[train], x[test], y[train], y[test]

        global t_test
        t_test = t[test]
        global tr_test
        tr_test = tr[test]

        if model_name.startswith('deep') and early_stopping:
            x_train, x_valid, y_train, y_valid = train_test_split(
                x_tv, y_tv, stratify=y_tv, random_state=42, test_size=0.2)
        else:
            x_train = x_tv
            y_train = y_tv
            if use_deep_features:
                x_train, y_train, x_test, y_test = resample_transform(
                    (x_train, y_train, x_test, y_test), resample=False)
                x_train = model.predict(x_train)
                x_test = model.predict(x_test)
        # standardization of the data
        # computing the mean and std on the training data
#        scalar = StandardScaler(with_mean=False)
##         mus = []
#        stds = []
#        trials_no = x_train.shape[0]
#        for i in range(trials_no):
#            scalar.fit(x_train[i])
##             mu = scalar.mean_
#            std = scalar.scale_
##             mus.append(mu)
#            stds.append(std)
#        #scalar.fit(x_train.reshape((x_train.shape[0]*x_train.shape[1], x_train.shape[2])))
#
#        # tranbsforming the training data
##         scalar.mean_ = np.mean(mus, axis=0)
#        scalar.scale_ = np.mean(stds, axis=0)
#        normalized_x_train = np.empty_like(x_train)
#        for i in range(trials_no):
#            temp = scalar.transform(x_train[i])
#            normalized_x_train[i] = temp
#
#        # transforming the test data
#        normalized_x_test = np.empty_like(x_test)
#        trials_no = x_test.shape[0]
#        for i in range(trials_no):
#            temp = scalar.transform(x_test[i])
#            normalized_x_test[i] = temp

#        normalized_x_train = x_train
#        normalized_x_test = x_test

# standardization
        scalar = StandardScaler(with_mean=True)
        scalar.fit(x_train.reshape(x_train.shape[0], -1))
        x_train = scalar.transform(x_train.reshape(x_train.shape[0],
                                                   -1)).reshape(x_train.shape)
        x_test = scalar.transform(x_test.reshape(x_test.shape[0],
                                                 -1)).reshape(x_test.shape)
        if model_name.startswith('deep') and early_stopping:
            x_valid = scalar.transform(x_valid.reshape(
                x_valid.shape[0], -1)).reshape(x_valid.shape)

#        x_train_reshaped = x_train.reshape(x_train.shape[0],-1)
#        x_test_reshaped = x_test.reshape(x_test.shape[0], -1)
#        mins = np.min(x_train_reshaped , axis=0)
#        maxs = np.max(x_train_reshaped, axis=0)
#        normalized_x_train = 2*(x_train_reshaped-mins)/(maxs-mins)-1
#        normalized_x_test = 2*(x_test_reshaped-mins)/(maxs-mins)-1
#        normalized_x_train = np.reshape(normalized_x_train, x_train.shape)
#        normalized_x_test = np.reshape(normalized_x_test, x_test.shape)
##

#resampling the data

        if model_name.startswith('deep'):
            n_samples, timepoints, channels = x_train.shape
            x_train = np.reshape(x_train, (n_samples, timepoints * channels))
            ros = RandomOverSampler(random_state=0)
            x_res, y_res = ros.fit_sample(x_train, y_train)
            x_train = np.reshape(x_res, (x_res.shape[0], timepoints, channels))
            y_train = y_res

        x_train = np.expand_dims(x_train, axis=3)
        x_test = np.expand_dims(x_test, axis=3)
        if model_name.startswith('deep') and early_stopping:
            x_valid = np.expand_dims(x_valid, axis=3)

#        c = compute_class_weight('balanced', [0, 1], y)
#        class_weight = {0:c[0],1:c[1]}
#        print class_weight
#         pdb.set_trace()
#compiling the model
        if model_name.startswith('deep'):
            if 'branched' in model_name:
                if '250' in model_name:
                    path = './models/subjects/'
                    load_model_name = 'deep_intersubjective_branched_250_thesis2_2'
                    files = [
                        f for f in listdir(join(path, subject))
                        if isfile(join(path, subject, f))
                    ]
                    myfiles = [
                        file for file in files if load_model_name in file
                    ]
                    model = load_model(join(path, subject, myfiles[0]))

#                    model = branched2(x.shape, model_config=model_config, f=5)
                else:
                    path = './models/subjects/'
                    load_model_name = 'deep_intersubjective_branched_50_avg_thesis2_2'
                    files = [
                        f for f in listdir(join(path, subject))
                        if isfile(join(path, subject, f))
                    ]
                    myfiles = [
                        file for file in files if load_model_name in file
                    ]
                    model = load_model(join(path, subject, myfiles[0]))

#                    model = branched2(x.shape, model_config=model_config, f=1)
            elif 'eegnet' in model_name:
                if '250' in model_name:
                    model = create_eegnet(x.shape, f=4)
                else:
                    model = create_eegnet(x.shape, f=1)
            elif 'cnn' in model_name:
                if '250' in model_name:
                    model = create_cnn(x.shape, f=5)
                else:
                    model = create_cnn(x.shape, f=1)


#            opt = Adam(lr=lr)
            opt = SGD(lr=1e-4, momentum=0.9)
            lrate = LearningRateScheduler(step_decay)

            model.compile(loss='binary_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])

            m = Metrics()
            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.5,
                                          patience=int(patience / 2),
                                          min_lr=0)
            early_stop = EarlyStopping(monitor='val_loss',
                                       min_delta=0.0001,
                                       patience=patience,
                                       verbose=0,
                                       mode='auto')
            mod_path = './models/subjects/' + subject
            timestr = time.strftime("%Y%m%d-%H%M")

            checkpointer = ModelCheckpoint(filepath=mod_path + '/best_' +
                                           model_name + '_' + timestr,
                                           monitor='val_loss',
                                           verbose=1,
                                           save_best_only=True)
            if early_stopping:
                history = model.fit(
                    x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,
                    validation_data=(x_valid, y_valid),
                    callbacks=[m, early_stop, checkpointer, reduce_lr],
                )
            else:
                history = model.fit(
                    x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,
                    validation_data=(x_test, y_test),
                    callbacks=[m],
                )

            metrics.append(m)
            histories.append(history)

            probabilities = model.predict(x_test,
                                          batch_size=batch_size,
                                          verbose=0)
            y_predict = [(round(k)) for k in probabilities]
        else:
            x_train = np.reshape(x_train, (x_train.shape[0], -1))
            x_test = np.reshape(x_test, (x_test.shape[0], -1))
            if 'svm' in model_name:
                clf = svm.LinearSVC(random_state=4)
            elif 'lda' in model_name:
                if 'shrinkage' in model_name:
                    clf = LinearDiscriminantAnalysis(solver='lsqr',
                                                     shrinkage='auto')
                else:
                    clf = LinearDiscriminantAnalysis(solver='lsqr',
                                                     shrinkage=None)
            clf.fit(x_train, y_train)
            y_predict = clf.predict(x_test)
            if 'svm' in model_name:
                probs = clf.decision_function(x_test)
            elif 'lda' in model_name:
                probs = clf.decision_function(x_test)
            metrics['acc'].append(clf.score(x_train, y_train))
            metrics = compute_metrics(metrics, probs, y_predict, y_test)

        cnf_matrix = confusion_matrix(y_test, y_predict)
        cnf_matrices.append(cnf_matrix)
    return metrics, histories, cnf_matrices
Example #12
import numpy as np
import pandas as pd

# Import dataset
df = pd.read_csv('data.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Split into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Apply LDA
# By default, it reduces to k-1 components,
# where k is the number of classes
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=None)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)
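# Quick sanity check of the reduction described above (illustration only, not in the original):
# after fit_transform, the number of columns is at most n_classes - 1
print(X_train.shape)  # second dimension should be len(lda.classes_) - 1 at most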

# LDA can also classify directly: predict() assigns each sample to the class
# with the highest posterior probability
y_pred = lda.predict(X_test)
spectraCal = np.delete(spectraCal, np.where(bbl == 0)[0], 1)  # remove bad bands

# Read in validation spectral files
# libSpecValFile = libLocation + dateTag + '_transformed_spectral_library_validation_spectra.csv'
libSpecValFile = libLocation + dateTag + '_spectral_library_validation_spectra.csv'
spectraVal = np.loadtxt(libSpecValFile, dtype=object, delimiter=',')  # Load in spectra - skips first line
metaSpecVal = spectraVal[:, 0:5]  # save first 5 columns of spectra separately
spectraVal = np.delete(spectraVal, [0, 1, 2, 3, 4], 1)  # remove the 5 columns of metadata in spectra
spectraVal = spectraVal.astype(np.double)  # convert from string array to double array
spectraVal = np.nan_to_num(spectraVal)  # there are values that are not finite, change them to zero
spectraVal = np.delete(spectraVal, np.where(bbl == 0)[0], 1)  # remove bad bands

# Develop canonical discriminant variables
clf = LinearDiscriminantAnalysis()  # http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html#sklearn.discriminant_analysis.LinearDiscriminantAnalysis
clf.fit(spectraCal, metaCal[:, 15].astype(np.int))
cdaDecision = clf.decision_function(spectraVal)
cdaScore = clf.scalings_
cdaPredict = clf.predict(spectraVal)
calCDA = clf.transform(spectraCal)
valCDA = clf.transform(spectraVal)

# Calculate results from CDA development
regr = linear_model.LinearRegression()
x = metaVal[:, 15].astype(np.int).reshape(len(metaVal[:, 15]), 1)
y = cdaPredict.reshape(len(cdaPredict), 1)
linearResults = regr.fit(x, y)  # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
plt.scatter(metaVal[:, 15].astype(np.int), cdaPredict)
plt.plot(metaVal[:, 15].astype(np.int), regr.predict(x))
plt.ylabel('Predicted')
plt.xlabel('Observed')
plt.title(dateTag)
        compra_sim['durabilid'],
        compra_nao['durabilid'])
     )

print(stats.f_oneway(
        compra_sim['estilo'],
        compra_nao['estilo'])
     )

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = compra_xls[['durabilid', 'desempenh', 'estilo']]
y = compra_xls['compra'] == 'sim'
print(y)
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
print(clf.decision_function(X))
print(clf.score(X, y))

y_ = clf.predict(X)

print(clf)
print(clf.score(X, y))
print(clf.coef_, clf.intercept_)


comprapredic = pd.read_csv("comprapredic.csv", header=0, sep=";")
X2 = comprapredic[['durabilid', 'desempenh', 'estilo']]
clf.predict(X2)


from sklearn.feature_selection import RFE
Example #15
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max] x [y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0],
                   X_test[:, 1],
                   c=y_test,
                   cmap=cm_bright,
                   alpha=0.6)
def main():
    #        global file_exist, file1, file2, channelNum
    Data_path = "C:\\Users\\user\\Desktop\\Drone\\LDA\\Data\\"
    eegData_txt = Data_path + 'eegData.out'
    stims_txt = Data_path + 'stims.out'
    start_txt = Data_path + 'start.out'
    moveData_eeg = 'C:\\Users\\user\\Desktop\\Drone\\LDA\\Online\\eegData\\'
    moveData_stims = 'C:\\Users\\user\\Desktop\\Drone\\LDA\\Online\\stims\\'

    Classifier_path = "C:\\Users\\user\\Desktop\\Drone\\LDA\\Model\\"

    current_list2 = sorted(glob.glob(Classifier_path + '*.pickle'),
                           key=os.path.getmtime,
                           reverse=True)
    Classifier_real = current_list2[0]
    lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    lda = joblib.load(Classifier_real)

    serverSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    serverSock.bind(('', 12240))
    serverSock.listen(0)
    connectionSock, addr = serverSock.accept()
    print('Connection confirmed from', str(addr))

    for i in range(0, 12):
        #load text file
        while True:
            if os.path.isfile(start_txt):
                break
        start_time = time.time()

        while (time.time() - start_time < 25):
            pass

        while True:
            if os.path.isfile(eegData_txt) & os.path.isfile(stims_txt):
                processing_time = time.time()
                os.remove(start_txt)
                eegData = np.loadtxt(eegData_txt, delimiter=",")
                stims = np.loadtxt(stims_txt, delimiter=",")
                ctime = datetime.today().strftime("%m%d_%H%M%S")
                moveData_e = moveData_eeg + ctime + 'eegData.out'
                moveData_s = moveData_stims + ctime + 'stims.out'
                shutil.move(eegData_txt, moveData_e)
                shutil.move(stims_txt, moveData_s)
                break

        print("got process")
        channelNum = 7
        samplingFreq = 300
        buttonNum = 7

        ### Preprocessing process

        #Bandpass Filter
        eegData = butter_bandpass_filter(eegData, 0.1, 30, samplingFreq, 4)

        #Epoching
        epochSampleNum = int(np.floor(1.0 * samplingFreq))
        offset = int(np.floor(0.0 * samplingFreq))
        baseline = int(np.floor(1.0 * samplingFreq))

        Epochs_Aver = np.zeros((buttonNum, channelNum, epochSampleNum))

        resampleRate = 100
        featureNum = channelNum * resampleRate

        Epochs_final = np.zeros((buttonNum, channelNum, resampleRate))

        for i in range(buttonNum):
            Epochs_Aver[i] = Epoching(eegData, stims, (i + 1), samplingFreq,
                                      channelNum, epochSampleNum, offset,
                                      baseline)
            Epochs_final[i] = resampling(Epochs_Aver[i], resampleRate,
                                         channelNum)

        Features = Convert_to_FeatureVector(Epochs_Aver, buttonNum, featureNum)

        Answers = lda.decision_function(Features)
        answer = np.argmax(Answers) + 1

        #            np.savetxt(result_txt, answer)
        print("Process time: ", time.time() - processing_time)
        print("Result: ", answer)
        connectionSock.send(str(answer).encode("utf-8"))
Example #17
# This call to fit using data is what really does the learning
wc.fit(breast.data, breast.target)

#%%
print('')
print('')
print('And test the classifier')
# and the call to predict is what gives the outputs as labels (as in the target field)
pred = wc.predict(breast.data)
print('')
print('Prediction:\n{}'.format(pred[some]))

# but there are other ways to obtain other outputs. In particular, a
# continuous output can be obtained using decision_function
# But in general you need to have a look at the documentation for each predictor
scores = wc.decision_function(breast.data)
print('')
print('Scores (distances to the classifier)\n{}'.format(scores[some]))
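# For a binary problem like this one, the sign of the decision score determines the predicted
# class: positive scores map to wc.classes_[1] (a small sanity check, not part of the original):
import numpy as np
print((pred == wc.classes_[(scores > 0).astype(int)]).all())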

# in some cases, probabilities for each class can also be obtained
probs = wc.predict_proba(breast.data)
print('')
print('Probabilities of being in each class:\n{}'.format(probs[some]))

#%%

# Accuracy
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
# Overall accuracy: ratio of properly classified samples
Acc = metrics.accuracy_score(breast.target, pred)
print('Overall accuracy is {}'.format(Acc))
Example #18
    y = esmFeatures[e][~np.isnan(esmFeatures[e])].values
    y = y == 3
    if np.sum(y) / len(y) < 0.2 or np.sum(y) / len(y) > 0.8:
        continue
    print(np.sum(y) / len(y))
    kf = KFold(n_splits=folds)
    weights = np.zeros((folds, x.shape[1]))
    scores = np.zeros((y.shape[0], 2))
    aucs = []
    for fold, (train, test) in enumerate(kf.split(x)):
        est.fit(x[train, :], y[train])
        #scores[test,:] = est.predict_proba(x[test,:])
        if hasattr(est, "predict_proba"):
            prob_pos = est.predict_proba(x[test, :])  #[:, 0]
        else:  # use decision function
            prob_pos = est.decision_function(x[test, :])
            #prob_pos = \
            #    (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
        scores[test, :] = prob_pos
        #importances[fold,:] = est.feature_importances_
        weights[fold, :] = est.coef_
        aucs.append(roc_auc_score(y[test] == 1, scores[test, 0]))
    #scores = cross_val_score(est,x,y,cv=KFold(10))
    auc = roc_auc_score(y == 1, scores[:, 0])
    (fpr, tpr, treshs) = roc_curve(y == 1, scores[:, 0])
    print('Participant %s has auc %f' % (sub, auc))
    print(np.mean(aucs), np.std(aucs))

    # Figure
    sns.set_context('paper')
    fig, ax = plt.subplots(figsize=(4, 4))
Example #19
class LDA(object):
    def __init__(self,
                 solver="svd",
                 shrinkage=None,
                 priors=None,
                 n_components=None,
                 store_covariance=False,
                 tol=1e-4):
        """
        :param solver: string, 可选项,"svd","lsqr", "eigen"。 默认使用svd, 不计算协方差矩阵,适用于大量特征
        的数据, 最小二乘 lsqr, 结合shrinkage 使用。 eigen 特征值分解, 集合shrinkage  使用
        :param shrinkage: str/float 可选项,概率值,默认为None, "auto", 自动收缩, 0到1内的float, 固定的收缩参数
        :param priors: array, optional, shape (n_classes,) 分类优先
        :param n_components:  # 分量数, 默认None, int, 可选项
        :param store_covariance:  bool, 可选项, 只用于”svd“ 额外计算分类协方差矩阵
        :param tol: 浮点型,默认1e-4, 在svd 中,用于排序评估的阈值
        """
        self.model = LinearDiscriminantAnalysis(
            solver=solver,
            shrinkage=shrinkage,
            priors=priors,
            n_components=n_components,
            store_covariance=store_covariance,
            tol=tol)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)

    def set_params(self, **params):
        self.model.set_params(**params)

    def decision_function(self, x):
        return self.model.decision_function(X=x)

    def predict(self, x):
        return self.model.predict(X=x)

    def predict_log_proba(self, x):
        return self.model.predict_log_proba(X=x)

    def predict_proba(self, x):
        return self.model.predict_proba(X=x)

    def score(self, x, y, sample_weight=None):
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def get_attributes(self):  # the attributes are only available after the model has been fitted
        coef = self.model.coef_  # weight vectors
        intercept = self.model.intercept_  # intercept term
        covariance = self.model.covariance_  # covariance matrix
        explained_variance_ratio = self.model.explained_variance_ratio_
        means = self.model.means_
        priors = self.model.priors_  # class prior probabilities, summing to 1, shape (n_classes,)
        scalings = self.model.scalings_  # scalings, shape (rank, n_classes - 1)
        xbar = self.model.xbar_  # overall mean
        classes = self.model.classes_  # class labels

        return coef, intercept, covariance, explained_variance_ratio, means, priors, scalings, xbar, classes
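# A minimal usage sketch of the wrapper class above (synthetic data, illustration only; the
# import below is added because the fragment itself omits it):
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
X_demo = np.array([[-2.0, -1.0], [-1.0, -1.0], [-1.0, -2.0], [1.0, 1.0], [1.0, 2.0], [2.0, 1.0]])
y_demo = np.array([0, 0, 0, 1, 1, 1])
model = LDA(solver='lsqr', shrinkage='auto')
model.fit(X_demo, y_demo)
print(model.predict(np.array([[-0.8, -1.0]])))        # predicted class label
print(model.predict_proba(np.array([[-0.8, -1.0]])))  # posterior probabilities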
Example #20
    def compute(
            self,
            sample_df: pd.DataFrame,
            data_column: str,
            stimulus_type: str,
            method: str,
            method_kwargs: dict = None,
            use_scaler: bool = False,
            scaler_method: str = None,

    ):
        self.params = {
            'data_column': data_column,
            'stimulus_type': stimulus_type,
            'method': method,
            'method_kwargs': method_kwargs,
            'use_scaler': use_scaler,
            'scaler_method': scaler_method,
        }

        sample_df = sample_df.reset_index(drop=True)

        # Stimulus mapping dataframe
        stim_dataframe = sample_df.iloc[0]['stim_maps'][0][0][stimulus_type]

        # create an array where each frame is labelled with the stimulus name
        self.labels = np.empty(
            shape=(sample_df[data_column].iloc[0].shape),  # shape is (n_frames,)
            dtype=f'<U{stim_dataframe.name.apply(len).max()}'  # just unicode with length of longest stimulus name str
        )

        # fill the array so each frame is labelled with the stimulus name for that frame
        for i, s in stim_dataframe.sort_values(by=['start'], ascending=True).iterrows():
            self.labels[int(s['start']):int(s['end'])] = s['name']

        self.input_data = np.vstack(sample_df[data_column].values).T

        if use_scaler:
            scaler = getattr(preprocessing, scaler_method)()
            X = scaler.fit_transform(self.input_data)
        else:
            X = self.input_data

        if method == 'PCA':
            self.pca = PCA(**method_kwargs)
            self.low_dim_data = self.pca.fit_transform(X)

        elif method == 'LDA':
            if 'store_covariance' in self.params['method_kwargs'].keys():
                store_covariance = self.params['method_kwargs'].pop('store_covariance')

            elif 'solver' in self.params['method_kwargs'].keys():
                store_covariance = True if self.params['method_kwargs']['solver'] not in ['lsqr', 'eigen'] else False

            else:
                store_covariance=False

            lda = LinearDiscriminantAnalysis(**method_kwargs, store_covariance=store_covariance)
            self.low_dim_data = lda.fit_transform(X, self.labels)

            self.lda_means = lda.means_
            if hasattr(lda, 'covariance_'):
                self.lda_covariance = lda.covariance_
            self.lda_decision_function = lda.decision_function(X)

        return self
        print "The misclassified item:", i

Zx = [[5, 5, 5, 5],
      [3, 3, 3, 3]]  # This is the item that I have made up with 4 features
Z = np.array(Zx)  # I have changed it as a numpy array
print Z
print lda.predict_log_proba(
    Z
)  # This function returns posterior log-probabilities of classification according to each class on an array of test vectors X.
print lda.predict_proba(
    Z
)  # This function returns posterior probabilities of classification according to each class on an array of test vectors X.
print lda.predict(
    Z)  # This function does classification on an array of test vectors X.
print lda.decision_function(
    Z
)  # This function returns the decision function values related to each class on an array of test vectors X.
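# The probability outputs above are mutually consistent: predict_proba equals the elementwise
# exponential of predict_log_proba (a sanity check, not part of the original snippet):
print(np.allclose(lda.predict_proba(Z), np.exp(lda.predict_log_proba(Z))))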

print confusion_matrix(pred, y)  #
# print fit.score(X, y)  # 96% of accuracy
print accuracy_score(
    y, pred
)  # the use of another function for calculating the accuracy (correct_predictions / all_predictions)
# print accuracy_score(y, pred, normalize=False)  # the number of correct predictions

colors = ['navy', 'turquoise', 'darkorange']
lw = 2
plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_lda[y == i, 0],
                X_lda[y == i, 1],
lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
lda.fit(X, y)

# Quadratic Discriminant Analysis
qda = QuadraticDiscriminantAnalysis(store_covariance=True)
qda.fit(X, y)

# class 0 and 1 : areas
nx, ny = 200, 100
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                     np.linspace(y_min, y_max, ny))

Z_LDA = lda.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_LDA.shape = xx.shape
ax = plt.subplot(2, 2, 2)

ax.contourf(xx, yy, Z_LDA, cmap=cm_bright)
ax.scatter(X0[:, 0], X0[:, 1], marker='.', color='red')
ax.scatter(X1[:, 0], X1[:, 1], marker='.', color='blue')
plt.title('LDA')

Z_QDA = qda.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_QDA.shape = xx.shape
ax = plt.subplot(2, 2, 3)

ax.contourf(xx, yy, Z_QDA, cmap=cm_bright)
ax.scatter(X0[:, 0], X0[:, 1], marker='.', color='red')
ax.scatter(X1[:, 0], X1[:, 1], marker='.', color='blue')
    print('LDA prediction')

    # test accuracy on the test set
    train_set_df = pd.DataFrame(train_y_values)
    train_set_df['predicted'] = clf.predict(train_x_data.values)
    train_set_df[
        'wrong'] = train_set_df['predicted'] != train_set_df['age_group']
    stats['lda']['train']['wrong'].append(train_set_df['wrong'].sum())
    stats['lda']['train']['size'].append(train_set_df['wrong'].size)
    stats['lda']['train']['score'].append(
        clf.score(train_x_data, train_y_values))
    # coefficent_df['svm']['coef'].append(clf.coef_)
    # coefficent_df['svm']['class'].append(clf.classes_)

    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        train_y_values.values, clf.decision_function(train_x_data))
    roc_auc = auc(false_positive_rate, true_positive_rate)
    stats['lda']['train']['roc_auc'].append(roc_auc)
    plt.subplot(2, 1, 1)
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='Train AUC = %0.2f' % roc_auc)
    plt.title('Train set')
    print(
        'LDA-Train - Total wrong predictions : {}, out of: {}, accuracy: {}, auc: {}'
        .format(train_set_df['wrong'].sum(), train_set_df['wrong'].size,
                clf.score(train_x_data, train_y_values), roc_auc))
    # test accuracy on the test set
    test_set_df = pd.DataFrame(test_y_values)
    test_set_df['predicted'] = clf.predict(test_x_data.values)
print "done generating"

X = np.array(arr1)
y = np.array(arr2)
del arr1
del arr2
print "done deleting arr1 arr2"
start = timer()
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
end = timer()

del X
del y

print "Time it took to train:"
print(end - start)
print "Time it took to Predict:"
test.append(randomarr())
test.append(randomarr())
test.append(randomarr())
test.append(randomarr())
start = timer()
print "class prediction";
print(clf.predict(test))
end = timer()
print(end - start)
print "decision function value"
#print test;
print (clf.decision_function(test))
Example #25
def lda_classify_datasets_demo(datasets):
    ''' Show LDA results on several datasets
    '''
    figure = plt.figure(figsize=(8, 6))
    i = 1

    for ds in datasets:
        mesh_step = 0.2
        x, y = ds  # get dataset points

        # Split current dataset into training and test sets
        x = StandardScaler().fit_transform(
            x)  # set mean=0 and set var=1 for input values
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.4)

        x_min, x_max = x_test[:, 0].min() - .5, x_test[:, 0].max() + .5
        y_min, y_max = x_test[:, 1].min() - .5, x_test[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step),
                             np.arange(y_min, y_max, mesh_step))

        # On top, show dataset only
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])

        ax = plt.subplot(len(datasets), 2, i)
        ax.scatter(x_test[:, 0],
                   x_test[:, 1],
                   c=y_test,
                   cmap=cm_bright,
                   alpha=0.6)
        ax.set_xticks(())
        ax.set_yticks(())

        # Below that, show results of classifier on dataset
        classifier = LDA()
        classifier.fit(x_train, y_train)

        if hasattr(classifier, "decision_function"):
            Z = classifier.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = classifier.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        i += 1
        ax = plt.subplot(len(datasets), 2, i)

        Z = Z.reshape(xx.shape)

        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
        ax.scatter(x_test[:, 0],
                   x_test[:, 1],
                   c=y_test,
                   cmap=cm_bright,
                   alpha=0.6)
        ax.set_xticks(())
        ax.set_yticks(())

        i += 1

    # Plot datasets
    plt.show()
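# One possible way to invoke the demo above (assumes scikit-learn's synthetic dataset helpers
# and the imports already used inside the function; an illustration, not part of the original):
from sklearn.datasets import make_moons, make_circles
lda_classify_datasets_demo([make_moons(noise=0.3, random_state=0),
                            make_circles(noise=0.2, factor=0.5, random_state=1)])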
                eyeglass_idx = i

    print(pant_val)
    print(eyeglass_val)

    return pant_idx, eyeglass_idx


def plot_image(img):

    plt.imshow(img.reshape(row, col), cmap='gray')
    plt.show()

    return


# Find and plot the most incorrectly classified object in test set

test_pred_val = lda.decision_function(X_pca_test)

pant_idx, eyeglass_idx = find_most_incorrectly_classified(
    test_pred_val, test_label_2c)

print(test_label_2c[pant_idx])

print(test_label_2c[eyeglass_idx])

plot_image(x_test[pant_idx])

plot_image(x_test[eyeglass_idx])
Example #27
class FBCSP(object):
	def __init__(self,
		     sample_rate,
		     feat_sel_proportion=0.8,
		     low_cut_hz = 4,
		     high_cut_hz = 36,
		     step = 4,
		     csp_components = 4
		    ):

		self.low_cut_hz = low_cut_hz
		self.high_cut_hz = high_cut_hz
		self.step = step
		self.sample_rate = sample_rate
		self.csp_component = csp_components
		self.feat_proportion = feat_sel_proportion
		self.csp_bank = dict()
		self.low = dict()
		self.high = dict()
		self.n_bank = (self.high_cut_hz - self.low_cut_hz)//self.step
		self.n_feat = int(self.n_bank*self.csp_component*self.feat_proportion)

		for i in range(self.n_bank):
			self.low[i]  = self.low_cut_hz+i*self.step
			self.high[i] = self.low_cut_hz+i*self.step+self.step
			if (self.high_cut_hz - self.high[i]) < self.step:
				self.high[i] = self.high_cut_hz

				
	def fit(self, data, label):
		data_bank = dict()
		for i in range(self.n_bank):
			# get each freq filter bank 
			data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
			# extract csp feature for each bank 
			self.csp_bank[i] = CSP(n_components=self.csp_component, reg=None, log=True, norm_trace=False)
			self.csp_bank[i].fit(data_bank[i], label)


	def transform(self, data):
		data_bank = dict()
		csp_feat = dict()
		for i in range(self.n_bank):
			# get each freq filter bank 
			data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
			# extract csp feature for each bank 
			csp_feat[i] = self.csp_bank[i].transform(data_bank[i])
			try:
				feature
			except NameError:
				feature = csp_feat[i]
			else:
				feature = np.hstack([feature, csp_feat[i]])
		return feature
	
	
	def fit_transform(self, data, label):
		data_bank = dict()
		csp_feat = dict()
		for i in range(self.n_bank):
			# get each freq filter bank 
			data_bank[i] = self.bank_filter(data, self.low[i], self.high[i], self.sample_rate)
			# extract csp feature for each bank 
			self.csp_bank[i] = CSP(n_components=4, reg=None, log=True, norm_trace=False)
			self.csp_bank[i].fit(data_bank[i], label)
			csp_feat[i] = self.csp_bank[i].transform(data_bank[i])
			try:
				feature
			except NameError:
				feature = csp_feat[i]
			else:
				feature = np.hstack([feature, csp_feat[i]])
		return feature


	def bank_filter(self, data, low_cut_hz, high_cut_hz, sample_rate):
		n_trial		= data.shape[0]
		n_channel	= data.shape[1]
		n_length	= data.shape[2]
		data_bank	= []
		for i in range(n_trial):
			data_bank += [np.array([butter_bandpass_filter(data[i, j, :], low_cut_hz, high_cut_hz, sample_rate, pass_type = 'band', order=6) 
							for j in range(n_channel)])]
		return np.array(data_bank)


	def classifier_fit(self, feature, label):
		# feature selection
		self.MI_sel = SelectPercentile(mutual_info_classif, percentile=self.feat_proportion*100)
		self.MI_sel.fit(feature, label)
		new_feat = self.MI_sel.transform(feature)
		# classification
		self.clf = LinearDiscriminantAnalysis()
		self.clf.fit(new_feat, label)


	def classifier_transform(self, feature):
		# feature selection
		new_feat = self.MI_sel.transform(feature)
		# classification
		return self.clf.transform(new_feat)


	def evaluation(self, feature, label):
		# feature selection
		new_feat = self.MI_sel.transform(feature)
		# accuracy
		accuracy = self.clf.score(new_feat, label)
		# f1
		f1 = dict()
		pred = self.clf.predict(new_feat)
		f1["micro"] = f1_score(y_true = label, y_pred = pred, average='micro')
		f1["macro"] = f1_score(y_true = label, y_pred = pred, average='macro')
		# auc
		pred_posi = self.clf.decision_function(new_feat)
		lb = LabelBinarizer()
		test_y = lb.fit_transform(label)
		roc_auc = self.multiclass_roc_auc_score(y_true = test_y, y_score = pred_posi)
		return accuracy, f1, roc_auc


	def multiclass_roc_auc_score(self, y_true, y_score):
		assert y_true.shape == y_score.shape
		fpr = dict()
		tpr = dict()
		roc_auc = dict()
		n_classes = y_true.shape[1]
		# compute ROC curve and ROC area for each class
		for i in range(n_classes):
			fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
			roc_auc[i] = auc(fpr[i], tpr[i])
		# compute micro-average ROC curve and ROC area
		fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
		roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
	
		# compute macro-average ROC curve and ROC area
		# First aggregate all false positive rates
		all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
		# Then interpolate all ROC curves at this points
		mean_tpr = np.zeros_like(all_fpr)
		for i in range(n_classes):
		    mean_tpr += interp(all_fpr, fpr[i], tpr[i])
		# Finally average it and compute AUC
		mean_tpr /= n_classes
		fpr["macro"] = all_fpr
		tpr["macro"] = mean_tpr
		roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
		return roc_auc
# train the model
clf.fit(x_train, y_train)

# looking at the attributes
coef = clf.coef_
intercept = clf.intercept_
#covariance_mat = clf.covariance_ # gives the covariance matrix, does not work for the solver 'svd'
perc_vari = clf.explained_variance_ratio_
means = clf.means_
priors = clf.priors_
scalings = clf.scalings_
overall_mean = clf.xbar_
classes = clf.classes_

# looking at the methods
decision_function = clf.decision_function(x_test)
fit_transform = clf.fit_transform(x_test, y_test)
get_params = clf.get_params()
prediction = clf.predict(x_test)
predict_log_proba = clf.predict_log_proba(x_test)
predict_proba = clf.predict_proba(x_test)
mean_accuracy_train = clf.score(x_train, y_train)
mean_accuracy_test = clf.score(x_test, y_test)
transform = clf.transform(x_test)

print(
    'The mean accuracy of the train dataset is: %.3f and the mean accuracy of the test dataset is: %.3f'
    % (mean_accuracy_train, mean_accuracy_test))

pdb.set_trace()