def evaluate_subject_models(data, labels, modelpath, subject,
                            n_splits=5, n_repeats=10):
    """
    Trains and evaluates EEGNet for a given subject in the P300 Speller
    database using repeated stratified K-fold cross validation.

    For every fold the training part is split again (80/20, fixed seed) into
    a training and a validation set; the validation set drives early
    stopping. The AUC of each fold is printed and all AUCs are written to
    ``<modelpath>/s<subject>_aucs.npy``.

    Parameters
    ----------
    data : array indexed as ``data[subject, example, sample, channel]``.
    labels : array indexed as ``labels[subject, example]``.
    modelpath : str, directory in which the AUC file is written.
    subject : int, index of the subject to evaluate.
    n_splits : int, number of folds per repetition (default 5).
    n_repeats : int, number of repetitions of the K-fold (default 10).

    Returns
    -------
    None
    """
    n_samples = data.shape[2]
    n_channels = data.shape[3]

    # One AUC per (repeat, fold) pair; sized from the CV settings so the
    # buffer can never get out of sync with the splitter.
    aucs = np.zeros(n_splits * n_repeats)
    print("Training for subject {0}: ".format(subject))

    subject_data = data[subject]
    subject_labels = labels[subject]
    cv = RepeatedStratifiedKFold(n_splits=n_splits,
                                 n_repeats=n_repeats,
                                 random_state=123)
    for k, (t, v) in enumerate(cv.split(subject_data, subject_labels)):
        X_test, y_test = subject_data[v], subject_labels[v]
        # Hold out 20% of the fold's training data for early stopping.
        X_train, X_valid, y_train, y_valid = train_test_split(
            subject_data[t],
            subject_labels[t],
            test_size=0.2,
            shuffle=True,
            random_state=456)
        print(
            'Partition {0}: X_train = {1}, X_valid = {2}, X_test = {3}'.format(
                k, X_train.shape, X_valid.shape, X_test.shape))

        # Channel-wise feature standardization. The scaler is fitted on the
        # training data only; a singleton axis is then inserted and axes 2
        # and 3 are swapped to obtain the layout passed to EEGNet.
        sc = EEGChannelScaler(n_channels=n_channels)
        X_train = np.swapaxes(
            sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
        X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
        X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

        model = EEGNet(2, Chans=n_channels, Samples=n_samples)
        # summary() prints by itself; wrapping it in print() only adds "None".
        model.summary()
        model.compile(optimizer='adam', loss='categorical_crossentropy')

        # Early stopping setting also follows EEGNet (Lawhern et al., 2018)
        es = EarlyStopping(monitor='val_loss',
                           mode='min',
                           patience=50,
                           restore_best_weights=True)
        model.fit(X_train,
                  to_categorical(y_train),
                  batch_size=256,
                  epochs=200,
                  validation_data=(X_valid, to_categorical(y_valid)),
                  callbacks=[es])

        # Column 1 of the prediction is used as the positive-class score.
        proba_test = model.predict(X_test)
        aucs[k] = roc_auc_score(y_test, proba_test[:, 1])
        print('S{0}, P{1} -- AUC: {2}'.format(subject, k, aucs[k]))
        # Release the Keras graph/session before building the next model.
        K.clear_session()

    # NOTE: np.savetxt writes plain text even though the name ends in .npy;
    # the file name is kept unchanged for existing consumers.
    np.savetxt(modelpath + '/s' + str(subject) + '_aucs.npy', aucs)
Esempio n. 2
0
def evaluate_subject_model(X_train, y_train, X_valid, y_valid, X_test, y_test,
                           timepath):
    """
    Measures EEGNet training and prediction time on one data partition.

    The model is trained for a fixed number of epochs while a ``TrainTime``
    callback records per-epoch durations; prediction on the test set is timed
    separately. A one-row table with the timings and set sizes is written to
    ``timepath + 'EEGNet_times.csv'``.

    Parameters
    ----------
    X_train, X_valid, X_test : arrays whose axis 1 is read as the number of
        samples and axis 2 as the number of channels.
    y_train, y_valid : integer class labels for the matching X arrays.
    y_test : unused; kept so the signature stays compatible with callers.
    timepath : str, prefix of the output CSV path. It is concatenated as-is,
        so it must already end with a path separator if it is a directory.

    Returns
    -------
    None
    """
    # Single source of truth for the epoch count: used for training and
    # reported in the timing table.
    n_epochs = 10

    print('X_train = {0}, X_valid = {1}, X_test = {2}'.format(
        X_train.shape, X_valid.shape, X_test.shape))

    n_samples = X_train.shape[1]
    n_channels = X_train.shape[2]

    # Channel-wise standardization fitted on the training data only, then a
    # singleton axis is inserted and axes 2 and 3 are swapped for EEGNet.
    sc = EEGChannelScaler(n_channels=n_channels)
    X_train = np.swapaxes(sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
    X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
    X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

    model = EEGNet(2, Chans=n_channels, Samples=n_samples)
    model.compile(optimizer='adam', loss='categorical_crossentropy')

    tt = TrainTime()
    model.fit(X_train,
              to_categorical(y_train),
              batch_size=256,
              epochs=n_epochs,
              validation_data=(X_valid, to_categorical(y_valid)),
              callbacks=[tt])

    # Only the duration of the prediction is of interest here; the predicted
    # probabilities themselves are discarded.
    start_test = time.time()
    model.predict(X_test)
    test_time = time.time() - start_test

    train_size = X_train.shape[0]
    valid_size = X_valid.shape[0]
    test_size = X_test.shape[0]

    times = [[
        np.mean(tt.times),
        np.sum(tt.times), n_epochs, train_size, valid_size, test_time,
        test_size, test_time / test_size
    ]]
    df = pd.DataFrame(times,
                      columns=[
                          'Mean Epoch Time', 'Total Train Time', 'Epochs',
                          'Train Size', 'Valid Size', 'Test Time', 'Test Size',
                          'Test per example'
                      ])
    df.to_csv(timepath + 'EEGNet_times.csv', encoding='utf-8')
# Chans, Samples  : number of channels and time points in the EEG data
# (`chans` and `samples` are expected to be defined earlier in the script).
# Configure a two-class EEGNet with F1=4 temporal filters, depth multiplier
# D=2, F2=8 pointwise filters and a temporal kernel length of 256 samples
# (other model configurations may do better, but this is a good starting
# point).
model = EEGNet(nb_classes=2,
               Chans=chans,
               Samples=samples,
               dropoutRate=0.5,
               kernLength=256,
               F1=4,
               D=2,
               F2=8,
               dropoutType='Dropout')

# compile the model and set the optimizers
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# print a layer-by-layer overview of the model
model.summary()
# count number of parameters in the model
numParams = model.count_params()

# set a valid path for your system to record model checkpoints
# NOTE(review): the path below is specific to a Google Drive mount under
# /content/gdrive; adjust it when running elsewhere.
checkpointer = ModelCheckpoint(
    filepath='/content/gdrive/MyDrive/checkpoint.h5',
    verbose=1,
    save_best_only=True)

###############################################################################
# if the classification task was imbalanced (significantly more trials in one
# class versus the others) you can assign a weight to each class during
# optimization to balance it out. This data is approximately balanced so we
def evaluate_cross_subject_model(data, labels, modelpath):
    """
    Trains and evaluates EEGNet across subjects in the P300 Speller database
    using leave-one-subject-out cross validation.

    In every partition one subject is held out as the test set. Among the
    remaining subjects, the subject of one randomly drawn training example is
    used as the validation set (for early stopping) and all others form the
    training set. The AUC of each held-out subject is printed and all AUCs
    are written to ``<modelpath>/aucs.npy``.

    Parameters
    ----------
    data : array of shape (n_subjects, n_examples, n_samples, n_channels).
    labels : array that can be reshaped to (n_subjects * n_examples,).
    modelpath : str, directory in which the AUC file is written.

    Returns
    -------
    None
    """
    n_sub, n_ex_sub, n_samples, n_channels = data.shape[:4]

    aucs = np.zeros(n_sub)

    # Flatten the subject axis; `groups` remembers the subject of each row.
    data = data.reshape((n_sub * n_ex_sub, n_samples, n_channels))
    labels = labels.reshape((n_sub * n_ex_sub))
    groups = np.repeat(np.arange(n_sub), n_ex_sub)

    cv = LeaveOneGroupOut()
    for k, (t, v) in enumerate(cv.split(data, labels, groups)):
        X_test, y_test = data[v], labels[v]

        # Draw one training example at random (global NumPy RNG) and use its
        # whole subject as the validation set.
        valid_group = groups[np.random.choice(t, 1)]
        is_valid = groups[t] == valid_group
        valid_idx = t[is_valid]
        train_idx = t[np.logical_not(is_valid)]

        # Index the flat arrays once per subset instead of copying data[t]
        # several times.
        X_train, y_train = data[train_idx], labels[train_idx]
        X_valid, y_valid = data[valid_idx], labels[valid_idx]
        print("Partition {0}: train = {1}, valid = {2}, test = {3}".format(
            k, X_train.shape, X_valid.shape, X_test.shape))
        print("Groups train = {0}, valid = {1}, test = {2}".format(
            np.unique(groups[train_idx]), np.unique(groups[valid_idx]),
            np.unique(groups[v])))

        # Channel-wise feature standardization fitted on the training data
        # only; a singleton axis is then inserted and axes 2 and 3 swapped.
        sc = EEGChannelScaler(n_channels=n_channels)
        X_train = np.swapaxes(
            sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
        X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
        X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

        model = EEGNet(2,
                       dropoutRate=0.25,
                       Chans=n_channels,
                       Samples=n_samples)
        # summary() prints by itself; wrapping it in print() only adds "None".
        model.summary()
        model.compile(optimizer='adam', loss='categorical_crossentropy')

        es = EarlyStopping(monitor='val_loss',
                           mode='min',
                           patience=50,
                           restore_best_weights=True)
        model.fit(X_train,
                  to_categorical(y_train),
                  batch_size=256,
                  epochs=200,
                  validation_data=(X_valid, to_categorical(y_valid)),
                  callbacks=[es])

        # Column 1 of the prediction is used as the positive-class score.
        proba_test = model.predict(X_test)
        aucs[k] = roc_auc_score(y_test, proba_test[:, 1])
        print('P{0} -- AUC: {1}'.format(k, aucs[k]))
        # Release the Keras graph/session before building the next model.
        K.clear_session()

    # NOTE: np.savetxt writes plain text even though the name ends in .npy;
    # the file name is kept unchanged for existing consumers.
    np.savetxt(modelpath + '/aucs.npy', aucs)
Esempio n. 5
0
     
 # convert data to NHWC (trials, channels, samples, kernels) format. Data 
 # contains 64 channels and 1537 time-points. Set the number of kernels to 1.
 # (`chans`, `samples` and `kernels` are expected to be defined earlier in
 # the script; the reshape keeps the number of trials in axis 0.)
 X_train      = X_train.reshape(X_train.shape[0], chans, samples, kernels)
 X_validate   = X_validate.reshape(X_validate.shape[0], chans, samples, kernels)
 X_test       = X_test.reshape(X_test.shape[0], chans, samples, kernels)
         
 #%%    
 # configure the EEGNet-8,2,16 model with kernel length of 32 samples (other 
 # model configurations may do better, but this is a good starting point)
 model = EEGNet(nb_classes = 2, Chans = chans, Samples = samples, 
                dropoutRate = 0.5, kernLength = 32, F1 = 8, D = 2, F2 = 16, 
                dropoutType = 'Dropout')
 #%%
 # compile the model and set the optimizers.
 # NOTE(review): the loss is binary_crossentropy although the model is built
 # with nb_classes = 2; confirm this is intended and matches the label
 # encoding used when fitting.
 model.compile(loss='binary_crossentropy', optimizer='adam', 
                   metrics = ['accuracy'])
     
 # count number of parameters in the model
 numParams    = model.count_params()    
     
 # set a valid path for your system to record model checkpoints
 # NOTE(review): the path below is an absolute, machine-specific Windows
 # path; adjust it when running elsewhere.
 checkpointer = ModelCheckpoint(filepath='C:/Users/PUBLIC.DESKTOP-8KLP27O/Desktop/SSSEP/SSSEP_data/tmp/checkpoint.h5',
                                verbose=1, save_best_only=True)
     
 ##########################################################################
 # if the classification task was imbalanced (significantly more trials in one
 # class versus the others) you can assign a weight to each class during 
 # optimization to balance it out. This data is approximately balanced so we 
 # don't need to do this, but is shown here for illustration/completeness. 
 ##########################################################################
 # the syntax is {class_1:weight_1, class_2:weight_2,...}. Here just setting
Esempio n. 6
0
    def trainAndPredict(
        self,
        epochs=300,
        batchSize=1000,
        class_weights=None,
        F1=8,
        D=2,
        kernLength=None,
        dropoutRate=0.5,
        learningRate=0.001,
    ):
        """Train an EEGNet model on the instance's data and report metrics.

        The model is fitted on ``self.X_train`` / ``self.Y_train`` and
        validated on ``self.X_validate`` / ``self.Y_validate``; after every
        epoch the validation confusion matrix and related scores are printed.
        Test accuracy and the confusion matrix on ``self.X_test`` are then
        printed and, for two-class problems, a ROC curve is saved to the file
        ``roc``. Finally the run settings and scores are passed to ``log``.

        Args:
            epochs: Number of training epochs.
            batchSize: Mini-batch size used for training.
            class_weights: Class-weight mapping passed to ``fit``; when
                ``None`` it is computed with ``getClassWeights(self.y_train)``.
            F1: Number of temporal filters.
            D: Number of spatial filters per temporal filter.
            kernLength: Temporal kernel length; defaults to half of
                ``self.samples``.
            dropoutRate: Dropout rate passed to EEGNet.
            learningRate: Learning rate of the Adam optimizer.

        Returns:
            None.

        Note:
            ``self.Y_*`` are reduced with ``argmax`` over the last axis, so
            they are presumably one-hot encoded, while ``self.y_test`` is
            used directly as class labels -- confirm against the caller.
        """
        if class_weights is None:
            class_weights = getClassWeights(self.y_train)
        if kernLength is None:
            kernLength = int(self.samples / 2)

        # The number of pointwise filters is derived from F1 and D.
        F2 = F1 * D

        print('F1 (temporal filters)', F1)
        print('D (spatial filters', D)
        print('F2 (pointwise filters', F2)
        print('kernLength', kernLength)
        print('learningRate', learningRate)
        print('class_weights', class_weights)
        print('epochs', epochs)
        print('batchSize', batchSize)

        # Configure EEGNet from the arguments above (other model
        # configurations may do better, but this is a good starting point).
        model = EEGNet(nb_classes=getNumClasses(),
                       Chans=self.chans,
                       Samples=self.samples,
                       dropoutRate=dropoutRate,
                       kernLength=kernLength,
                       F1=F1,
                       D=D,
                       F2=F2,
                       dropoutType='Dropout')

        optimizer = Adam(lr=learningRate)

        metrics = ['accuracy']

        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=metrics)

        # set a valid path for your system to record model checkpoints
        checkpointer = ModelCheckpoint(filepath='/tmp/checkpoint.h5',
                                       verbose=1,
                                       save_best_only=True)

        class OnEpochEndCallback(Callback):
            """Print validation metrics at the end of every epoch."""

            def __init__(self, x_val, y_val):
                super().__init__()
                # The validation arrays are held explicitly because
                # `Callback.validation_data` is not populated by every Keras
                # version.
                self._x_val = x_val
                self._y_val = y_val

            def on_epoch_end(self, epoch, logs=None):
                predictions = np.argmax(self.model.predict(self._x_val),
                                        axis=-1)
                y_true = np.argmax(self._y_val, axis=-1)
                c = confusion_matrix(y_true, predictions)

                roc_auc = roc_auc_score(y_true, predictions)

                print('Confusion matrix:\n', c)
                # NOTE(review): these ratios are the recall of class 0 and
                # of class 1 respectively; confirm which class is "positive".
                print('sensitivity', c[0, 0] / (c[0, 1] + c[0, 0]))
                print('specificity', c[1, 1] / (c[1, 1] + c[1, 0]))
                print('roc_auc_score', roc_auc)

        epoch_report = OnEpochEndCallback(self.X_validate, self.Y_validate)
        model.fit(self.X_train,
                  self.Y_train,
                  batch_size=batchSize,
                  epochs=epochs,
                  verbose=2,
                  validation_data=(self.X_validate, self.Y_validate),
                  callbacks=[checkpointer, epoch_report],
                  class_weight=class_weights)

        probs = model.predict(self.X_test)
        preds = probs.argmax(axis=-1)
        acc = np.mean(preds == self.Y_test.argmax(axis=-1))
        print("Classification accuracy: %f " % (acc))

        # Only computed for two-class problems; stays None otherwise so the
        # log() call below never hits an unbound name.
        roc_auc = None
        if getNumClasses() == 2:
            # AUC of the hard (argmax) predictions, printed for reference.
            label_auc = roc_auc_score(self.y_test, preds)

            print('roc_auc_score', label_auc)

            # ROC curve based on the score in column 1 of the prediction.
            probsConverted = probs[:, 1]
            fpr, tpr, thresholds = roc_curve(self.y_test, probsConverted)

            gmeans = np.sqrt(tpr * (1 - fpr))
            # locate the index of the largest g-mean
            ix = np.argmax(gmeans)
            print('Best Threshold=%f, G-Mean=%.3f' %
                  (thresholds[ix], gmeans[ix]))

            # The logged AUC is the area under the score-based ROC curve.
            roc_auc = auc(fpr, tpr)
            plt.title('Receiver Operating Characteristic')
            plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
            plt.scatter(fpr[ix],
                        tpr[ix],
                        marker='o',
                        color='black',
                        label='Best')

            plt.legend(loc='lower right')
            plt.plot([0, 1], [0, 1], 'r--')
            plt.xlim([0, 1])
            plt.ylim([0, 1])
            plt.ylabel('True Positive Rate')
            plt.xlabel('False Positive Rate')
            plt.savefig('roc')

        print('confusion_matrix')
        print(confusion_matrix(self.y_test, preds))
        log(epochs, batchSize, self.samples, kernLength, dropoutRate,
            learningRate, roc_auc, acc, F1, D)