def PersonWorker(person):
    print('starting on person: ', str(person))

    # data = 40 videos x 32 alpha (CSP channel) features
    X_train, y_train, X_test, y_test, csp = DL.loadPerson(person=person,
        featureFunc=featureFunc,
        use_csp=False,
        use_median=False
    )

    # baseline C value
    C = 1
    clf = LinearSVC(C=C, random_state=40)
    K_CV = KFold(n=len(X_train), n_folds=len(X_train), random_state=17, shuffle=False)  # leave-one-out validation

    predictions, truths = [], []
    for train_index, CV_index in K_CV:
        # train
        clf.fit(X_train[train_index], y_train[train_index])

        # predict
        pred = clf.predict(X_train[CV_index])

        # save for metric calculations
        predictions.extend(pred)
        truths.extend(y_train[CV_index])

    # optimization metric:
    best_metric = UT.auc(predictions, truths)
    best_C = C

    # try other C values
    for C in [0.01, 0.03, 0.1, 0.3, 3, 10]:
        clf = LinearSVC(C=C, random_state=40)
        K_CV = KFold(n=len(X_train), n_folds=len(X_train), random_state=17, shuffle=True)  # leave-one-out validation

        predictions, truths = [], []
        for train_index, CV_index in K_CV:
            # train
            clf.fit(X_train[train_index], y_train[train_index])

            # predict
            pred = clf.predict(X_train[CV_index])

            # save for metric calculations
            predictions.extend(pred)
            truths.extend(y_train[CV_index])

        # optimization metric:
        metric = UT.auc(predictions, truths)
        if metric > best_metric:
            best_metric = metric
            best_C = C

    # the C param is now optimized; its value is stored in best_C

    # calculate all performance metrics on the test set, using the optimal classifier
    clf = LinearSVC(C=best_C, random_state=40)
    clf.fit(X_train, y_train)  # fit all training data
    # print("coef ", clf.coef_)

    predictions = clf.predict(X_test)

    acc = UT.accuracy(predictions, y_test)
    (tpr, tnr, fpr, fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person,
        ' - acc: ', str(acc),
        ' - tpr: ', str(tpr),
        ' - tnr: ', str(tnr),
        ' - auc: ', str(auc)
    )

    return [acc, tpr, tnr, fpr, fnr, auc]
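# A minimal cross-check sketch of the C search above, assuming a recent
# scikit-learn (sklearn.model_selection) and using sklearn.metrics.roc_auc_score
# as a stand-in for UT.auc; the helper name optimize_C is hypothetical.
# cross_val_predict with LeaveOneOut pools the leave-one-out predictions before
# scoring, mirroring the manual loop.
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.model_selection import LeaveOneOut, cross_val_predict
from sklearn.metrics import roc_auc_score

def optimize_C(X_train, y_train, C_values=(0.01, 0.03, 0.1, 0.3, 1, 3, 10)):
    best_C, best_auc = None, -np.inf
    for C in C_values:
        clf = LinearSVC(C=C, random_state=40)
        # pooled leave-one-out predictions over the training set
        preds = cross_val_predict(clf, X_train, y_train, cv=LeaveOneOut())
        auc = roc_auc_score(y_train, preds)
        if auc > best_auc:
            best_auc, best_C = auc, C
    return best_C, best_auc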
def PersonWorker(person):
    max_k = 4  # len(X_train[0])

    # load data
    X_train, y_train, X_test, y_test = loadPerson(person=person,
        classFunc=valClassFunc,
        featureFunc=featureFunc,
        plots=False
    )

    # init academic loop to optimize the k param
    k = 1
    anova_filter = SelectKBest(f_regression)
    lda = LinearDiscriminantAnalysis()
    anova_lda = Pipeline([
        ('anova', anova_filter),
        ('lda', lda)
    ])
    anova_lda.set_params(anova__k=k)

    K_CV = KFold(n=len(X_train), n_folds=len(X_train),
        random_state=17,  # fixed random seed ensures that the sets are always the same
        shuffle=False
    )  # leave-one-out validation

    predictions, truths = [], []
    for train_index, CV_index in K_CV:  # train_index here is a part of the train set
        # train
        anova_lda.fit(X_train[train_index], y_train[train_index])

        # predict
        pred = anova_lda.predict(X_train[CV_index])

        # save for metric calculations
        predictions.extend(pred)
        truths.extend(y_train[CV_index])

    # optimization metric:
    best_acc = UT.accuracy(predictions, truths)
    best_k = k

    # now try different k values
    for k in range(2, max_k):
        anova_filter = SelectKBest(f_regression)
        lda = LinearDiscriminantAnalysis()
        anova_lda = Pipeline([
            ('anova', anova_filter),
            ('lda', lda)
        ])
        # set k param
        anova_lda.set_params(anova__k=k)

        # leave-one-out validation to determine how well this k value performs
        K_CV = KFold(n=len(X_train), n_folds=len(X_train),
            random_state=17,  # fixed random seed ensures that the sets are always the same
            shuffle=False
        )

        predictions, truths = [], []
        for train_index, CV_index in K_CV:  # train_index here is a part of the train set
            # train
            anova_lda.fit(X_train[train_index], y_train[train_index])

            # predict
            pred = anova_lda.predict(X_train[CV_index])

            # save for metric calculations
            predictions.extend(pred)
            truths.extend(y_train[CV_index])

        # optimization metric:
        curr_acc = UT.accuracy(predictions, truths)
        if curr_acc > best_acc:
            best_acc = curr_acc
            best_k = k

    # the k param is now optimized and stored in best_k

    # create the classifier and train it on all train data
    anova_filter = SelectKBest(f_regression)
    lda = LinearDiscriminantAnalysis()
    anova_lda = Pipeline([
        ('anova', anova_filter),
        ('lda', lda)
    ])
    # set k param
    anova_lda.set_params(anova__k=best_k)
    anova_lda.fit(X_train, y_train)

    predictions = anova_lda.predict(X_test)

    acc = UT.accuracy(predictions, y_test)
    (tpr, tnr, fpr, fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person,
        ' - k: ', str(best_k),
        ' - acc: ', str(acc),
        ' - tpr: ', str(tpr),
        ' - tnr: ', str(tnr),
        ' - auc: ', str(auc),
        'used features', anova_lda.named_steps['anova'].get_support()
    )

    retArr = [best_k, acc, tpr, tnr, fpr, fnr, auc]
    retArr.extend(anova_lda.named_steps['anova'].get_support())

    '''
    print('person: ', person,
        ' - k: ', str(best_k),
        ' - acc: ', str(acc),
        'used features', getUsedFeatures(anova_lda.named_steps['anova'].get_support())
        # anova_lda.named_steps['anova'].get_support()
    )

    classCorr = UT.classCorrect(predictions, y_test)
    dimCorr = UT.dimCorrect(predictions, y_test)

    returnArr = [best_k, acc]
    returnArr.extend(classCorr)
    returnArr.extend(dimCorr)
    returnArr.extend(anova_lda.named_steps['anova'].get_support())
    return returnArr
    '''

    return retArr
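# A minimal sketch of the same k search expressed with GridSearchCV, assuming a
# recent scikit-learn (sklearn.model_selection); the helper name optimize_k is
# hypothetical. With LeaveOneOut, the mean per-fold accuracy equals the pooled
# leave-one-out accuracy computed in the loop above, so the selected k should
# match; range(1, max_k) covers the same candidates k = 1 .. max_k - 1.
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, LeaveOneOut

def optimize_k(X_train, y_train, max_k=4):
    anova_lda = Pipeline([
        ('anova', SelectKBest(f_regression)),
        ('lda', LinearDiscriminantAnalysis())
    ])
    grid = GridSearchCV(anova_lda,
                        param_grid={'anova__k': list(range(1, max_k))},
                        scoring='accuracy',
                        cv=LeaveOneOut())
    grid.fit(X_train, y_train)
    # best k and the boolean mask of selected features
    best_k = grid.best_params_['anova__k']
    support = grid.best_estimator_.named_steps['anova'].get_support()
    return best_k, support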
def PersonWorker(person):
    print('starting on person: ', str(person))

    # data = 40 videos x 32 alpha (CSP channel) features
    X_train, y_train, X_test, y_test, csp = DL.loadPerson(person=person,
        featureFunc=featureFunc,
        use_median=False,
        use_csp=True,
        prefilter=False
    )

    # store weights of the upper CSP channel for topoplots
    csp.write_filters()

    # optimize channelPairs with leave-one-out validation
    # prior probabilities
    pos_prior = np.sum(y_train)
    neg_prior = 40 - pos_prior
    pos_prior /= float(40)
    neg_prior /= float(40)

    # academic loop: start with 1 channel pair
    channelPairs = 1

    # filter out the channel pairs
    X = np.zeros((len(X_train), channelPairs * 2))
    top_offset = channelPairs * 2 - 1
    for j, k in zip(range(channelPairs), range(31, 31 - channelPairs, -1)):
        X[:, j] = X_train[:, j]
        X[:, top_offset - j] = X_train[:, k]

    # LDA
    lda = LinearDiscriminantAnalysis(priors=[neg_prior, pos_prior])
    K_CV = KFold(n=len(X), n_folds=len(X), random_state=17, shuffle=False)  # leave-one-out validation

    predictions, truths = [], []
    for train_index, CV_index in K_CV:
        # train
        lda = lda.fit(X[train_index], y_train[train_index])

        # predict
        pred = lda.predict(X[CV_index])

        # save for metric calculations
        predictions.extend(pred)
        truths.extend(y_train[CV_index])

    # optimization metric:
    best_metric = UT.accuracy(predictions, truths)
    best_channelPairs = channelPairs

    # try other channel pair counts
    for channelPairs in range(2, 17):
        # filter out the channel pairs
        X = np.zeros((len(X_train), channelPairs * 2))
        top_offset = channelPairs * 2 - 1
        for j, k in zip(range(channelPairs), range(31, 31 - channelPairs, -1)):
            X[:, j] = X_train[:, j]
            X[:, top_offset - j] = X_train[:, k]

        # LDA
        lda = LinearDiscriminantAnalysis(priors=[neg_prior, pos_prior])
        K_CV = KFold(n=len(X), n_folds=len(X), random_state=17, shuffle=True)  # leave-one-out validation

        predictions, truths = [], []
        for train_index, CV_index in K_CV:
            # train
            lda = lda.fit(X[train_index], y_train[train_index])

            # predict
            pred = lda.predict(X[CV_index])

            # save for metric calculations
            predictions.extend(pred)
            truths.extend(y_train[CV_index])

        # optimization metric:
        metric = UT.accuracy(predictions, truths)
        if metric > best_metric:
            best_metric = metric
            best_channelPairs = channelPairs

    # the channel pair count is now optimized; its value is stored in best_channelPairs

    # calculate all performance metrics on the test set, using the optimal classifier:
    # keep only the best_channelPairs outer CSP channels, the same selection used in the
    # optimization loop, for both the train and the test set
    X_tr = np.zeros((len(X_train), best_channelPairs * 2))
    X_te = np.zeros((len(X_test), best_channelPairs * 2))
    top_offset = best_channelPairs * 2 - 1
    for j, k in zip(range(best_channelPairs), range(31, 31 - best_channelPairs, -1)):
        X_tr[:, j] = X_train[:, j]
        X_tr[:, top_offset - j] = X_train[:, k]
        X_te[:, j] = X_test[:, j]
        X_te[:, top_offset - j] = X_test[:, k]

    lda = LinearDiscriminantAnalysis(priors=[neg_prior, pos_prior])
    lda = lda.fit(X_tr, y_train)  # fit all training data

    predictions = lda.predict(X_te)

    acc = UT.accuracy(predictions, y_test)
    (tpr, tnr, fpr, fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person,
        ' - channelPairs: ', str(best_channelPairs),
        ' - acc: ', str(acc),
        ' - tpr: ', str(tpr),
        ' - tnr: ', str(tnr),
        ' - auc: ', str(auc)
    )

    return [best_channelPairs, acc, tpr, tnr, fpr, fnr, auc]
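# A minimal driver sketch for running PersonWorker over all participants in
# parallel. The participant range (1..32, DEAP-style, 1-based), the pool size,
# and the column summary are assumptions, not taken from the original code.
import numpy as np
from multiprocessing import Pool

if __name__ == '__main__':
    with Pool(processes=8) as pool:
        results = pool.map(PersonWorker, range(1, 33))

    results = np.array(results, dtype=float)
    # columns: best_channelPairs, acc, tpr, tnr, fpr, fnr, auc
    print('mean over persons:', np.mean(results, axis=0))
    print('std  over persons:', np.std(results, axis=0))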