def train_classifiers(data_files, valid_runs_dict_uiuc, valid_runs_dict_whasc):
    '''
    Train and evaluate three classifiers on every input file using repeated
    stratified shuffle splits, and collect summary statistics.

    For each file three pipelines are fitted on each split:
      * covariance -> tangent space -> linear SVM   ("riem")
      * covariance -> tangent space -> random forest ("rf")
      * correlation -> upper triangle -> linear SVM  ("cov")

    Parameters:
        data_files (list of pairs (filename, data)): the input data. Each
            ``data`` is read as ``data['samples']``; every sample must provide
            the keys 'TimeSeries', 'Group' and 'Location'.
        valid_runs_dict_uiuc (dictionary): dictionary containing valid runs
            for each patient (currently not used by this function)
        valid_runs_dict_whasc (dictionary): dictionary containing valid runs
            for each patient (currently not used by this function)

    Returns (a 5-tuple, in this order):
        accDict (dictionary): ``{'raw_data': {'riem', 'rf', 'cov'}}`` mean
            accuracy of each pipeline over the splits
        corrDict (dictionary): ``{'raw_data': {'before', 'after'}}`` mean norm
            of the Pearson correlation matrix of the training features before
            and after the tangent space projection
        spearDict (dictionary): same as corrDict, for Spearman correlations
        matDict (dictionary): filename -> mean row-normalised confusion matrix
            of the "riem" pipeline
        simDict (dictionary): filename -> list holding, for every row of the
            SVM ``coef_`` matrix, the mean pairwise cosine similarity of that
            row across splits

    NOTE(review): accDict, corrDict and spearDict are keyed by the literal
    'raw_data', so with several input files only the last file's values
    survive. The key is kept for backward compatibility with callers.
    '''
    accDict = {}
    simDict = {}
    matDict = {}
    corrDict = {}
    spearDict = {}

    # Indices used to pull the "before projection" features out of each sample.
    # NOTE(review): assumes the first two axes of every TimeSeries array have
    # length >= 33 -- confirm against the data.
    tri_inds = np.triu_indices(33)

    for fname, data in data_files:
        # get time series data to make covariance matrices
        X = np.array([sample['TimeSeries'] for sample in data['samples']])
        y = np.array([
            get_label_8(sample['Group'], sample['Location'])
            for sample in data['samples']
        ])

        # gsr seems to produce a rank deficient covariance matrix, so oas
        # regularization is necessary
        # NOTE(review): the estimator is built with default arguments here;
        # confirm whether an explicit regularised estimator was intended.
        covest = Covariances()
        ts = TangentSpace()
        svc = SVC(kernel='linear')
        # The same covest / ts instances are placed in both pipelines, so
        # fitting either pipeline (re)fits these shared objects.
        clf_riem = make_pipeline(covest, ts, svc)
        rf = RandomForestClassifier(200)
        clf_rf = make_pipeline(covest, ts, rf)

        covest2 = Correlations()
        svc2 = SVC(kernel='linear')
        get_tri_inds = to_upper_tri(0)
        clf_cov = make_pipeline(covest2, get_tri_inds, svc2)

        # Monte Carlo, in theory should run this len(y)^2 times, but I need to
        # save my poor computer's memory.
        accRiemList = []
        accCovList = []
        accRfList = []
        coeffArr = []
        matRiemList = []
        corrArrBefore = []
        corrArrAfter = []
        spearArrBefore = []
        spearArrAfter = []

        rs = StratifiedShuffleSplit(n_splits=100, test_size=.3)
        for train_inds, test_inds in rs.split(X, y):
            X_train, X_test = X[train_inds], X[test_inds]
            y_train, y_test = y[train_inds], y[test_inds]
            # The correlation pipeline gets its own copies of the inputs.
            X_train_cov, X_test_cov = X_train.copy(), X_test.copy()

            clf_riem.fit(X_train, y_train)
            clf_rf.fit(X_train, y_train)
            clf_cov.fit(X_train_cov, y_train)

            # get riemann svm coefficients (step 2 of the pipeline is the SVC)
            coeffArr.append(clf_riem[2].coef_)

            # correlations between features before the projection
            raw_feats = np.vstack([x[tri_inds].flatten() for x in X_train])
            corr_coeffs_before = np.corrcoef(raw_feats, rowvar=False)
            corrArrBefore.append(np.linalg.norm(corr_coeffs_before))
            spearman_coeffs_before, _ = scipy.stats.spearmanr(raw_feats,
                                                              axis=0)
            spearArrBefore.append(np.linalg.norm(spearman_coeffs_before))

            # correlations between features after the tangent space
            # projection; covest and ts were fitted by the pipeline fits above
            mapped = ts.transform(covest.transform(X_train))
            corr_coeffs_after = np.corrcoef(mapped, rowvar=False)
            corrArrAfter.append(np.linalg.norm(corr_coeffs_after))
            # spearmanr returns (correlation, pvalue); only the correlation
            # matrix must enter the norm, exactly as in the "before" case.
            spearman_coeffs_after, _ = scipy.stats.spearmanr(mapped, axis=0)
            spearArrAfter.append(np.linalg.norm(spearman_coeffs_after))

            y_pred = clf_riem.predict(X_test)
            y_pred_cov = clf_cov.predict(X_test_cov)
            y_pred_rf = clf_rf.predict(X_test)

            # save accuracy
            accRiemList.append(accuracy_score(y_pred, y_test))
            accCovList.append(accuracy_score(y_pred_cov, y_test))
            accRfList.append(accuracy_score(y_pred_rf, y_test))

            # confusion matrix
            # NOTE(review): presumably get_label_8 yields labels 0..7 -- verify.
            mat = confusion_matrix(y_test,
                                   y_pred,
                                   normalize='true',
                                   labels=[0, 1, 2, 3, 4, 5, 6, 7])
            matRiemList.append(mat)

        # A fresh list per file, so one file's similarities never leak into
        # another file's entry.
        simDict[fname] = _mean_coef_similarity_per_row(coeffArr)
        matDict[fname] = sum(matRiemList) / len(matRiemList)

        riemAcc = np.mean(accRiemList)
        covAcc = np.mean(accCovList)
        rfAcc = np.mean(accRfList)

        accDict['raw_data'] = {'riem': riemAcc, 'rf': rfAcc, 'cov': covAcc}
        corrDict['raw_data'] = {
            'before': np.mean(corrArrBefore),
            'after': np.mean(corrArrAfter),
        }
        spearDict['raw_data'] = {
            'before': np.mean(spearArrBefore),
            'after': np.mean(spearArrAfter),
        }

        print("Mean Accuracy w/ Riemann on data " + fname + ": " +
              str(riemAcc))
        print("Mean Accuracy w/ Cov on data " + fname + ": " + str(covAcc))
        print("Mean Accuracy w/ RF on data " + fname + ": " + str(rfAcc))
        print("----------------")

    return accDict, corrDict, spearDict, matDict, simDict


def _mean_coef_similarity_per_row(coef_per_split):
    '''
    Return, for each row index of the coefficient matrices, the mean pairwise
    cosine similarity of that row taken across all splits.

    Parameters:
        coef_per_split (list of 2-D arrays): one coefficient matrix per split;
            must be non-empty and all matrices must have the same shape.

    Returns:
        list of floats, one entry per coefficient row.
    '''
    averages = []
    for row in range(len(coef_per_split[0])):
        row_vectors = [coef[row] for coef in coef_per_split]
        cos_sim = cosine_similarity(row_vectors)
        # strict upper triangle: each unordered pair of splits once, no
        # self-similarity on the diagonal
        pair_values = cos_sim[np.triu_indices(cos_sim.shape[0], 1)]
        averages.append(np.mean(pair_values))
    return averages
subject = name.split('_') data = loadmat(f_name) data_IS = data[list(data.keys())[-1]] data_tensor = [data_IS[0][0]] for j in range(len(data_IS)): if j == 0: k = 1 else: k = 0 for i in range(k, len(data_IS[j])): temp = [data_IS[j][i]] data_tensor = np.concatenate((data_tensor, temp), axis=0) cov = Covariances(estimator='lwf') ts = TangentSpace() cov.fit(data_tensor, label) cov_train = cov.transform(data_tensor) ts.fit(cov_train, label) ts_train = ts.transform(cov_train) ts_shape = (np.shape(ts_train)) pca = PCA() ann = MLPClassifier(max_iter=5000) clf = BaggingClassifier(base_estimator=ann, bootstrap=True) pipe = Pipeline(steps=[('pca', pca), ('clf', clf)]) param_grid = { 'pca__n_components': [20, 30, 40, 50, 60, 70, 80, 90, 100], 'clf__base_estimator__hidden_layer_sizes': [(10), (20), (30), (40), (50), (60), (70), (80), (90), (100), (110), (120), (130), (140), (150), (160), (170), (180)], 'clf__n_estimators': [ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180