コード例 #1
0
ファイル: riem_funcs.py プロジェクト: dcarmod2/RiemNeuro
def train_classifiers(data_files, valid_runs_dict_uiuc, valid_runs_dict_whasc):
    '''
    Cross-validates three classifiers on each data file and compares feature
    correlations before and after the tangent space projection.

    For every file, 100 stratified shuffle splits (30% test) are drawn. On each
    split a covariance -> tangent space -> linear SVC pipeline, a
    covariance -> tangent space -> random forest pipeline and a
    correlation -> upper triangle -> linear SVC pipeline are fitted and scored.

        Parameters:
            data_files (list of pairs (filename,data)): the input data; every
                data['samples'] entry must provide 'TimeSeries', 'Group' and
                'Location'
            valid_runs_dict_uiuc (dictionary): dictionary containing valid runs for each patient
                (not read by this function)
            valid_runs_dict_whasc (dictionary): dictionary containing valid runs for each patient
                (not read by this function)

        Returns (in this order):
            accDict (dictionary): mean accuracy of the 'riem', 'rf' and 'cov' classifiers
            corrDict (dictionary): mean norm of the Pearson correlation matrix,
                'before' and 'after' projection
            spearDict (dictionary): mean norm of the Spearman correlation matrix,
                'before' and 'after' projection
            matDict (dictionary): mean confusion matrix for each file
            simDict (dictionary): for each file, the mean pairwise cosine similarity
                (across splits) of every row of the linear SVC coefficients

        Note:
            accDict, corrDict and spearDict are stored under the literal key
            'raw_data', so with several files only the last file's values remain.
    '''
    accDict = {}
    simDict = {}
    matDict = {}
    corrDict = {}
    spearDict = {}

    # Index pair used to pick the entries of every training sample that feed
    # the "before projection" statistics; loop invariant, so built once.
    triu_inds = np.triu_indices(33)

    for fname, data in data_files:
        samples = data['samples']
        # get time series data to make covariance matrices
        X = np.array([sample['TimeSeries'] for sample in samples])
        y = np.array([
            get_label_8(sample['Group'], sample['Location'])
            for sample in samples
        ])

        # gsr seems to produce a rank deficient covariance matrix, so oas regularization is necessary
        # NOTE(review): Covariances() is built with its defaults here; confirm
        # that the default estimator is the regularised one this remark expects.
        covest = Covariances()
        ts = TangentSpace()
        svc = SVC(kernel='linear')
        clf_riem = make_pipeline(covest, ts, svc)

        # The random forest pipeline reuses the same covest/ts instances.
        rf = RandomForestClassifier(200)
        clf_rf = make_pipeline(covest, ts, rf)

        covest2 = Correlations()
        svc2 = SVC(kernel='linear')
        get_tri_inds = to_upper_tri(0)
        clf_cov = make_pipeline(covest2, get_tri_inds, svc2)

        # Monte Carlo, in theory should run this len(y)^2 times, but I need to save my poor computer's memory.
        accRiemList = []
        accCovList = []
        accRfList = []
        coeffArr = []
        matRiemList = []
        corrArrBefore = []
        corrArrAfter = []
        spearArrBefore = []
        spearArrAfter = []

        rs = StratifiedShuffleSplit(n_splits=100, test_size=.3)
        for train_inds, test_inds in rs.split(X, y):
            X_train, X_test = X[train_inds], X[test_inds]
            y_train, y_test = y[train_inds], y[test_inds]
            # Separate copies for the correlation pipeline, as in the original
            # code, so it never shares buffers with the Riemannian pipelines.
            X_train_cov, X_test_cov = X_train.copy(), X_test.copy()
            y_train_cov, y_test_cov = y_train.copy(), y_test.copy()

            clf_riem.fit(X_train, y_train)
            clf_rf.fit(X_train, y_train)
            clf_cov.fit(X_train_cov, y_train_cov)

            # get riemann svm coefficients (last step of the pipeline)
            coeffArr.append(clf_riem[2].coef_)

            # Features before projection: one row per training sample, built
            # once and shared by the Pearson and Spearman computations.
            flat_before = np.vstack(
                [x[triu_inds].flatten() for x in X_train])
            corr_coeffs_before = np.corrcoef(flat_before, rowvar=False)
            corrArrBefore.append(np.linalg.norm(corr_coeffs_before))
            spearman_coeffs_before, _ = scipy.stats.spearmanr(flat_before,
                                                              axis=0)
            spearArrBefore.append(np.linalg.norm(spearman_coeffs_before))

            # Features after projection, using the transformers fitted as
            # part of the pipelines above.
            covs = covest.transform(X_train)
            mapped = ts.transform(covs)
            corr_coeffs_after = np.corrcoef(mapped, rowvar=False)
            # spearmanr returns (correlation, pvalue); keep only the
            # correlation so the norm is comparable with the "before" value.
            spearman_coeffs_after, _ = scipy.stats.spearmanr(mapped, axis=0)
            corrArrAfter.append(np.linalg.norm(corr_coeffs_after))
            spearArrAfter.append(np.linalg.norm(spearman_coeffs_after))

            y_pred = clf_riem.predict(X_test)
            y_pred_cov = clf_cov.predict(X_test_cov)
            y_pred_rf = clf_rf.predict(X_test)

            # save accuracy
            accRiemList.append(accuracy_score(y_test, y_pred))
            accCovList.append(accuracy_score(y_test_cov, y_pred_cov))
            accRfList.append(accuracy_score(y_test, y_pred_rf))

            # confusion matrix, rows normalised over the true labels
            mat = confusion_matrix(y_test,
                                   y_pred,
                                   normalize='true',
                                   labels=[0, 1, 2, 3, 4, 5, 6, 7])
            matRiemList.append(mat)

        # Stability of the SVC coefficients: for every coefficient row, the
        # mean cosine similarity over all distinct pairs of splits. The list is
        # created per file so files do not share (and keep extending) one list.
        simArr = []
        for z in range(len(coeffArr[0])):
            class_z_coeffs = [coeffs[z] for coeffs in coeffArr]
            cos_sim = cosine_similarity(class_z_coeffs)
            upperTri = cos_sim[np.triu_indices(cos_sim.shape[0], 1)]
            simArr.append(np.mean(upperTri))

        avgMatRiem = sum(matRiemList) / len(matRiemList)
        simDict.update({fname: simArr})
        matDict.update({fname: avgMatRiem})
        riemAcc = np.mean(accRiemList)
        covAcc = np.mean(accCovList)
        rfAcc = np.mean(accRfList)

        # NOTE(review): these three dictionaries are keyed by the literal
        # 'raw_data' rather than fname, so each file overwrites the previous
        # one. Kept unchanged because callers may read that key; confirm intent.
        accDict.update(
            {'raw_data': {
                'riem': riemAcc,
                'rf': rfAcc,
                'cov': covAcc
            }})
        corrDict.update({
            'raw_data': {
                'before': np.mean(corrArrBefore),
                'after': np.mean(corrArrAfter)
            }
        })
        spearDict.update({
            'raw_data': {
                'before': np.mean(spearArrBefore),
                'after': np.mean(spearArrAfter)
            }
        })
        print("Mean Accuracy w/ Riemann on data " + fname + ": " +
              str(riemAcc))
        print("Mean Accuracy w/ Cov on data " + fname + ": " + str(covAcc))
        print("Mean Accuracy w/ RF on data " + fname + ": " + str(rfAcc))
        print("----------------")

    return accDict, corrDict, spearDict, matDict, simDict
コード例 #2
0
 subject = name.split('_')
 data = loadmat(f_name)
 data_IS = data[list(data.keys())[-1]]
 data_tensor = [data_IS[0][0]]
 for j in range(len(data_IS)):
     if j == 0:
         k = 1
     else:
         k = 0
     for i in range(k, len(data_IS[j])):
         temp = [data_IS[j][i]]
         data_tensor = np.concatenate((data_tensor, temp), axis=0)
 cov = Covariances(estimator='lwf')
 ts = TangentSpace()
 cov.fit(data_tensor, label)
 cov_train = cov.transform(data_tensor)
 ts.fit(cov_train, label)
 ts_train = ts.transform(cov_train)
 ts_shape = (np.shape(ts_train))
 pca = PCA()
 ann = MLPClassifier(max_iter=5000)
 clf = BaggingClassifier(base_estimator=ann, bootstrap=True)
 pipe = Pipeline(steps=[('pca', pca), ('clf', clf)])
 param_grid = {
     'pca__n_components': [20, 30, 40, 50, 60, 70, 80, 90, 100],
     'clf__base_estimator__hidden_layer_sizes':
     [(10), (20), (30), (40), (50), (60), (70), (80), (90), (100), (110),
      (120), (130), (140), (150), (160), (170), (180)],
     'clf__n_estimators': [
         10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150,
         160, 170, 180