def test_permutation_test_score_allow_nans():
    # Check that permutation_test_score allows input data with NaNs
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    y = np.repeat([0, 1], X.shape[0] / 2)
    p = Pipeline([("imputer", Imputer(strategy="mean", missing_values="NaN")), ("classifier", MockClassifier())])
    cval.permutation_test_score(p, X, y, cv=5)
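For comparison, here is a minimal sketch of the same check against the modern API (an assumption: scikit-learn >= 0.20, where sklearn.cross_validation and Imputer are gone, SimpleImputer replaces Imputer, and a real estimator stands in for the test suite's MockClassifier):

# Sketch only: modern-API version of the NaN check (assumes sklearn >= 0.20;
# LogisticRegression is a stand-in for MockClassifier).
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import permutation_test_score
from sklearn.pipeline import Pipeline

X = np.arange(200, dtype=np.float64).reshape(10, -1)
X[2, :] = np.nan
y = np.repeat([0, 1], X.shape[0] // 2)
p = Pipeline([
    ("imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
    ("classifier", LogisticRegression()),
])
permutation_test_score(p, X, y, cv=5)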
def test_permutation_score():
    iris = load_iris()
    X = iris.data
    X_sparse = coo_matrix(X)
    y = iris.target
    svm = SVC(kernel="linear")
    cv = cval.StratifiedKFold(y, 2)

    score, scores, pvalue = cval.permutation_test_score(svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")
    assert_greater(score, 0.9)
    assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = cval.permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy", labels=np.ones(y.size), random_state=0
    )
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel="linear")
    cv_sparse = cval.StratifiedKFold(y, 2)
    score_label, _, pvalue_label = cval.permutation_test_score(
        svm_sparse,
        X_sparse,
        y,
        n_permutations=30,
        cv=cv_sparse,
        scoring="accuracy",
        labels=np.ones(y.size),
        random_state=0,
    )

    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # test with custom scoring object
    def custom_score(y_true, y_pred):
        return ((y_true == y_pred).sum() - (y_true != y_pred).sum()) / y_true.shape[0]

    scorer = make_scorer(custom_score)
    score, _, pvalue = cval.permutation_test_score(svm, X, y, n_permutations=100, scoring=scorer, cv=cv, random_state=0)
    assert_almost_equal(score, 0.93, 2)
    assert_almost_equal(pvalue, 0.01, 3)

    # set random y
    y = np.mod(np.arange(len(y)), 3)

    score, scores, pvalue = cval.permutation_test_score(svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")

    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)
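The custom score above simplifies to 2 * accuracy - 1; a quick standalone check (Python 3 division assumed):

# Standalone sanity check that custom_score == 2 * accuracy - 1.
import numpy as np

def custom_score(y_true, y_pred):
    return ((y_true == y_pred).sum() - (y_true != y_pred).sum()) / y_true.shape[0]

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
assert custom_score(y_true, y_pred) == 2 * (y_true == y_pred).mean() - 1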
def test_permutation_score():
    iris = load_iris()
    X = iris.data
    X_sparse = coo_matrix(X)
    y = iris.target
    svm = SVC(kernel='linear')
    cv = cval.StratifiedKFold(y, 2)

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, zero_one_score, cv)

    assert_greater(score, 0.9)
    np.testing.assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = cval.permutation_test_score(
        svm, X, y, zero_one_score, cv, labels=np.ones(y.size),
        random_state=0)

    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SparseSVC(kernel='linear')
    cv_sparse = cval.StratifiedKFold(y, 2, indices=True)
    score_label, _, pvalue_label = cval.permutation_test_score(
        svm_sparse, X_sparse, y, zero_one_score, cv_sparse,
        labels=np.ones(y.size), random_state=0)

    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # set random y
    y = np.mod(np.arange(len(y)), 3)

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, zero_one_score, cv)

    assert_less(score, 0.5)
    assert_greater(pvalue, 0.4)
Example #7
File: tools.py Project: helloTC/ATT
def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True,
                                 cvmeth='shufflesplit', score_type='r2',
                                 n_perm=1000):
    """
    An easy way to evaluate the significance of a cross-validated score
    by permutations.
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: independent variable(s)
        y: dependent variable
        n_fold: number of cross-validation folds
        isshuffle: whether to shuffle samples when cvmeth is 'kfold'
        cvmeth: 'kfold' or 'shufflesplit'.
                shufflesplit is the random permutation cross-validation iterator
        score_type: scoring type, 'r2' by default
        n_perm: number of permutations
    Return:
        score: model score
        permutation_scores: model scores with permuted labels
        pvalue: p value of the score against the permutation distribution
    """
    try:
        from sklearn import cross_validation, preprocessing
    except ImportError:
        raise Exception('To call this function, please install sklearn')
    if X.ndim == 1:
        X = np.expand_dims(X, axis=1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis=1)
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(y.shape[0], n_fold, shuffle=isshuffle)
    elif cvmeth == 'shufflesplit':
        testsize = 1.0 / n_fold
        cvmethod = cross_validation.ShuffleSplit(y.shape[0], n_iter=100,
                                                 test_size=testsize,
                                                 random_state=0)
    else:
        raise ValueError("cvmeth must be 'kfold' or 'shufflesplit'")
    score, permutation_scores, pvalue = cross_validation.permutation_test_score(
        estimator, X, y, scoring=score_type, cv=cvmethod, n_permutations=n_perm)
    return score, permutation_scores, pvalue
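A hypothetical call to this helper on synthetic regression data (a sketch only; the data, estimator, and fold counts are illustrative, and an old scikit-learn < 0.20 that still ships sklearn.cross_validation is assumed):

# Hypothetical usage sketch: synthetic linear data, 5-fold KFold CV,
# 100 permutations (requires scikit-learn < 0.20).
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 3))
y = X.dot([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=50)

score, perm_scores, pvalue = permutation_cross_validation(
    LinearRegression(), X, y, n_fold=5, cvmeth='kfold', n_perm=100)
print("r2 = %.3f, p = %.3f" % (score, pvalue))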
def automatic_bernulli():
    data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    Y = np.array(data['fight'].get_values())
    np.random.shuffle(Y)
    data.drop(['match', 'city', 'date', 'fight'], 1, inplace=True)
    # data = data[['anger_over_value_relation', 'avg_likes', 'sc_max_surprise', 'sc_median_fear',
    #              'fear_over_value_relation']]

    X = data.as_matrix()

    features_number = 0
    result = {}
    for features_number in range(3, 16):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(X, Y)
        # X_new = X
        classifier = ExtraTreesClassifier()
        super_means = []
        for i in range(1000):
            kf = KFold(len(X_new), n_folds=6, shuffle=True)
            means = []
            for training, testing in kf:
                classifier.fit(X_new[training], Y[training])
                prediction = classifier.predict(X_new[testing])
                curmean = np.mean(prediction == Y[testing])
                means.append(curmean)
            super_means.append(np.mean(means))
        print 'features_number=', features_number, 'Mean accuracy: {:.1%} '.format(
            np.mean(super_means))
        # result['fn'+str(features_number)+'n_n'+str(n_neib)] = np.mean(super_means)
        score, permutation_scores, pvalue = permutation_test_score(
            classifier, X_new, Y, scoring="accuracy", cv=kf,
            n_permutations=len(Y), n_jobs=1)
        print("Classification score %s (pvalue : %s)" % (score, pvalue))
def classify(x, y, classifier='lda', kern='rbf', n_folds=10, rep=10, kind='sf', n_jobs=1, n_knn=3, n_perm=0, n_tree=100,
             cvkind='skfold'):
    """Return da, all_scores, permutation_scores, pvalue."""
    # Check format :
    x = checkfeat(x, y)
    n_epoch, n_feat = x.shape
    # Uniform class priors (1.0/ avoids integer division under Python 2):
    priors = n.array([1.0 / len(n.unique(y))] * len(n.unique(y)))

    # - Classifier's choice :
    if (type(classifier) is int) | (type(classifier) is str):
        clf = classifier_choice(classifier, kern=kern, n_knn=n_knn, n_tree=n_tree, priors=priors)
    else:
        clf = classifier

    # - Cross validation definition :
    if kind == 'mf' and n_perm == 0:  # Multi feature classification
        da, all_scores, cv_model = classify_fcn(x, y, clf, n_folds=n_folds, rep=rep, n_jobs=n_jobs, cvkind=cvkind)
    elif kind == 'sf' and n_perm == 0:  # Single features classification
        da = n.zeros((1, n_feat))
        all_scores = n.zeros((rep, n_folds, n_feat))
        for k in range(0, n_feat):
            da[:, k], all_scores[:, :, k], cv_model = classify_fcn(x[:, k], y, clf, n_folds=n_folds, rep=rep,
                                                                   n_jobs=n_jobs, cvkind=cvkind)

    # Statistical evaluation :
    if n_perm == 0:
        permutation_scores, pvalue = 0, [[0]]
    else:
        all_scores = 0
        cv_model = crossval_choice(y, cvkind=cvkind, n_folds=n_folds, rndstate=0)
        if kind == 'mf':  # Multi feature classification
            da, permutation_scores, pvalue = cross_validation.permutation_test_score(clf, x, y, scoring="accuracy",
                                                                                     cv=cv_model, n_permutations=n_perm,
                                                                                     n_jobs=n_jobs)
        elif kind == 'sf':  # Single features classification
            permutation_scores = n.zeros((n_perm, n_feat))
            da = n.zeros((1, n_feat))
            pvalue = n.zeros((1, n_feat))
            for k in range(0, n_feat):
                da[0, k], permutation_scores[:, k], pvalue[0, k] = cross_validation.permutation_test_score(clf, x[:, k], y,
                                                                                                           scoring="accuracy",
                                                                                                           cv=cv_model,
                                                                                                           n_permutations=n_perm,
                                                                                                           n_jobs=n_jobs)

    return 100*da, 100*all_scores, permutation_scores, list(pvalue[0])
def computeScore(svm, X, y, cv):
    score, permutation_scores, pvalue = permutation_test_score(
        svm, X, y, scoring='accuracy', cv=cv, n_permutations=100, n_jobs=1)
    print("Classification score %s (pvalue: %s)" % (score, pvalue))
    return score, permutation_scores, pvalue
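A hypothetical call to this helper (a sketch; SVC and load_iris are assumed importable alongside the original file's imports, and the integer 5-fold cv is an illustrative choice):

# Hypothetical usage sketch for computeScore.
from sklearn.datasets import load_iris
from sklearn.svm import SVC

iris = load_iris()
computeScore(SVC(kernel='linear'), iris.data, iris.target, cv=5)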
def check_trop_score(X_data, trop_clusters):
    cv = Bootstrap(X_data.shape[0], n_iter=3, train_size=0.7)
    pred = KMeans(n_clusters=len(set(trop_clusters)))
    t_score, scores, pval = permutation_test_score(
        pred, X_data, trop_clusters, n_permutations=100, n_jobs=20,
        scoring=rand_linker, cv=cv)
    return t_score, scores, pval
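rand_linker is a project-specific scorer not shown here; a comparable scorer can be sketched from scikit-learn's adjusted Rand index (an assumption about what rand_linker does, not its actual implementation):

# Sketch of a Rand-index scorer in the spirit of rand_linker
# (assumption: it scores agreement between true and predicted clusters).
from sklearn.metrics import adjusted_rand_score, make_scorer

rand_scorer = make_scorer(adjusted_rand_score)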
    def test_permutation_test_score(self):
        import sklearn.svm as svm
        iris = datasets.load_iris()

        df = pdml.ModelFrame(iris)
        clf = svm.SVC(kernel=str('linear'), C=1)
        result = df.cross_validation.permutation_test_score(clf, cv=5)
        expected = cv.permutation_test_score(clf, iris.data, y=iris.target, cv=5)

        self.assertEqual(len(result), 3)
        self.assertEqual(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1])
        self.assertEqual(result[2], expected[2])
Example #14
def permutation():
    file = 'data/n228_bcdefgh.mat'
    dat = data.load(file)
    X, y = data.build(dat, range(0, 96), 'fr1', 17)

    # Univariate Feature Selection
    select = SelectKBest(f_classif, k=27).fit(X, y)
    Xa = select.transform(X)

    # Select good cell with heuristic
    channel = data.goodCell(dat)
    Xb, y = data.build(dat, channel, 'fr1', 17)

    # PCA Dimensionality Reduction
    pca = PCA(n_components=38)
    Xc = pca.fit_transform(X)


    dat = [X, Xa, Xb, X, Xc, Xa]
    pNB = PoissonNB()
    gNB = GaussianNB()
    classifiers = [pNB, pNB, pNB, gNB, gNB, gNB]
    label = ['Poisson Unreduced', 'Poisson Univariate Reduction',
             'Poisson Heuristic Reduction', 'Gaussian No reduction',
             'Gaussian PCA reduction', 'Gaussian Univariate Reduction']
    scores = []
    perm_scores = []
    p_value = []

    for i in range(len(dat)):
        score, permutation_score, pvalue = permutation_test_score(
            classifiers[i], dat[i], y,
            cv=StratifiedKFold(y, n_folds=3, shuffle=True, random_state=42),
            n_permutations=100, n_jobs=-1, random_state=42,
            scoring=make_scorer(error_distance, greater_is_better=False))
        scores.append(score)
        perm_scores.append(np.mean(permutation_score))
        p_value.append(pvalue)

    ind = np.arange(len(scores))
    plt.bar(ind, scores)
#    ax.set_xticks(ind)
#    ax.set_xticklabels(label)
    plt.plot(ind, perm_scores)


    plt.show()


    print "Average Distance between real location and predicted location"
    print score
    print "Chance Performance, from permutation"
    print np.mean(permutation_score)
    print "p-value"
    print pvalue
def handle_bayes():
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    signs = ['ms_avg_sadness', 'ms_avg_sadness', 'ms_disgust', 'ms_contempt', 'ms_max_sadness', 'ms_median_surprise',
             'ms_avg_happiness']
    signs = ['ms_avg_sadness', 'ms_avg_sadness', 'ms_disgust', 'ms_contempt', 'ms_max_sadness', 'ms_median_surprise']
    signs = ['ms_median_sadness', 'likes', 'ms_min_anger', 'ms_min_disgust', 'ms_min_fear', 'ms_avg_anger' ]
    X = input_data[signs]
    X = X.as_matrix()
    Y = np.array(input_data['fight'].get_values())
    classifier = GaussianNB()
    # Folds must cover all samples, not the feature list:
    kf = KFold(len(X), n_folds=6, shuffle=True)
    for training, testing in kf:
        classifier.fit(X[training], Y[training])
    score, permutation_scores, pvalue = permutation_test_score(
        classifier, X, Y, scoring="accuracy", cv=kf, n_permutations=len(Y),
        n_jobs=1)
    print("Classification score %s (pvalue : %s)" % (score, pvalue))
Example #17
def test_permutation_score():
    iris = load_iris()
    X = iris.data
    X_sparse = coo_matrix(X)
    y = iris.target
    svm = SVC(kernel='linear')
    cv = cval.StratifiedKFold(y, 2)

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, cv=cv, scoring="accuracy")
    assert_greater(score, 0.9)
    assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = cval.permutation_test_score(
        svm, X, y, cv=cv, scoring="accuracy", labels=np.ones(y.size),
        random_state=0)
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # test with custom scoring object
    scorer = make_scorer(fbeta_score, beta=2)
    score_label, _, pvalue_label = cval.permutation_test_score(
        svm, X, y, scoring=scorer, cv=cv, labels=np.ones(y.size),
        random_state=0)
    assert_almost_equal(score_label, .97, 2)
    assert_almost_equal(pvalue_label, 0.01, 3)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel='linear')
    cv_sparse = cval.StratifiedKFold(y, 2)
    score_label, _, pvalue_label = cval.permutation_test_score(
        svm_sparse, X_sparse, y, cv=cv_sparse,
        scoring="accuracy", labels=np.ones(y.size), random_state=0)

    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # set random y
    y = np.mod(np.arange(len(y)), 3)

    score, scores, pvalue = cval.permutation_test_score(svm, X, y, cv=cv,
                                                        scoring="accuracy")

    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)

    # test with deprecated interface
    with warnings.catch_warnings(record=True):
        score, scores, pvalue = cval.permutation_test_score(
            svm, X, y, score_func=accuracy_score, cv=cv)
    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)
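The deprecated interface above is the old score_func spelling of the same request; a minimal sketch of the mapping the test relies on (names as in the test, purely illustrative):

# Sketch: on versions that still accept score_func, these are equivalent
# ways to request an accuracy-scored permutation test.
# cval.permutation_test_score(svm, X, y, score_func=accuracy_score, cv=cv)
# cval.permutation_test_score(svm, X, y, scoring="accuracy", cv=cv)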
Example #18
    def regr_one(self, train_x, train_y, test_size, predict_ornot):
        if predict_ornot:
            train_x, test_x, train_y, test_y = train_test_split(
                train_x, train_y, test_size=test_size, random_state=10)  #
        else:
            test_x, test_y = train_x, train_y
        regr = linear_model.LogisticRegression()
        #         regr = linear_model.LinearRegression()
        regr.fit(X=train_x, y=train_y)
        predict_result = regr.predict(X=test_x)
        i, j = 0, 0
        for a, b in zip(*(predict_result, test_y)):
            i += 1
            if a != b:
                j += 1
                #print a,b
        print 'accuracy:', (i - j) / (i * 1.0)
        score, permutation_scores, pvalue = permutation_test_score(
            regr, train_x, train_y, scoring="accuracy")
        print 'score,  pvalue = ', score, pvalue

        return regr, predict_result, test_y
def bayes_classification(permutation, test):
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    output_data = []
    Y = np.array(input_data['fight'].get_values())
    if permutation:
        np.random.shuffle(Y)
    input_data = input_data.drop(['match', 'city', 'date', 'fight'], 1)
    data_array = input_data.as_matrix()
    for features_number in range(3,30,1):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(data_array, Y)
        classifier = GaussianNB()
        kf = KFold(len(X_new), n_folds=6, shuffle=True)
        means = []
        for training, testing in kf:
            classifier.fit(X_new[training], Y[training])
            prediction = classifier.predict(X_new[testing])
            curmean = np.mean(classifier.score(X_new[testing], Y[testing]))
            means.append(curmean)
        output_data.append(np.mean(means))
        score, permutation_scores, pvalue = permutation_test_score(
            classifier, X_new, Y, scoring="accuracy", cv=kf,
            n_permutations=len(Y), n_jobs=1)
        if test:
            print("Classification score %s (pvalue : %s)" % (score, pvalue))
    return output_data
Example #20
#cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv, n_jobs=-1, verbose=10) 

session_label = labels['chunks']  
session_label = session_label[condition_mask] 
cv = LeaveOneLabelOut(labels=session_label)  
cv_scores_one = cross_val_score(svc, fmri_masked, target, cv=cv) 
# Use F1 scoring
#cv_scores = cross_val_score(svc, fmri_masked, target, cv=cv,  scoring='f1')  
# Compute the mean classification accuracy
classification_accuracy = np.mean(cv_scores)

classification_accuracy_one = np.mean(cv_scores_one)
# Cross-validation score of a dummy (chance-level) classifier
null_cv_scores = cross_val_score(DummyClassifier(), fmri_masked, target, cv=cv)  
# Permutation test
null_cv_scores_2 = permutation_test_score(svc, fmri_masked, target, cv=cv)  
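# Added sketch: permutation_test_score returns a (score, permutation_scores,
# pvalue) tuple, so the null result above is more informative unpacked:
null_score_2, null_perm_scores_2, null_pvalue_2 = null_cv_scores_2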

# Retrieve the SVC discriminating weights
coef_ = svc.coef_

# Reverse masking thanks to the Nifti Masker
coef_img = nifti_masker.inverse_transform(coef_)

# Save the coefficients as a Nifti image
coef_img.to_filename('haxby_svc_weights.nii')


from nilearn import image
from nilearn.plotting import plot_stat_map, show
import nibabel as nib
def search_all(
    log_dir="data/step4/left_hemi_select_rois",
    conn_filter_fn=lambda conn: np.all([
        i['name'] in get_jhu_names("data/jhu_rois_left_adjusted.csv")
        for i in all_jhu_coordinates()[conn].itervalues()
    ])):
    def _log_dir(f_name):
        import os
        return os.path.join(log_dir, f_name)

    import logging
    logging.basicConfig(filename=_log_dir('search_results.log'),
                        level=logging.DEBUG,
                        filemode='w+')

    data_types = {'atw': ['z'], 'adw': ['z']}
    for data_type in data_types:
        full = pd.read_csv("data/step3/full_%s.csv" % data_type)

        if conn_filter_fn is not None:
            full = filter_roi_conns(full, conn_filter_fn)

        for target_col in data_types[data_type]:
            target_col = data_type + '_' + target_col
            print("about to process: %s" % target_col)
            logger = logging.getLogger(target_col)

            logger.info("results for %s" % target_col)

            search = run(full, target_col)
            search_normalize = run(full, target_col, normalize=True)

            (search, normalized) = (search, "no") if search.best_score_ > search_normalize.best_score_ \
                else (search_normalize, "yes")

            logger.info("normalized: %s" % normalized)

            logger.info("best score: %s" % search.best_score_)
            logger.info("best params: %s" % search.best_params_)

            data, target = separate(full, target_col)

            best_svr = search.best_estimator_.named_steps['svr']
            best_svr.reset_perm_coefs()

            def save_csv(desc, arr):
                f_name = _log_dir('%s_%s.csv' % (target_col, desc))
                np.savetxt(f_name, arr, delimiter=',')

            save_csv('best_coefs', best_svr.coef_)

            score, permutation_pred_scores, p_value = permutation_test_score(
                search.best_estimator_,
                data.get_values(),
                target.get_values(),
                scoring=search.scoring,
                cv=search.cv,
                n_permutations=100)

            logger.info("best score perms: %s" % score)

            save_csv('permute_pred_scores', permutation_pred_scores)
            save_csv('permute_max_coefs', best_svr.permute_max_coefs())
            save_csv('permute_min_coefs', best_svr.permute_min_coefs())

            logger.info("p-value: %s" % p_value)
            if p_value >= .05:
                logger.warning("p_value of %s >= .05" % p_value)

            train_sizes, train_scores, test_scores = learning_curve(
                search.best_estimator_,
                data.get_values(),
                target.get_values(),
                cv=search.cv,
                train_sizes=np.linspace(.1, 1.0, 5))

            save_csv("learning_curve_train_sizes", train_sizes)
            save_csv("learning_curve_train_scores", train_scores)
            save_csv("learning_curve_test_scores", test_scores)
Example #25
    clf = RandomForestClassifier(n_estimators=500, max_features=None)
elif args.clf == "GradientBoostingClassifier":   
    clf = GradientBoostingClassifier(
            n_estimators=100, learning_rate=1.0, 
            max_depth=1, random_state=prng
            )
else:
    raise ValueError("--clf not understood")

# Go
acc, perm, p = permutation_test_score(
        clf, X, y, 
        score_func=None, 
        cv=cv, 
        n_permutations=args.null, 
        n_jobs=5, 
        labels=None, 
        random_state=prng, 
        verbose=0, 
        scoring="accuracy"
        )

# Save
with open(args.o[0], "a") as f:
    f.write("{0},{1},{2},{3}\n".format(
            np.round(acc, decimals=3),
            np.round(np.mean(perm), decimals=3),
            np.round(p, decimals=4),
            args.name))
Example #26
def do_session(
    ds,
    clf=SVC(kernel="linear", probability=True),
    scoring=score,
    targets="quantized_distance",
    n_jobs=1,
    learning_curve=False,
    permutation_test=False,
):

    ds.sa["chunks"] = ["{}:{}".format(sid, scan) for sid, scan in zip(ds.sa["session_id"], ds.sa["run"])]

    ds.sa["targets"] = ds.sa[targets]

    # fixme: do wiener filter here

    from mvpa2.mappers.detrend import PolyDetrendMapper

    detrender = PolyDetrendMapper(polyord=1, chunks_attr="chunks")

    ds = ds.get_mapped(detrender)

    ds = ds[numpy.logical_not(numpy.logical_or(ds.sa.move, ds.sa.cue)), :]

    if ds.nfeatures > 3000:
        fs = SelectKBest(k=3000)
        fs.fit(ds.samples, ds.sa.search > 0)

    ds = ds[ds.sa.search > 0, :]

    if ds.nfeatures > 3000:
        ds = ds[:, fs.get_support()]

    logger.info("Configuring cross validation")
    cv = StratifiedKFold(ds.sa.quantized_distance, n_folds=6)  # FIXME: make this a function parameter

    logger.info("Beginning cross validation")
    scores = cross_val(clf, ds.samples, ds.targets, cv, scoring)

    if learning_curve:
        from sklearn.learning_curve import learning_curve

        logger.info("Beginning learning curve analysis")

        train_sizes_abs, train_scores, test_scores = learning_curve(
            clf, ds.samples, ds.targets, n_jobs=n_jobs, verbose=50, scoring="accuracy"
        )

    if permutation_test:
        logger.info("Beginning permutation test")
        score, permutation_scores, pvalue = permutation_test_score(
            clf, ds.samples, ds.targets, cv=cv, n_jobs=n_jobs, verbose=50,
            scoring="accuracy"
        )

    result = {}
    result["datetime"] = datetime.datetime.now()
    if ds.nfeatures > 3000:
        result["fs"] = fs
    result["mapper"] = ds.mapper
    # result['clf'] = clf
    # result['cv'] = cv
    # result['scoring'] = scoring
    result["scores"] = scores
    if learning_curve:
        result["learning_curve"] = (train_sizes_abs, train_scores, test_scores)
    else:
        result["learning_curve"] = None

    if permutation_test:
        result["pvalue"] = pvalue
    else:
        result["pvalue"] = None

    return result

k = 60
feature_selection = SelectKBest(f_classif, k=k)

pipeline_anova = Pipeline([('anova', feature_selection), ('scale', scaler),
                           ('classif_name', svm)])
pipeline = Pipeline([('scale', scaler), ('classif_name', svm)])
grid = GridSearchCV(pipeline_anova, param_grid={'anova__k': [20, 60, 100, 200]},
                    verbose=1)


gr = GraphTransformer(rest=rest, coords=coords, kind='mixed',
                      method='correlation', spars=0.5, geo_alpha=0.00015)
param = [
    {'graph__kind': ['geometric'], 'graph__method': ['distance'],
     'graph__spars': [0., 0.5], 'graph__geo_alpha': [0.00015]},
    {'graph__kind': ['functional'], 'graph__method': ['covariance', 'correlation'],
     'graph__spars': [0.1, 0.3, 0.5, 0.7], 'anova__k': [10, 30, 60, 100, 200]},
    {'graph__kind': ['mixed'], 'graph__method': ['covariance', 'correlation'],
     'graph__spars': [0.3, 0.5, 0.7]},
]
pipeline_graph_anova = Pipeline([('graph', gr), ('anova', feature_selection),
                                 ('scale', scaler), ('classif_name', svm)])
grid_graph = GridSearchCV(pipeline_graph_anova, param_grid=param, verbose=1)
#nested_cv_scores = cross_val_score(grid, cond, y,cv=cv)
#print("Nested CV score: %.4f" % np.mean(nested_cv_scores))


########################
# Cat IMP/DES CROSS VALIDATION STIM

cv = LeaveOneLabelOut(block)
score_cv = cross_val_score(pipeline_anova, cond, y,cv=cv)
null_score_cv = permutation_test_score(pipeline_anova, cond, y, cv=cv)
# weights = pipeline_anova.named_steps['classif_name'].coef_
# plot_selectedregions(pipeline_anova, masker, weights=weights, anova_name='anova')
Example #29
for train, test in cv.StratifiedKFold(pdata.classtype, 18):
    model = LinearRegression()
    model.fit(Xnew[train], y[train])
    result.append([y[test], model.predict(Xnew[test])])
result_lsas = result
y_true = []; y_pred = []
for a,b in result:
    y_true.extend(a.tolist())
    y_pred.extend(b.tolist())
result = np.array(np.vstack((y_true, y_pred))).T

# <codecell>

value, distribution, pvalue = cv.permutation_test_score(LinearRegression(), Xnew, y,
                                                        score_func=skm.mean_square_error,
                                                        cv=cv.StratifiedKFold(pdata.classtype, 18),
                                                        n_permutations=2000,
                                                        )

# <codecell>
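# Note: mean_square_error is a loss (lower is better), while this
# permutation p-value counts permutations scoring at least as *high* as
# the real model, so nearly all permutations "beat" a good regressor;
# the title below therefore reports 1 - pvalue.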

hist(distribution, 32, alpha=0.5, color='gray')
plot([value, value], [0,200], 'r')
title('p=%.2f' % (1-pvalue))
xlabel('Mean square error')

# <codecell>

print np.corrcoef(result.T)
Rmodel(result.T[0], result.T[1])
iris = datasets.load_iris()
X = iris.data
y = iris.target
n_classes = np.unique(y).size

# Some noisy data not correlated
random = np.random.RandomState(seed=0)
E = random.normal(size=(len(X), 2200))

# Add noisy data to the informative features for make the task harder
X = np.c_[X, E]

svm = SVC(kernel='linear')
cv = StratifiedKFold(y, 2)

score, permutation_scores, pvalue = permutation_test_score(
    svm, X, y, zero_one_score, cv=cv, n_permutations=100, n_jobs=1)

print "Classification score %s (pvalue : %s)" % (score, pvalue)

###############################################################################
# View histogram of permutation scores
pl.hist(permutation_scores, 20, label='Permutation scores')
ylim = pl.ylim()
# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib
#pl.vlines(score, ylim[0], ylim[1], linestyle='--',
#          color='g', linewidth=3, label='Classification Score'
#          ' (pvalue %s)' % pvalue)
#pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
#          color='k', linewidth=3, label='Luck')
pl.plot(2 * [score], ylim, '--g', linewidth=3,
        label='Classification Score'
        ' (pvalue %s)' % pvalue)
Example #31
    _, pdata = get_subject_data(X)
    X = pdata.subject
    y = pdata.lsas_pre - pdata.lsas_post
    n_subjects, = X.shape

    """
    result = []
    for train, test in cv.StratifiedKFold(pdata.classtype, 18):
        model = BrainReg().fit(X[train], y[train])
        result.append((y[test], model.predict(X[test])))
    """

    value, distribution, pvalue = cv.permutation_test_score(BrainReg(), X, y,
                                                            skm.mean_square_error,
                                                            cv=cv.StratifiedKFold(
                                                                pdata.classtype,
                                                                18),
                                                            n_permutations=200,
                                                            n_jobs=4)
 
    print distribution
    print value
    print pvalue
    plt.figure()
    plt.hist(distribution, 128)
    plt.plot([value, value], [0, 50], color='r')
    plt.title('p = %.3f' % pvalue)
    plt.savefig(os.path.join(outdir,"permtest_hist.png"),dpi=100,format="png")
    #model, varidx, labels, nlabels = _fit(X, y, pdata.lsas_pre[:,None])

    
Example #33
X = np.array(X)
y = np.array(y)


base_pipe = Pipeline([('saxizer', SAXTransformer(points_per_symbol=1)),
                     ('features', FeatureUnion([('countvect', CountVectorizer(min_df=1, analyzer='char', ngram_range=(1, 10))),
                                                ('tfidfvect', TfidfVectorizer(min_df=1, analyzer='char', ngram_range=(1, 2)))])),
                     ('svc', svm.LinearSVC())])

bop_pipe = Pipeline([('saxizer', SAXTransformer(points_per_symbol=1)),
                     ('features', FeatureUnion([('countvect', CountVectorizer(min_df=1, analyzer='char', ngram_range=(1, 10))),
                                                ('tfidfvect', TfidfVectorizer(min_df=1, analyzer='char', ngram_range=(1, 2)))])),
                     ('svc', svm.LinearSVC())])


for i in [bop_pipe, base_pipe]:
    score, permutation_scores, pvalue = permutation_test_score(
        i, X, y, scoring="accuracy", cv=StratifiedKFold(y, 2), n_permutations=5, n_jobs=4)
    print("Score %s (pvalue : %s)" % (score, pvalue))

# svm_pipe = Pipeline([('svc', clf)])
# score, permutation_scores, pvalue = permutation_test_score(
#     svm_pipe, X, y, scoring="accuracy", cv=StratifiedKFold(y, 2), n_permutations=100, n_jobs=4)
# print("Baseline Classification score %s (pvalue : %s)" % (score, pvalue))

# X2 = np.array([SAX(i).sax() for i in X])
# svm_pipe = Pipeline([('svc', clf)])
# score, permutation_scores, pvalue = permutation_test_score(
#     svm_pipe, X2, y, scoring="accuracy", cv=StratifiedKFold(y, 2), n_permutations=100, n_jobs=1)
# print("Baseline Classification score %s (pvalue : %s)" % (score, pvalue))
Example #34
def do_session(ds,
               clf=SVC(kernel='linear', probability=True),
               scoring=score,
               targets='quantized_distance',
               n_jobs=1,
               n_features=3000,
               learning_curve=False,
               permutation_test=False):

    ds.sa['chunks'] = [
        '{}:{}'.format(sid, scan)
        for sid, scan in zip(ds.sa['session_id'], ds.sa['run'])
    ]

    ds.sa['targets'] = ds.sa[targets]

    #fixme: do wiener filter here

    from mvpa2.mappers.detrend import PolyDetrendMapper

    detrender = PolyDetrendMapper(polyord=1, chunks_attr='chunks')

    ds = ds.get_mapped(detrender)

    ds = ds[numpy.logical_not(numpy.logical_or(ds.sa.move, ds.sa.cue)), :]

    if ds.nfeatures > n_features:
        fs = SelectKBest(k=n_features)
        fs.fit(ds.samples, ds.sa.search > 0)

    ds = ds[ds.sa.search > 0, :]

    if ds.nfeatures > n_features:
        ds = ds[:, fs.get_support()]

    logger.info('Configuring cross validation')
    cv = StratifiedKFold(ds.sa.quantized_distance,
                         n_folds=6)  #FIXME: make this a function parameter

    logger.info('Beginning cross validation')
    scores = cross_val(clf, ds.samples, ds.targets, cv, scoring)

    if learning_curve:
        from sklearn.learning_curve import learning_curve
        logger.info('Beginning learning curve analysis')

        train_sizes_abs, train_scores, test_scores = learning_curve(
            clf,
            ds.samples,
            ds.targets,
            n_jobs=n_jobs,
            verbose=50,
            scoring='accuracy')

    if permutation_test:
        logger.info('Beginning permutation test')
        score, permutation_scores, pvalue = permutation_test_score(
            clf,
            ds.samples,
            ds.targets,
            cv=cv,
            n_jobs=n_jobs,
            verbose=50,
            scoring='accuracy')

    result = {}
    result['datetime'] = datetime.datetime.now()
    if ds.nfeatures > n_features:
        result['fs'] = fs
    result['mapper'] = ds.mapper
    #result['clf'] = clf
    #result['cv'] = cv
    #result['scoring'] = scoring
    result['scores'] = scores
    if learning_curve:
        result['learning_curve'] = (train_sizes_abs, train_scores, test_scores)
    else:
        result['learning_curve'] = None

    if permutation_test:
        result['pvalue'] = pvalue
    else:
        result['pvalue'] = None

    return result
iris = datasets.load_iris()
X = iris.data
y = iris.target
n_classes = np.unique(y).size

# Some noisy data not correlated
random = np.random.RandomState(seed=0)
E = random.normal(size=(len(X), 2200))

# Add noisy data to the informative features for make the task harder
X = np.c_[X, E]

svm = SVC(kernel='linear')
cv = StratifiedKFold(y, 2)

score, permutation_scores, pvalue = permutation_test_score(
    svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1)

print("Classification score %s (pvalue : %s)" % (score, pvalue))

###############################################################################
# View histogram of permutation scores
pl.hist(permutation_scores, 20, label='Permutation scores')
ylim = pl.ylim()
# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib
#pl.vlines(score, ylim[0], ylim[1], linestyle='--',
#          color='g', linewidth=3, label='Classification Score'
#          ' (pvalue %s)' % pvalue)
#pl.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',
#          color='k', linewidth=3, label='Luck')
pl.plot(2 * [score], ylim, '--g', linewidth=3,
        label='Classification Score'
        ' (pvalue %s)' % pvalue)
Example #36
def complex_networks_mapping_uri_data(directory):
    """
    Parameters
    ----------
    directory: string
        The path of the directory containing all data files.
    """

    # Computing the graph encoding
    graphs = []
    classes = []
    subjects = []
    vects = []

    # have 100 graphs already built
    niter = 100
    for subjid in ['pandit', 'ctrl']:
        thresh_dens = '0.1'
        for n in range(niter):
            subj_name = '%s_%d' % (subjid, n)
            g_name = 'iter%d.a.%s.dens_%s.edgelist.gz' % \
                (n, subjid, thresh_dens)
            el = nx.read_edgelist(os.path.join(directory, g_name),
                                  nodetype=int)
            g = nx.Graph()
            # there are 148 regions, or nodes
            g.add_nodes_from(range(148))
            g.add_edges_from(el.edges())
            graphs.append(g)
            subjects.append(subj_name)
            classes.append(subjid)
            vects.append(complex_network_mapping(graphs[-1]))
            print "Graph built for subject %s and class %s." % \
                (subj_name, subjid)

    # Reordering data for the leave-one-subject-out cross-validation
    nm_graphs = [None] * len(graphs)
    nm_classes = [None] * len(classes)
    nm_subjects = [None] * len(subjects)
    nm_vects = [None] * len(vects)

    for i in range(len(graphs) / 2):
        nm_graphs[i*2] = graphs[i]
        nm_graphs[i*2 + 1] = graphs[(len(graphs) / 2) + i]
        nm_classes[i*2] = classes[i]
        nm_classes[i*2 + 1] = classes[(len(classes) / 2) + i]
        nm_subjects[i*2] = subjects[i]
        nm_subjects[i*2 + 1] = subjects[(len(subjects) / 2) + i]
        nm_vects[i*2] = vects[i]
        nm_vects[i*2 + 1] = vects[(len(vects) / 2) + i]

    print nm_subjects
    print nm_classes

    nm_vects = np.array(nm_vects)
#    nm_vects = np.where(nm_vects == inf, 10, nm_vects)
#    nm_vects = np.where(nm_vects == nan, 10, nm_vects)

    ss = StandardScaler()
    X = ss.fit_transform(nm_vects)
    print X
    print np.mean(X)
    print np.max(X)

    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

    tuned_parameters2 = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                          'C': [1, 10, 100, 1000]},
                         {'kernel': ['sigmoid'], 'gamma': [1e-3, 1e-4],
                          'C': [1, 10, 100, 1000]},
                         {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]


    from sklearn.grid_search import GridSearchCV
    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=KFold(len(nm_classes), niter, shuffle=False))
    clf2 = GridSearchCV(SVC(C=1), tuned_parameters2, cv=KFold(len(nm_classes), niter, shuffle=False))
    clf.fit(X, np.array(nm_classes))
    clf.best_params_
    clf = SVC(C=100, kernel='linear')
    print "Now getting cross validation "
    cvr = SVC(C=1000, gamma=.001, kernel='rbf')
    cv_scores = cross_val_score(cvr, X, np.array(nm_classes),
                                cv=KFold(len(nm_classes),
                                         niter, shuffle=False))

    cv_scores = cross_val_score(cvr, X, np.array(nm_classes),
                                cv=KFold(len(nm_classes),
                                         niter, shuffle=False))


    cv_scores = cross_val_score(clf, X, np.array(nm_classes),
                                cv=KFold(len(nm_classes),
                                         niter, shuffle=False))


    from sklearn.linear_model import SGDClassifier
    clfGD = SGDClassifier(loss='log')
    clfGD.fit(X, np.array(nm_classes))
    cv_scores = cross_val_score(clfGD, X, np.array(nm_classes),
                                cv=KFold(len(nm_classes),
                                         niter, shuffle=False))
    print cv_scores
    print np.mean(cv_scores)
    print("Accuracy: %0.2f (+/- %0.2f)" %
        (cv_scores.mean(), cv_scores.std() * 2))

    from sklearn.dummy import DummyClassifier
    null_scores = cross_val_score(DummyClassifier(), X, np.array(nm_classes),
                                  cv=KFold(len(nm_classes),
                                           niter, shuffle=False))

    print null_scores.mean()

    from sklearn.cross_validation import permutation_test_score
    # permutation_test_score returns (score, permutation_scores, pvalue)
    null_score_perm, null_perm_scores, null_pvalue = permutation_test_score(
        cvr, X, np.array(nm_classes),
        cv=KFold(len(nm_classes), niter, shuffle=False))

    print null_score_perm, null_pvalue
    data_cls = []
    for j in range(len(cls_all)):
        tmp = cls_all[j][band]
        data_cls.append(
            np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
    data_pln = []
    for j in range(len(pln_all)):
        tmp = pln_all[j][band]
        data_pln.append(
            np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
    data_cls = np.asarray(data_cls)
    data_pln = np.asarray(data_pln)

    X = np.vstack([data_cls, data_pln])
    y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))])

    cv = StratifiedKFold(y, n_folds=6, shuffle=True)

    model = joblib.load(source_folder +
                        "graph_data/sk_models/path-strength_ada_%s_pln.plk" %
                        band)

    score, perm_scores, pval = permutation_test_score(
        model, X, y, cv=cv, n_permutations=10000, n_jobs=1, verbose=2)

    result = {"score": score, "perm_scores": perm_scores, "pval": pval}
    results_all[band] = result

np.save(source_folder + "graph_data/perm_test_path-strength_pln.npy",
        results_all)
import numpy as np
from sklearn import linear_model
from sklearn.cross_validation import StratifiedKFold, permutation_test_score
from sklearn import datasets

X, y = datasets.make_classification(n_samples=100, n_features=5)
n_classes = np.unique(y).size
cls = linear_model.LogisticRegression()
cv = StratifiedKFold(y, 2)
score, permutation_scores, pvalue = permutation_test_score(cls,
                                                           X,
                                                           y,
                                                           scoring="f1",
                                                           cv=cv,
                                                           n_permutations=10,
                                                           n_jobs=1)

print("Classification score %s (pvalue : %s)" % (score, pvalue))
print("Permutation scores %s" % (permutation_scores))
for k, band in enumerate(bands.keys()):
    data_cls = []
    for j in range(len(cls_all)):
        tmp = cls_all[j][band]
        data_cls.append(
            np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
    data_pln = []
    for j in range(len(pln_all)):
        tmp = pln_all[j][band]
        data_pln.append(
            np.asarray([bct.strengths_und(g) for g in tmp]).mean(axis=0))
    data_cls = np.asarray(data_cls)
    data_pln = np.asarray(data_pln)

    X = np.vstack([data_cls, data_pln])
    y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))])

    cv = StratifiedKFold(y, n_folds=6, shuffle=True)

    model = joblib.load(source_folder +
                        "graph_data/sk_models/eigen_ada_pln_%s.plk" % band)

    score, perm_scores, pval = permutation_test_score(
        model, X, y, cv=cv, n_permutations=5000, n_jobs=1)

    result = {"score": score, "perm_scores": perm_scores, "pval": pval}
    results_all[band] = result

np.save(source_folder + "graph_data/perm_test_eigen_pln.npy", results_all)
#print "The different cross_scores: ", cross_score_LDA
   

#### Naive bayes ####

from sklearn.naive_bayes import GaussianNB
ngb = GaussianNB()

cross_score_NB = cross_val_score(ngb, X_scl, y, scoring="accuracy", cv = loo, 
                    n_jobs = 8, verbose = True)
                    
print "Cross val score: ", cross_score_NB.mean() 
print "The different cross_scores: ", cross_score_NB

score_NB, permutation_score_NB, pvalue_NB = permutation_test_score(ngb, X_scl, y,
        scoring="accuracy", cv = cv, n_permutations = 2000, 
        n_jobs = n_jobs, verbose = True)
print 'Classification score:', score_NB, 'p-value:', pvalue_NB

#### SVM ####
from sklearn.svm import LinearSVC
svc = LinearSVC()

cross_score_SVM = cross_val_score(svc, X_scl, y, scoring="accuracy", cv = loo, 
                    n_jobs = 8, verbose = True)
                    
print "Cross val score: ", cross_score_SVM.mean() 
print "The different cross_scores: ", cross_score_SVM


score_SVM, permutation_score_SVM, pvalue_SVM = permutation_test_score(svc, X, y,
        scoring="accuracy", cv = cv, n_permutations = 2000,
        n_jobs = n_jobs, verbose = True)
print 'Classification score:', score_SVM, 'p-value:', pvalue_SVM