from scikits.learn import datasets
from scikits.learn.gaussian_process import GaussianProcess
from scikits.learn.cross_val import cross_val_score, KFold
from scikits.learn.metrics import r2_score

# Load the dataset from scikits' data sets
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Instantiate a GP model
gp = GaussianProcess(regr='constant',
                     corr='absolute_exponential',
                     theta0=[1e-4] * 10,
                     thetaL=[1e-12] * 10,
                     thetaU=[1e-2] * 10,
                     nugget=1e-2,
                     optimizer='Welch')

# Fit the GP model to the data performing maximum likelihood estimation
gp.fit(X, y)

# Deactivate maximum likelihood estimation for the cross-validation loop
gp.theta0 = gp.theta  # Given correlation parameter = MLE
gp.thetaL, gp.thetaU = None, None  # None bounds deactivate MLE

# Perform a cross-validation estimate of the coefficient of determination with
# the cross_val module, using all CPUs available on the machine
K = 20  # folds
R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=-1).mean()
print("The %d-Folds estimate of the coefficient of determination is R2 = %s" %
      (K, R2))
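
For reference, a minimal sketch of the splits consumed by cross_val_score
above, assuming the old scikits.learn cross_val API (early versions yield
boolean train/test masks rather than integer indices):

# Toy illustration: 10 samples split into 5 folds, as KFold(y.size, K) does
# for the diabetes data above.
from scikits.learn.cross_val import KFold
for train_mask, test_mask in KFold(10, 5):
    print(train_mask, test_mask)
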
Example #2
import numpy as np
import pylab as pl
from scikits.learn import cross_val, feature_selection, svm
from scikits.learn.pipeline import Pipeline

# X, y are assumed to be loaded beforehand (e.g. from datasets.load_digits())

################################################################################
# Create a feature-selection transform and an instance of SVM that we
# combine together to have a full-blown estimator

transform = feature_selection.SelectPercentile(feature_selection.f_classif)

clf = Pipeline([('anova', transform), ('svc', svm.SVC())])

################################################################################
# Plot the cross-validation score as a function of percentile of features
score_means = list()
score_stds = list()
percentiles = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100)

for percentile in percentiles:
    clf._set_params(anova__percentile=percentile)
    this_scores = cross_val.cross_val_score(clf, X, y)
    score_means.append(this_scores.mean())
    score_stds.append(this_scores.std())

pl.errorbar(percentiles, score_means, np.array(score_stds))

pl.title(
    'Performance of the SVM-Anova varying the percentile of features selected')
pl.xlabel('Percentile')
pl.ylabel('Cross-validation score')

pl.axis('tight')
pl.show()
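
Under the hood, SelectPercentile(f_classif) scores every feature with a
one-way ANOVA F-test and keeps the top fraction; a minimal sketch of the
scoring step, reusing the X, y assumed above:

# Rank features by their ANOVA F-score; SelectPercentile keeps the top
# `percentile` percent of this ranking.
F, pvalues = feature_selection.f_classif(X, y)
ranking = np.argsort(F)[::-1]
print(ranking[:10])  # indices of the ten most discriminative features
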
Example #3
import scipy.io
from itertools import combinations
from numpy import (arange, array, c_, concatenate, diag, diff, loadtxt, mean,
                   nonzero, ones, r_, reshape, tile)
from scikits.learn import svm
from scikits.learn import cross_val as cv
import matplotlib.pyplot as plt
import stats_binom  # local helper module providing wilson_score_interval


def classify(data='All_10.dr', plot=[], fig=[], sort=1, L1=''):

    # Read and parse the condition labels
    with open("/home/fs0/madugula/scratch/FC/covarscript/fgcutshncl2.txt") as f:
        label=f.read().splitlines()
        label=array(map(int,label))
        blk=concatenate(([1],diff(label)))
        blk[blk!=0]=1
        ind=nonzero(blk)[0]

        mats={}
        results=[]
        errorb=[]
        xlabels=[]

        tasks=['r','t','v','vt','vtbw']
        netmats=['1','0','0a','2','3','4','5','6','7','8','9','10','11','12']
        #netmats=['1','0','0a','2','5']
        netmatnames=['Corr','Cov','Amp','ICOV','ICOV0.1','ICOV1','ICOV10','ICOV20','ICOV40','ICOV60','ICOV80','ICOV100','ICOV150','ICOV200']
        #netmatnames=['Corr','Cov','Amp','ICOV10']


        for nmm in arange(len(netmats)): 
            mm=netmats[nmm]
            mats[nmm]=[]
            targetslist=[]
            results.append([])
            errorb.append([])

            for i in range(len(tasks)):

                # tmp_task=scipy.io.loadmat(tasks[i]+'_'+data+'/out_tpts_all_150.mat')
                tmp_task=scipy.io.loadmat(tasks[i]+'_'+data+'/out.mat')
                tmp2=tmp_task['netmat'+mm]

                mats[nmm].append((tmp2))
                targetslist.append(ones((tmp2.shape[0]))*i)
                

            subnum=mats[nmm][0].shape[0]

            titles=[]
            matsub=[]
    
###        # create new feature sets
###
        nmm+=1
        mats[nmm]=[]
###
        results.append([])
        errorb.append([])
###
###        # L1cov
###
###        if L1 != '':
###            for i in range(len(tasks)):
###                tmp = loadtxt('r_'+data+'/all_conds/cov_' + tasks[i] + '.txt') 
###                mats[nmm].append((tmp))
###            netmatnames.append('L1Cov')
###
###            nmm+=1
###            mats[nmm]=[]
###
###            results.append([])
###
###        # L1prec
###
###        if L1 != '':
###            for i in range(len(tasks)):
###                tmp = loadtxt('r_'+data+'/all_conds/prec_' + tasks[i] + '.txt') 
###                mats[nmm].append((tmp))
###                   
###            netmatnames.append('L1Prec')
###
###            nmm+=1
###            mats[nmm]=[]
###
###            results.append([])
###
###
        # corr+amp

        shp=mats[0][0].shape 
        nsubs=shp[0]
        nels=shp[1] 
        size=mats[0][0].size 
        diagels=diag(reshape(arange(nels),(int(nels**.5),int(nels**.5))))

        for i in range(len(tasks)):
            corrs=mats[0][i]
            for ii in range(nsubs):
                corrs[ii,diagels]=mats[2][i][ii,:]
            mats[nmm].append(corrs)
        netmatnames.append('Corr+Amp')

        # corr+ICOVs
        for ii in r_[1,arange(3,5)]:

            nmm+=1
            mats[nmm]=[]
            results.append([])
            errorb.append([])

            for i in range(len(tasks)):
                mats[nmm].append(c_[mats[0][i],mats[ii][i]])

            netmatnames.append('Corr + ' + netmatnames[ii])
    
        # now, prediction

        for nmm in arange(len(mats)): 

            print(nmm)
            subnum=mats[nmm][0].shape[0]

            titles=[]
            matsub=[]

            #for i in arange(len(mats[nmm])-1)+1:
            #    matsub.append(mats[nmm][i]-mats[nmm][0])
             
            for i in arange(len(mats[nmm])):
                matsub.append(mats[nmm][i]-mean(mats[nmm],0))
                                  
             # print("vs rest, no sub ")
            for x,y in combinations((arange(len(mats[nmm]))),2):
                training=concatenate((mats[nmm][x],mats[nmm][y]))
                targets=concatenate((0*ones((subnum)),1*ones((subnum))))
                labels=concatenate([arange(subnum),arange(subnum)])
                vec=arange(mats[nmm][x].shape[0])
                # random.shuffle(vec)
                # training=training[vec]
                # targets=targets[vec]
                clf=svm.SVC(kernel='linear')
                lolo=cv.LeaveOneLabelOut(labels)

                results[nmm].append(mean(cv.cross_val_score(clf,training,targets,cv=lolo)))
                errorb[nmm].append(stats_binom.wilson_score_interval(results[nmm][-1]*len(targets),len(targets),0.1))

                xlabels.append(tasks[x] + ' vs ' + tasks[y] +' No sub' )
                # results=validate(clf,K,training,targets)
                # print(task[x]+" versus "+task[y]+": "+str(results[nmm][-1]))
             # print("vs rest, no sub ")
            
            # multi-label

            training=concatenate(matsub[0:-1])
            targets=concatenate(targetslist[0:-1])
            labels=tile(arange(subnum),[1,len(mats[nmm][0:-1])]).flatten()
            clf=svm.SVC(kernel='linear')
            lolo=cv.LeaveOneLabelOut(labels)

            results[nmm].append(mean(cv.cross_val_score(clf,training,targets,cv=lolo)))
            errorb[nmm].append(stats_binom.wilson_score_interval(results[nmm][-1]*len(targets),len(targets),0.1))
            xlabels.append('Multi' )
               
###            # print("Ranking")
###            for x in range(len(matsub)):
###
###                training=concatenate([matsub[x],-matsub[x]])
###                targets=concatenate((0*ones((subnum)),1*ones((subnum))))
###                labels=concatenate([arange(subnum),arange(subnum)])
###                clf=svm.SVC(kernel='linear')
###                lolo=cv.LeaveOneLabelOut(labels)
###
###                results[nmm].append(mean(cv.cross_val_score(clf,training,targets,cv=lolo)))
###                xlabels.append(tasks[x+1] + ' vs r')
###                # results=validate(clf,K,training,targets)
###                # print(tasks[x]+"from Rest: "+str(results))
###
            # print("Subtractions")
            for x,y in combinations((arange(len(matsub))),2):
                training=concatenate((matsub[x],matsub[y]))
                targets=concatenate((0*ones((subnum)),1*ones((subnum))))
                labels=concatenate([arange(subnum),arange(subnum)])
                vec=arange(mats[nmm][x].shape[0])
                # random.shuffle(vec)
                # training=training[vec]
                # targets=targets[vec]
                clf=svm.SVC(kernel='linear')
                lolo=cv.LeaveOneLabelOut(labels)

                results[nmm].append(mean(cv.cross_val_score(clf,training,targets,cv=lolo)))
                errorb[nmm].append(stats_binom.wilson_score_interval(results[nmm][-1]*len(targets),len(targets),0.1))
                xlabels.append(tasks[x] + ' vs ' + tasks[y])
                # results=validate(clf,K,training,targets)
                # print(task[x]+" versus "+task[y]+": "+str(results[nmm][-1]))

        if plot:
            if fig == []:
                fig=plt.figure()
                
            # multibar: local plotting helper from the author's codebase
            multibar(array(results),fig,sort=sort,xlabels=xlabels,condlabels=netmatnames,title='Rest Measures',ylabel='Accuracy')

        return results,errorb,netmatnames,xlabels
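
The stats_binom.wilson_score_interval helper used above is local to the
author's environment; a minimal sketch of what a Wilson score interval for a
binomial proportion computes (an assumption about the helper, not its source):

# Wilson score interval for `successes` out of `n` trials at level `alpha`.
from scipy.stats import norm

def wilson_score_interval(successes, n, alpha=0.1):
    z = norm.ppf(1 - alpha / 2.)
    p = float(successes) / n
    denom = 1 + z ** 2 / n
    center = (p + z ** 2 / (2 * n)) / denom
    half = (z / denom) * (p * (1 - p) / n + z ** 2 / (4 * n ** 2)) ** .5
    return center - half, center + half
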
Example #4
import numpy as np
import pylab as pl
from scikits.learn import cross_val, datasets, decomposition, linear_model, metrics
from scikits.learn.gaussian_process import GaussianProcess
from scikits.learn.pipeline import Pipeline

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

lasso = linear_model.Lasso()

alphas = np.logspace(-4, -1, 20)

scores = list()
scores_std = list()

for alpha in alphas:
    lasso.alpha = alpha
    this_scores = cross_val.cross_val_score(lasso, X, y, n_jobs=-1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

pl.figure(1, figsize=(2.5, 2))
pl.clf()
pl.axes([.1, .25, .8, .7])
pl.semilogx(alphas, scores)
pl.semilogx(alphas, np.array(scores) + np.array(scores_std) / 20, 'b--')
pl.semilogx(alphas, np.array(scores) - np.array(scores_std) / 20, 'b--')
pl.yticks(())
pl.ylabel('CV score')
pl.xlabel('alpha')
pl.axhline(np.max(scores), linestyle='--', color='.5')
pl.text(2e-4, np.max(scores) + 1e-4, '.489')
# Instantiate a GP model on the diabetes data (hyperparameters are fitted by
# maximum likelihood estimation below)
gp = GaussianProcess(regr='constant', corr='absolute_exponential',
                     theta0=[1e-4] * 10, thetaL=[1e-12] * 10,
                     thetaU=[1e-2] * 10, nugget=1e-2, optimizer='Welch',
                     verbose=False)

# Fit the GP model to the data
gp.fit(X, y)
gp.theta0 = gp.theta
gp.thetaL = None
gp.thetaU = None
gp.verbose = False

# Estimate the leave-one-out predictions using the cross_val module
n_jobs = 2  # number of parallel jobs to distribute the leave-one-out loop over
y_pred = y + cross_val.cross_val_score(gp, X, y=y,
                                       cv=cross_val.LeaveOneOut(y.size),
                                       n_jobs=n_jobs).ravel()

# Compute the empirical explained variance
Q2 = metrics.explained_variance_score(y, y_pred)

# Goodness-of-fit plot
pl.figure()
pl.title('Goodness-of-fit plot (Q2 = %1.2e)' % Q2)
pl.plot(y, y_pred, 'r.', label='Leave-one-out')
pl.plot(y, gp.predict(X), 'k.', label='Whole dataset (nugget=1e-2)')
pl.plot([y.min(), y.max()], [y.min(), y.max()], 'k--')
pl.xlabel('Observations')
pl.ylabel('Predictions')
pl.legend(loc='upper left')
pl.axis('tight')
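
For reference, the explained variance computed above reduces to a one-liner;
a minimal numpy equivalent of metrics.explained_variance_score(y, y_pred):

# Q2 = 1 - Var[residuals] / Var[observations]
Q2_manual = 1. - np.var(y - y_pred) / np.var(y)
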

################################################################################
# Load the digits dataset for the PCA + logistic regression pipeline below
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

################################################################################
# Define the pipeline: PCA dimension reduction chained with a logistic
# regression. pca, logistic and pipe were undefined in the original snippet;
# they are reconstructed here, and module paths may differ across versions.
logistic = linear_model.LogisticRegression()
pca = decomposition.PCA()
pipe = Pipeline([('pca', pca), ('logistic', logistic)])

################################################################################
# Plot the PCA spectrum
pca.fit(X_digits)

pl.figure(1, figsize=(4, 3))
pl.clf()
pl.axes([.2, .2, .7, .7])
pl.plot(pca.explained_variance_, linewidth=2)
pl.axis('tight')
pl.xlabel('n_components')
pl.ylabel('explained_variance_')

################################################################################
# Prediction
scores = cross_val.cross_val_score(pipe, X_digits, y_digits, n_jobs=-1)

from scikits.learn.grid_search import GridSearchCV

n_components = [10, 15, 20, 30, 40, 50, 64]
Cs = np.logspace(-4, 4, 16)
estimator = GridSearchCV(pipe,
                         dict(pca__n_components=n_components,
                              logistic__C=Cs),
                         n_jobs=-1)
estimator.fit(X_digits, y_digits)
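
The grid above expands to every combination of the two parameter lists; a
quick sketch of how many candidate settings GridSearchCV evaluates here:

# 7 values of n_components x 16 values of C = 112 candidate models,
# each scored by cross-validation.
n_candidates = len(n_components) * len(Cs)
print(n_candidates)  # 112
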
Example #7
import numpy as np
import pylab as pl
from scikits.learn import cross_val, feature_selection, svm
from scikits.learn.pipeline import Pipeline

# X, y are assumed to be loaded beforehand (e.g. from datasets.load_digits())

################################################################################
# Create a feature-selection transform and an instance of SVM that we
# combine together to have a full-blown estimator

transform = feature_selection.SelectPercentile(feature_selection.f_classif)

clf = Pipeline([('anova', transform), ('svc', svm.SVC())])

################################################################################
# Plot the cross-validation score as a function of percentile of features
score_means = list()
score_stds = list()
percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)

for percentile in percentiles:
    clf._set_params(anova__percentile=percentile)
    # Compute the cross-validation score (set n_jobs=-1 to use all CPUs)
    this_scores = cross_val.cross_val_score(clf, X, y, n_jobs=1)
    score_means.append(this_scores.mean())
    score_stds.append(this_scores.std())

pl.errorbar(percentiles, score_means, np.array(score_stds))

pl.title(
    'Performance of the SVM-Anova varying the percentile of features selected')
pl.xlabel('Percentile')
pl.ylabel('Prediction rate')

pl.axis('tight')
pl.show()
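
The 'anova__percentile' name above follows the Pipeline parameter convention
'<step name>__<parameter>'; any step can be addressed the same way, e.g. (a
hypothetical tweak, not part of the original example):

# Address the SVC step of the same pipeline through the 'svc' prefix.
clf._set_params(svc__C=10.)
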
import numpy as np
from scikits.learn.svm import SVC
from scikits.learn.feature_selection import SelectKBest, f_classif
from scikits.learn.pipeline import Pipeline
from scikits.learn.cross_val import LeaveOneLabelOut, cross_val_score

# X, y and the per-sample `session` labels are assumed to be loaded
# beforehand; discard the rest condition (y == 0)
X, y, session = X[y != 0], y[y != 0], session[y != 0]
n_samples, n_features = X.shape
n_conditions = np.size(np.unique(y))

### Define the prediction function to be used.
# Here we use a Support Vector Classification, with a linear kernel and C=1
clf = SVC(kernel='linear', C=1.)

### Define the dimension reduction to be used.
# Here we use a classical univariate feature selection based on F-test,
# namely Anova. We set the number of features to be selected to 500
feature_selection = SelectKBest(f_classif, k=500)

### We combine the dimension reduction and the prediction function
anova_svc = Pipeline([('anova', feature_selection), ('svc', clf)])

### Define the cross-validation scheme used for validation.
# Here we use a LeaveOneLabelOut cross-validation on the session, which
# corresponds to a leave-one-session-out
cv = LeaveOneLabelOut(session)

### Compute the prediction accuracy for the different folds (i.e. session)
cv_scores = cross_val_score(anova_svc, X, y, cv=cv, n_jobs=-1,
                            verbose=1, iid=True)

### Return the corresponding mean prediction accuracy
classification_accuracy = np.sum(cv_scores) / float(n_samples)
print "Classification accuracy: %f" % classification_accuracy, \
    " / Chance level: %f" % (1. / n_conditions)

import numpy as np
from scikits.learn import cross_val, datasets, svm

digits = datasets.load_digits()
X = digits.data
y = digits.target

svc = svm.SVC()
gammas = np.logspace(-6, -1, 10)

scores = list()
scores_std = list()
for gamma in gammas:
    svc.gamma = gamma
    this_scores = cross_val.cross_val_score(svc, X, y, n_jobs=-1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

import pylab as pl
pl.figure(1, figsize=(2.5, 2))
pl.clf()
pl.axes([.1, .25, .8, .7])
pl.semilogx(gammas, scores)
pl.semilogx(gammas, np.array(scores) + np.array(scores_std), 'b--')
pl.semilogx(gammas, np.array(scores) - np.array(scores_std), 'b--')
pl.yticks(())
pl.ylabel('CV score')
pl.xlabel('gamma')
pl.ylim(0, 1.1)
#pl.axhline(np.max(scores), linestyle='--', color='.5')
pl.text(gammas[np.argmax(scores)], .9 * np.max(scores),
        '%.3f' % np.max(scores))
pl.show()