Example 1
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        assert_equal(len(warning_list), 1)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
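For context, the replacement class this test compares against is sklearn.cluster.FeatureAgglomeration (whose default linkage is Ward). A minimal, self-contained sketch of the same workflow without the deprecated class, on the same synthetic data:

import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph

rng = np.random.RandomState(0)
X = rng.randn(50, 100)                    # 50 samples, 100 features on a 10x10 grid
connectivity = grid_to_graph(10, 10)
agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
X_red = agglo.fit_transform(X)            # (50, 5): one column per feature cluster
X_full = agglo.inverse_transform(X_red)   # (50, 100): constant within each cluster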
def ward_adni_rs_fmri(func_files, n_clusters=200):

    masker = NiftiMasker(mask_strategy='epi', mask_args=dict(opening=1))
    masker.fit(func_files)
    func_masked = masker.transform(func_files)
    #func_masked = masker.transform_niimgs(func_files, n_jobs=4)
    func_masked = np.vstack(func_masked)

    ###########################################################################
    # Ward
    ###########################################################################

    mask = masker.mask_img_.get_data().astype(np.bool)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0],
                                       n_y=shape[1],
                                       n_z=shape[2],
                                       mask=mask)

    # Computing the ward for the first time, this is long...
    ward = WardAgglomeration(n_clusters=n_clusters,
                             connectivity=connectivity,
                             memory='nilearn_cache')

    ward.fit(func_masked)
    ward_labels_unique = np.unique(ward.labels_)
    ward_labels = ward.labels_

    ward_filename = '_'.join(['ward', str(n_clusters)])
    img_ward = masker.inverse_transform(ward.labels_)
    img_ward.to_filename(os.path.join(CACHE_DIR, ward_filename + '.nii.gz'))
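A hedged follow-up that could be appended inside ward_adni_rs_fmri, mirroring the plotting done in other snippets in this collection (the output filename is illustrative only):

    from nilearn.plotting import plot_roi
    # Note: label 0 is treated as background by plot_roi; other snippets add 1 to
    # ward.labels_ before inverse_transform so that no parcel is hidden.
    display = plot_roi(img_ward, title='Ward parcellation (%d clusters)' % n_clusters)
    display.savefig(os.path.join(CACHE_DIR, ward_filename + '.png'))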
Example 4
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Example 5
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Example 6
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    # data preparation
    Z = nifti_masker.fit_transform(imgs)
    pca = RandomizedPCA(n_components=n_components)
    Z_ = pca.fit_transform(Z.T).T
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory='nilearn_cache').fit(Z_)
    W = ward.transform(Z)
    del Z
    # data cube is a more convenient representation
    cube = np.array([W[subject_label == subject]
                     for subject in np.arange(n_subjects)])
    # parcel connectivity
    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
Example 7
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    # data preparation
    Z = nifti_masker.fit_transform(imgs)
    pca = RandomizedPCA(n_components=n_components)
    Z_ = pca.fit_transform(Z.T).T
    ward = WardAgglomeration(n_clusters=n_clusters,
                             connectivity=connectivity,
                             memory='nilearn_cache').fit(Z_)
    W = ward.transform(Z)
    del Z
    # data cube is a more convenient representation
    cube = np.array(
        [W[subject_label == subject] for subject in np.arange(n_subjects)])
    # parcel connectivity
    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
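A minimal, self-contained sketch of the fit-on-reduced / transform-full pattern used in prepare_data above (synthetic shapes; FeatureAgglomeration stands in for the deprecated WardAgglomeration): the clustering is fitted on the PCA-reduced signals but can reduce the original signals, because both share the same voxel axis.

import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph

rng = np.random.RandomState(0)
n_timepoints, n_components = 40, 5
connectivity = grid_to_graph(10, 10)          # 100 "voxels" on a 10x10 grid
Z = rng.randn(n_timepoints, 10 * 10)          # full signals
Z_ = rng.randn(n_components, 10 * 10)         # stand-in for the PCA-reduced signals
ward = FeatureAgglomeration(n_clusters=20, connectivity=connectivity).fit(Z_)
W = ward.transform(Z)                         # (40, 20): one parcel-mean column per cluster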
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    assert_true(np.unique(Xfull[0]).size == 5)
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    assert_true(np.unique(Xfull[0]).size == 5)
    assert_array_almost_equal(ward.transform(Xfull), Xred)
# Mask data
epi_masked = epi_img[mask]

### Ward ######################################################################

# Compute connectivity matrix
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=500, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
print "Ward agglomeration 500 clusters: %.2fs" % (time.time() - start)

# Compute the ward with more clusters, should be faster
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)

### Prepare output ############################################################

### Show result ###############################################################

from matplotlib import pyplot as pl
### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0],
                                   n_y=shape[1],
                                   n_z=shape[2],
                                   mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=n_clusters,
                         connectivity=connectivity,
                         memory='nilearn_cache',
                         compute_full_tree=True)
ward.fit(fmri_masked)
print "Ward agglomeration %d clusters: %.2fs" % (n_clusters,
                                                 time.time() - start)

labels = ward.labels_ + 1
labels = nifti_masker.inverse_transform(labels).get_data()
# 0 is the background, putting it to -1
labels = labels - 1

# Display the labels
plot_labels(labels, 8)
pl.savefig('ward.eps')
pl.savefig('ward.pdf')
Example 12
"""
Test various n_clusters
"""
for N_CLUSTERS in N_CLUSTERS_SET:

    ##############################################################################
    # Ward
    ##############################################################################
    
    mask = masker.mask_img_.get_data().astype(np.bool)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)
    
    # Computing the ward for the first time, this is long...
    ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity,
                             memory='nilearn_cache')

    ward.fit(pet_data_masked)
    ward_labels_unique = np.unique(ward.labels_)
    ward_labels = ward.labels_
    
    ##############################################################################
    # Generate cluster matrix
    ##############################################################################
    
    x = np.zeros((len(data), N_CLUSTERS))
    for idx in np.arange(len(data)):
        for val in ward_labels_unique:
            ind = (ward_labels == val)
            x[idx, val] = np.mean(pet_data_masked[idx, ind])
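Since ward.fit assigns labels 0..N_CLUSTERS-1, the double loop above presumably computes the same per-parcel means as a single call to ward.transform; a hedged shortcut, not what the original script does:

    x = ward.transform(pet_data_masked)   # (len(data), N_CLUSTERS), one column per parcel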
    
masker = NiftiMasker(mask_strategy='epi',
                     mask_args=dict(opening=8))
masker.fit(pet_files)

pet_masked = masker.transform_niimgs(pet_files, n_jobs=2)
#pet_masked = np.vstack(pet_masked)


mask = masker.mask_img_.get_data().astype(np.bool)
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(pet_masked[0])
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)


labels = ward.labels_ + 1
labels_img = masker.inverse_transform(labels)
first_plot = plot_roi(labels_img, pet_img[0], title="Ward parcellation",
                      display_mode='xz')
# labels_img is a Nifti1Image object, it can be saved to file with the
# following code:
labels_img.to_filename('parcellation.nii')


"""
##################################################################
# Perform parcellation on smoothed PCA-ed timecourses for each ROI
mem = Memory(cachedir=".", verbose=1)
n_clust = np.zeros(n_rois)  # Different #clusters for different ROI
template = np.zeros((dim[0], dim[1], dim[2]))
print ("Performing Ward Clustering")
for i in np.arange(n_rois):
    # Determine the number of clusters to divide each ROI into
    roi_mask = brain == rois[i]
    n_clust[i] = np.round(np.sum(roi_mask) * n_parcels / n_vox)
    if n_clust[i] <= 1:
        template[roi_mask] = np.shape(np.unique(template))[0]
    else:
        # Define connectivity based on brain mask
        A = grid_to_graph(n_x=dim[0], n_y=dim[1], n_z=dim[2], mask=roi_mask)
        # Create ward object
        ward = WardAgglomeration(n_clusters=n_clust[i], connectivity=A.tolil(), memory=mem)
        ward.fit(tc_group[roi_mask.ravel(), :].T)
        template[roi_mask] = ward.labels_ + np.shape(np.unique(template))[0]

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
for sub in subList:
    print str("Subject" + sub)
    # Load preprocessed voxel timecourses
    tc = io.loadmat(os.path.join(BASE_DIR, sub, "restfMRI/tc_vox.mat"))
    tc = tc["tc"]

    # Generate subject-specific tissue mask
    gm_file = os.path.join(BASE_DIR, sub, "anat", "gmMask.nii")
Example 15
if __name__ == '__main__':
    memory = Memory('/havoc/cache', mmap_mode='r+')

    le = LabelEncoder()
    lb = LabelBinarizer()
    loader = NiftiMasker(mask='/tmp/mask.nii.gz',
                         memory=memory, memory_level=1)
    reporter = Reporter(report_dir='/tmp/reporter')

    cv = ShuffleSplit(len(target), n_iter=5)
    Cs = [1e-3, 1e-2, 1e-1, 1., 10, 1e2, 1e3]

    scaler = StandardScaler()
    n_x, n_y, n_z = mask.shape
    connectivity = grid_to_graph(n_x, n_y, n_z, mask=mask_array)
    ward = WardAgglomeration(n_clusters=2000,
                             connectivity=connectivity, memory=memory)
    svc = LinearSVC(penalty='l1', dual=False)
    # rand_svc = RandomizedWardClassifier(mask_array, n_iter=16,
    #                                     memory=memory, n_jobs=-1)

    pipe = Pipeline([('scaler', scaler), ('clf', svc)])
    grid = GridSearchCV(pipe, param_grid={'clf__C': Cs},
                        cv=cv, n_jobs=1)
    grid.best_estimator_ = grid.estimator
    ovr = OneVsRestClassifier(grid, n_jobs=1)

    # decoder = Decoder(ovr, loader, lb, reporter)
    # decoder.fit(niimgs, target).score(niimgs, target)

    # pipeline = Pipeline([('scaler', scaler), ('clf', clf)])
    decoder = Decoder(ovr, loader, lb, reporter)
Example 16
# Perform parcellation on smoothed PCA-ed timecourses for each ROI
mem = Memory(cachedir='.', verbose=1)
n_clust = np.zeros(n_rois) # Different #clusters for different ROI
template = np.zeros((dim[0], dim[1], dim[2]))
print("Performing Ward Clustering")
for i in np.arange(n_rois):
    # Determine the number of clusters to divide each ROI into
    roi_mask = brain == rois[i]
    n_clust[i] = np.round(np.sum(roi_mask) * n_parcels / n_vox)
    if n_clust[i] <= 1:
        template[roi_mask] = np.shape(np.unique(template))[0]
    else:        
        # Define connectivity based on brain mask
        A = grid_to_graph(n_x=dim[0], n_y=dim[1], n_z=dim[2], mask=roi_mask)
        # Create ward object
        ward = WardAgglomeration(n_clusters=n_clust[i], connectivity=A.tolil(), memory=mem)
        ward.fit(tc_group[roi_mask.ravel(), :].T)
        template[roi_mask] = ward.labels_ + np.shape(np.unique(template))[0] 

# Run relabel_disconnected_parcel.py

# Saving the template
io.savemat(os.path.join(BASE_DIR, "group/ica_roi_parcel150.mat"), {"template":template})
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, os.path.join(BASE_DIR, "group/ica_roi_parcel150.nii"))         

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
for sub in subList:
Example 17
# Spatial smoothing to encourage smooth parcels
dim = np.shape(brain)
tc = tc.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc.shape[-1]
for t in np.arange(n_tpts):
    tc[:, :, :, t] = gaussian_filter(tc[:, :, :, t], sigma=1)
tc = tc.reshape((-1, n_tpts))
tc = tc[brain.ravel() == 1, :]

# Functional parcellation with Ward clustering
print("Performing Ward Clustering")
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1], n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=n_parcels, connectivity=A.tolil(), memory=mem)
ward.fit(tc.T)
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain==1] = ward.labels_ + 1 # labels start from 0, which is used for background

# Remove single voxels not connected to parcel
#for i in np.unique(template)[1:]:
#    labels, n_labels = label(template == i, structure=np.ones((3,3,3)))
#    if n_labels > 1:
#	for j in np.arange(n_labels):
#	    if np.sum(labels == j + 1) < 10:
#		template[labels == j + 1] = 0

# Saving the template
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, PARCEL_PATH)
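A hedged, runnable version of the commented-out cleanup above (assuming scipy.ndimage.label and the 10-voxel threshold from the comments), applied while template is still 3D:

from scipy.ndimage import label as cc_label

for i in np.unique(template)[1:]:
    comp, n_comp = cc_label(template == i, structure=np.ones((3, 3, 3)))
    if n_comp > 1:
        for j in np.arange(n_comp):
            if np.sum(comp == j + 1) < 10:   # drop tiny disconnected components
                template[comp == j + 1] = 0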
elif folding == 'leaveoneout':
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")

#-- classifier
clf = svm.SVC(kernel='linear', probability=True, C=svm_C)

#-- normalizer
scaler = Scaler()

#-- Clustering
n_clusters = 100
cluster = WardAgglomeration(n_clusters=n_clusters,
                            connectivity=None,
                            compute_full_tree='auto')

#-- feature selection
fs_n = round(n_features * fs_n) / n_features
if fs_n == 100:
    fs = SelectKBest(f_classif, k=n_features)
else:
    fs = SelectPercentile(f_classif, percentile=fs_n)

#-- results initialization
if compute_predict:
    predict = np.zeros([n_splits, n_samples, n_dims, n_dims_tg])**np.nan
    predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg,
                         n_folds])**np.nan
else:
Example 19
    pl.axis('off')


# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0],
                                   n_y=shape[1],
                                   n_z=shape[2],
                                   mask=mask)

for n_clusters in 100, 1000:
    # Compute Ward clustering
    from sklearn.cluster import WardAgglomeration
    ward = WardAgglomeration(n_clusters=n_clusters,
                             connectivity=connectivity,
                             memory='nilearn_cache',
                             compute_full_tree=True)
    ward.fit(X)

    labels = ward.labels_ + 1
    labels = masking.unmask(labels, adhd_mask)
    # 0 is the background, putting it to -1
    labels = labels - 1

    # Display the labels
    plot_labels(labels, 8)
    pl.savefig(join('clustering', 'ward_%i.eps' % n_clusters))
    pl.savefig(join('clustering', 'ward_%i.pdf' % n_clusters))

    # Compute Kmeans clustering
    from sklearn.cluster import MiniBatchKMeans
def BMA_consensus_cluster_parallel(cfg, remote_path, remote_BOLD_fn, remote_mask_fn, Y, nifti_masker, \
                        num_vox, K_clus, K_clusters, \
                        parc, alpha, prop, nbItRFIR, onsets, durations,\
                        output_sub_parc, rescale=True, averg_bold=False):
    '''
    Performs all steps for one clustering case (given K_clus and the parcellation number l).
    remote_path: path on the cluster, where results will be stored
    '''
    import os
    import sys
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_BB/Parcellations/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/Scripts_divers_utiles/Scripts_utiles/")
    sys.path.append('/home/pc174679/local/installations/consensus-cluster-0.6')
    
    from Random_parcellations import random_parcellations, subsample_data_on_time
    from Divers_parcellations_test import *
    
    from RFIR_evaluation_parcellations import JDE_estim, RFIR_estim, clustering_from_RFIR
    
    from Random_parcellations import hrf_roi_to_vox
    from pyhrf.tools._io import remote_copy, remote_mkdir
    from nisl import io
    
    #nifti_masker.mask=remote_mask_fn
    
    # Creation of the necessary paths --> do not do here
    parc_name = 'Subsampled_data_with_' + str(K_clus) + 'clusters' 
    parc_name_clus = parc_name + 'rnd_number_' + str(parc+1)
    
    remote_sub = os.sep.join((remote_path, parc_name))   
    #if not os.path.exists(remote_sub):
        #os.path.exists(remote_sub)
        #print 'remote_sub:', remote_sub
        #os.makedirs(remote_sub)
    remote_sub_parc = os.sep.join((remote_sub,parc_name_clus))   
    #if not os.path.exists(remote_sub_parc):
        #os.makedirs(remote_sub_parc)
    
    output_RFIR_parc = os.sep.join((output_sub_parc,'RFIR_estim'))
    
    ###################################
    ## 1st STEP: SUBSAMPLING
    print '--- Subsample data ---'
    Ysub = subsample_data_on_time(Y, remote_mask_fn, K_clus, alpha, prop, \
                    nifti_masker, rescale=rescale)
    print 'Ysub:', Ysub
    print 'remote_sub_prc:', remote_sub_parc
    Ysub_name = 'Y_sub_'+ str(K_clus) + 'clusters_' + 'rnd_number_' + str(parc+1) +'.nii'
    Ysub_fn = os.sep.join((remote_sub_parc, Ysub_name))
    Ysub_masked = nifti_masker.inverse_transform(Ysub).get_data()
    write_volume(Ysub_masked, Ysub_fn)                        
    
    
    
    ###################################
    ## 2D STEP: RFIR
    print '--- Performs RFIR estimation ---'

    
    remote_RFIR_parc_clus = os.sep.join((remote_sub_parc, 'RFIR_estim'))
    #if not os.path.exists(remote_RFIR_parc):os.makedirs(remote_RFIR_parc)
    #remote_RFIR_parc_clus = os.sep.join((remote_RFIR_parc, parc_name_clus))
    #if not os.path.exists(remote_RFIR_parc_clus):os.makedirs(remote_RFIR_parc_clus)
    
    print '  * output path for RFIR ', remote_RFIR_parc_clus
    print '  * RFIR for subsampling nb ', str(parc+1), ' with ', K_clus, ' clusters' 
    RFIR_estim(nbItRFIR, onsets, durations, Ysub_fn, remote_mask_fn, \
                remote_RFIR_parc, avg_bold=averg_bold) 
                  
    hrf_fn = os.sep.join((remote_RFIR_parc_clus, 'rfir_ehrf.nii'))
    #remote_copy([hrf_fn], remote_host, 
                #remote_user, remote_path)[0]
    
    ###################################
    ## 3D STEP: CLUSTERING FROM RFIR RESULTS
    name_hrf = 'rfir_ehrf.nii'
    
    from pyhrf.tools._io import read_volume, write_volume
    import nisl.io as ionisl
    from sklearn.feature_extraction import image
    from sklearn.cluster import WardAgglomeration
    from scipy.spatial.distance import cdist, pdist
    
    hrf_fn = os.sep.join((remote_RFIR_parc_clus,name_hrf))
    hrf=read_volume(hrf_fn)[0]
    hrf_t_fn = add_suffix(hrf_fn, 'transpose')
    #taking only 1st condition to parcellate
    write_volume(hrf[:,:,:,:,0], hrf_t_fn)
    
    nifti_masker = ionisl.NiftiMasker(remote_mask_fn)
    Nm = nifti_masker.fit(hrf_t_fn)
    
    #features: coeff of the HRF
    HRF = Nm.fit_transform(hrf_t_fn)
    
    mask, meta_data = read_volume(remote_mask_fn)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
            n_z=shape[2], mask=mask)
            
    #features used for clustering
    features = HRF.transpose()

    ward = WardAgglomeration(n_clusters=K_clus, connectivity=connectivity,
                                memory='nisl_cache')
    ward.fit(HRF)
    labels_tot = ward.labels_+1 
        
        
    #Kelbow, Perc_WSS, all_parc_from_RFIR_fns, all_parc_RFIR = \
    #clustering_from_RFIR(K_clusters, remote_RFIR_parc_clus, remote_mask_fn, name_hrf, plots=False)
    #labels_tot = all_parc_RFIR[str(Kelbow)]
    
    #to retrieve clustering with as many clusters as determined in K_clusters
    #labels_tot = all_parc_RFIR[str(K_clus)]
    #Parcellation retrieved: for K=Kelbow
    #clusters_RFIR_fn = all_parc_from_RFIR[str(Kelbow)]
    #clustering_rfir_fn = os.path.join(remote_RFIR_parc_clus, 'output_clustering_elbow.nii')
    #write_volume(read_volume(clusters_RFIR_fn)[0], clustering_rfir_fn, meta_bold)

    #labels_tot = nifti_masker.fit_transform([clusters_RFIR_fn])[0]
    #labels_tot = read_volume(clusters_RFIR_fn)[0]
    
    #labels_name='labels_' + str(int(K_clus)) + '_' + str(parc+1) + '.pck'
    #name_f = os.sep.join((remote_sub_parc, labels_name))
    #pickle_labels=open(name_f, 'w')
    #cPickle.dump(labels_tot,f)
    #pickle_labels.close()
    
    #remote_copy(pickle_labels, remote_user, 
            #remote_host, output_sub_parc)
    
    #################################
    ## Prepare consensus clustering
    print 'Prepare consensus clustering'
    clustcount, totalcount = upd_similarity_matrix(labels_tot)
    print 'results:', clustcount
    
    return clustcount.astype(np.bool)
Example 21
#io.savemat(os.path.join(BASE_DIR, "group/tc_rest_pca_vox.mat"), {"tc_group": tc_group})

# Perform parcellation on PCA-ed timecourses
brain_img = as_volume_img("/volatile/bernardng/templates/spm8/rgrey.nii")
brain = brain_img.get_data()
dim = np.shape(brain)
brain = brain > 0.2  # Generate brain mask
brain = mask_utils.largest_cc(brain)
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0],
                  n_y=brain.shape[1],
                  n_z=brain.shape[2],
                  mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=500, connectivity=A, memory=mem)
tc_group = tc_group.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc_group.shape[-1]
for t in np.arange(n_tpts):
    tc_group[:, :, :, t] = gaussian_filter(tc_group[:, :, :, t], sigma=5)
tc_group = tc_group.reshape((-1, n_tpts))
tc_group = tc_group[brain.ravel() == 1, :]
print("Performing Ward Clustering")
ward.fit(tc_group.T)
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain ==
         1] = ward.labels_ + 1  # Previously processed data did not include +1

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
    pl.figure(figsize=(3.8, 4.5))
    pl.axes([0, 0, 1, 1])
    pl.imshow(colors[np.rot90(cut)], interpolation='nearest')
    pl.axis('off')


# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

for n_clusters in 100, 1000:
    # Compute Ward clustering
    from sklearn.cluster import WardAgglomeration
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                            memory='nilearn_cache', compute_full_tree=True)
    ward.fit(X)

    labels = ward.labels_ + 1
    labels = masking.unmask(labels, adhd_mask)
    # 0 is the background, putting it to -1
    labels = labels - 1

    # Display the labels
    plot_labels(labels, 8)
    pl.savefig(join('clustering', 'ward_%i.eps' % n_clusters))
    pl.savefig(join('clustering', 'ward_%i.pdf' % n_clusters))

    # Compute Kmeans clustering
    from sklearn.cluster import MiniBatchKMeans
# Spatial smoothing to encourage smooth parcels
dim = np.shape(brain)
tc = tc.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc.shape[-1]
for t in np.arange(n_tpts):
    tc[:, :, :, t] = gaussian_filter(tc[:, :, :, t], sigma=1)
tc = tc.reshape((-1, n_tpts))
tc = tc[brain.ravel() == 1, :]

# Functional parcellation with Ward clustering
print("Performing Ward Clustering")
mem = Memory(cachedir=".", verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1], n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=n_parcels, connectivity=A.tolil(), memory=mem)
ward.fit(tc.T)
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain == 1] = ward.labels_ + 1  # labels start from 0, which is used for background

# Remove single voxels not connected to parcel
# for i in np.unique(template)[1:]:
#    labels, n_labels = label(template == i, structure=np.ones((3,3,3)))
#    if n_labels > 1:
# 	for j in np.arange(n_labels):
# 	    if np.sum(labels == j + 1) < 10:
# 		template[labels == j + 1] = 0

# Saving the template
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, PARCEL_PATH)
Example 24
        if types[ai] == bool:
            hemi = (hemi <= 0.).astype(float) - 0.5
        hemi = factors[ai] * hemi
        X = np.append(X.T, [hemi], axis=0).T
    X = np.append(X[:, 3:].T, fmri_masked, axis=0).T
    print(X.shape)
    # Compute a connectivity matrix (for constraining the clustering)
    connectivity = sk_image.grid_to_graph(n_x=mask.shape[0],
                                          n_y=mask.shape[1],
                                          n_z=mask.shape[2],
                                          mask=mask)

    # Cluster (#2)
    start = time.time()
    ward = WardAgglomeration(n_clusters=n_clusters,
                             connectivity=connectivity,
                             memory=memory)
    ward.fit(X.T)

    print("Ward agglomeration %d clusters: %.2fs" %
          (n_clusters, time.time() - start))

    # Compute an image with one ROI per label, and save to disk
    labels = ward.labels_ + 1  # Avoid 0 label - 0 means mask.
    labels_img = nifti_masker.inverse_transform(labels)
    labels_img.to_filename('parcellation.nii')

    # Plot image with len(labels) ROIs, and store
    #   the cut coordinates to reuse for all plots
    #   and the figure for plotting all to a common axis
    if 1 in plots:
Example 25
def feature_extractor(imgfile, maskfile, featurefile, maskerfile, wardfile, nclusters=[1000,], selectfile=None, targetfile=None, metafile=None, cachefile=None):
    
    resultdict = {"imgfile":imgfile, "maskfile":maskfile}
    # load data
    print "--loading data"
    nifti_masker = input_data.NiftiMasker(mask=maskfile, memory=cachefile, memory_level=1,
                              standardize=False)
    fmri_masked = nifti_masker.fit_transform(imgfile)
    print "--getting mask"
    mask = nifti_masker.mask_img_.get_data().astype(np.bool)
    
    # saveit
    joblib.dump(nifti_masker, maskerfile)
    resultdict["mask"]  = mask
    resultdict["Xmask"] = fmri_masked
    resultdict["maskerfile"] = maskerfile
    
    # get connectivity
    print "--getting connectivity"
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)
    # saveit
    resultdict["connectivity"]    = connectivity
    print "--save main file"
    np.savez(featurefile+"_main.npz", **resultdict)
    
    # run  ward
    y     = np.load(targetfile)["ymap"]
    meta  = np.load(metafile)
    train = meta["train"]
    test  = meta["test"]
    ncv   = meta['ycv']
    
    # for each cv set
    for cvx in range(ncv):
        trainidx = train[cvx]
        testidx  = test[cvx]
        resultdict = {}        
        wardfiles = []
        selectfiles = []
        print "--Running ward %d"%(cvx, )
        for ix, nc in enumerate(nclusters):
            ward = WardAgglomeration(n_clusters=nc, connectivity=connectivity, memory=cachefile)
            ward.fit(fmri_masked[trainidx])
            fmri_reduced_train = ward.transform(fmri_masked[trainidx])
            fmri_reduced_test  = ward.transform(fmri_masked[testidx])
            
            # saveit
            subwardfile = wardfile+"_D%d_cv%d.pkl"%(nc, cvx,)
            joblib.dump(ward, subwardfile)
            resultdict["Xward_%d_train"%(nc,)] = fmri_reduced_train
            resultdict["Xward_%d_test"%(nc,)]  = fmri_reduced_test
            wardfiles.append(subwardfile)
            
            # additional feature selection
            selector = SelectPercentile(f_classif, percentile=30)
            selector.fit(fmri_reduced_train, y[trainidx])
            fmri_select_train = selector.transform(fmri_reduced_train)
            fmri_select_test  = selector.transform(fmri_reduced_test)
            
            # saveit
            subselectfile = selectfile+"_D%d_cv%d.pkl"%(nc, cvx,)
            joblib.dump(selector, subselectfile)
            resultdict["Xselect_%d_train"%(nc,)] = fmri_select_train
            resultdict["Xselect_%d_test"%(nc,)]  = fmri_select_test
            selectfiles.append(subselectfile)
            
        resultdict["wardfiles"]   = wardfiles
        resultdict["selectfiles"] = selectfiles
        
        # save results
        print "--save cv result"
        np.savez(featurefile+"_cv%d.npz"%(cvx, ), **resultdict)
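A hedged sketch of reading one fold's outputs back (featurefile and wardfile stand for the same arguments passed to feature_extractor; assumes the default nclusters=[1000] and that fold 0 exists):

fold0 = np.load(featurefile + "_cv0.npz")
X_train_selected = fold0["Xselect_1000_train"]
ward0 = joblib.load(wardfile + "_D1000_cv0.pkl")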
### Mask ######################################################################

fmri_data = dataset.func[0]

# Compute a brain mask
from nisl import masking

mask = masking.compute_mask(fmri_data)

# Mask data: go from a 4D dataset to a 2D dataset with only the voxels
# in the mask
fmri_masked = fmri_data[mask]

### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image

shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time

start = time.time()
ward = WardAgglomeration(n_clusters=5000, connectivity=connectivity)  # memory='nisl_cache')
ward.fit(fmri_masked.T)
print "Ward agglomeration 500 clusters: %.2fs" % (time.time() - start)
Example 27
def boo(subject_idx=0, cut_coords=None, n_components=20, n_clusters=2000, memory='nilearn_cache'):

    mem = Memory(cachedir='nilearn_cache')

    # ## Load the data ###################################################

    print("Fetch the data files from Internet")
    haxby_dataset = datasets.fetch_haxby(n_subjects=subject_idx + 1)

    print("Second, load the labels")
    haxby_labels = np.genfromtxt(haxby_dataset.session_target[0],
                                 skip_header=1, usecols=[0],
                                 dtype=basestring)

    # ## Find voxels of interest ##############################################

    print("Load the data.")
    anat_filename = haxby_dataset.anat[subject_idx]
    anat_img = nibabel.load(anat_filename)
    fmri_filename = haxby_dataset.func[subject_idx]
    fmri_raw_img = nibabel.load(fmri_filename)

    print("Build a mask based on the activations.")
    epi_masker = NiftiMasker(mask_strategy='epi', detrend=True, standardize=True)
    epi_masker = mem.cache(epi_masker.fit)(fmri_raw_img)
    plot_roi(epi_masker.mask_img_,
             bg_img=anat_img,
             title='EPI mask',
             cut_coords=cut_coords)

    print("Normalize the (transformed) data")  # zscore per pixel, over examples.
    fmri_masked_vectors = epi_masker.transform(fmri_raw_img)
    fmri_normed_vectors = mem.cache(stats.mstats.zscore)(fmri_masked_vectors, axis=0)
    fmri_normed_img = epi_masker.inverse_transform(fmri_normed_vectors)

    print("Smooth the (spatial) data.")
    fmri_smooth_img = mem.cache(image.smooth_img)(fmri_normed_img, fwhm=7)

    print("Mask the MRI data.")
    masked_fmri_vectors = mem.cache(epi_masker.transform)(fmri_smooth_img)

    # ## Compute mean values based on condition matrix ##########################################
    condition_names = list(np.unique(haxby_labels))
    n_conditions = len(condition_names)
    n_good_voxels = masked_fmri_vectors.shape[1]

    mean_vectors = np.empty((n_conditions, n_good_voxels))
    for ci, condition in enumerate(condition_names):
        condition_vectors = masked_fmri_vectors[haxby_labels == condition, :]
        mean_vectors[ci, :] = condition_vectors.mean(axis=0)

    # ## Use similarity across conditions as the 4th dimension ##########################################
    n_conds = len(condition_names)
    n_compares = n_conds * (n_conds - 1) / 2

    p_vectors = np.zeros((n_compares, masked_fmri_vectors.shape[1]))
    comparison_text = []
    comparison_img = []
    idx = 0
    for i, cond in enumerate(condition_names):
        for j, cond2 in enumerate(condition_names[i+1:]):
            print("Computing ttest for %s vs. %s." % (cond, cond2))
            _, p_vector = stats.ttest_ind(
                masked_fmri_vectors[haxby_labels == cond, :],
                masked_fmri_vectors[haxby_labels == cond2, :],
                axis=0)

            p_vector /= p_vector.max()  # normalize
            p_vector = -np.log10(p_vector)
            p_vector[np.isnan(p_vector)] = 0.
            p_vector[p_vector > 10.] = 10.

            p_img = epi_masker.inverse_transform(p_vector)
            comparison_img.append(p_img)
            comparison_text.append('%s vs. %s' % (cond, cond2))
            p_vectors[idx, :] = p_vector
            idx += 1

    #n_comparisons = n_conditions * (n_conditions-1) / 2
    #similarity_vectors = np.empty((n_good_voxels, n_comparisons))
    #for vi in np.arange(n_good_voxels):
    #    similarity_vectors[vi, :] = pdist(mean_vectors[:, vi])



    # Compute a connectivity matrix (for constraining the clustering)
    mask_data = epi_masker.mask_img_.get_data().astype(np.bool)
    connectivity = sk_image.grid_to_graph(n_x=mask_data.shape[0], n_y=mask_data.shape[1],
                                          n_z=mask_data.shape[2], mask=mask_data)

    # Cluster (#2)

    start = time.time()
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity, memory=memory)
    ward.fit(p_vectors)

    print("Ward agglomeration %d clusters: %.2fs" % (
        n_clusters, time.time() - start))

    # Compute an image with one ROI per label, and save to disk
    labels = ward.labels_ + 1    # Avoid 0 label - 0 means mask.
    labels_img = epi_masker.inverse_transform(labels)
    labels_img.to_filename('parcellation.nii')

    # Plot image with len(labels) ROIs, and store
    #   the cut coordinates to reuse for all plots
    #   and the figure for plotting all to a common axis
    first_plot = plot_roi(labels_img, title="Ward parcellation", bg_img=anat_img)
    plt.show()
Example 28
def feature_extractor(imgfile,
                      maskfile,
                      featurefile,
                      maskerfile,
                      wardfile,
                      nclusters=[
                          1000,
                      ],
                      selectfile=None,
                      targetfile=None,
                      metafile=None,
                      cachefile=None):

    resultdict = {"imgfile": imgfile, "maskfile": maskfile}
    # load data
    print "--loading data"
    nifti_masker = input_data.NiftiMasker(mask=maskfile,
                                          memory=cachefile,
                                          memory_level=1,
                                          standardize=False)
    fmri_masked = nifti_masker.fit_transform(imgfile)
    print "--getting mask"
    mask = nifti_masker.mask_img_.get_data().astype(np.bool)

    # saveit
    joblib.dump(nifti_masker, maskerfile)
    resultdict["mask"] = mask
    resultdict["Xmask"] = fmri_masked
    resultdict["maskerfile"] = maskerfile

    # get connectivity
    print "--getting connectivity"
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0],
                                       n_y=shape[1],
                                       n_z=shape[2],
                                       mask=mask)
    # saveit
    resultdict["connectivity"] = connectivity
    print "--save main file"
    np.savez(featurefile + "_main.npz", **resultdict)

    # run  ward
    y = np.load(targetfile)["ymap"]
    meta = np.load(metafile)
    train = meta["train"]
    test = meta["test"]
    ncv = meta['ycv']

    # for each cv set
    for cvx in range(ncv):
        trainidx = train[cvx]
        testidx = test[cvx]
        resultdict = {}
        wardfiles = []
        selectfiles = []
        print "--Running ward %d" % (cvx, )
        for ix, nc in enumerate(nclusters):
            ward = WardAgglomeration(n_clusters=nc,
                                     connectivity=connectivity,
                                     memory=cachefile)
            ward.fit(fmri_masked[trainidx])
            fmri_reduced_train = ward.transform(fmri_masked[trainidx])
            fmri_reduced_test = ward.transform(fmri_masked[testidx])

            # saveit
            subwardfile = wardfile + "_D%d_cv%d.pkl" % (
                nc,
                cvx,
            )
            joblib.dump(ward, subwardfile)
            resultdict["Xward_%d_train" % (nc, )] = fmri_reduced_train
            resultdict["Xward_%d_test" % (nc, )] = fmri_reduced_test
            wardfiles.append(subwardfile)

            # additional feature selection
            selector = SelectPercentile(f_classif, percentile=30)
            selector.fit(fmri_reduced_train, y[trainidx])
            fmri_select_train = selector.transform(fmri_reduced_train)
            fmri_select_test = selector.transform(fmri_reduced_test)

            # saveit
            subselectfile = selectfile + "_D%d_cv%d.pkl" % (
                nc,
                cvx,
            )
            joblib.dump(selector, subselectfile)
            resultdict["Xselect_%d_train" % (nc, )] = fmri_select_train
            resultdict["Xselect_%d_test" % (nc, )] = fmri_select_test
            selectfiles.append(subselectfile)

        resultdict["wardfiles"] = wardfiles
        resultdict["selectfiles"] = selectfiles

        # save results
        print "--save cv result"
        np.savez(featurefile + "_cv%d.npz" % (cvx, ), **resultdict)
Example 29
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs multi-label
    classification with the given classifier using n-fold
    cross-validation. Constructs an OvR classifier for multilabel prediction.
    
    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (n_samples x n_labels) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes'| 'decision_tree' | 'logistic_regression'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to do Ward clustering or not. Uses n_clusters = 10,000; change the global
        N_CLUSTERS for a different value. Defaults to True.
    n_folds : int
        the number of cross-validation folds
        
    Returns
    -------
    score_per_class, score_per_label : tuple
        A tuple of two lists (one entry per fold) of metric dictionaries, keyed by metric name.
    """
    clf = None
    ward = None
    
    lb = preprocessing.LabelBinarizer()
    y_new = lb.fit_transform(y)
    #specify connectivity for clustering
    mask = nb.load('data/MNI152_T1_2mm_brain.nii.gz').get_data().astype('bool')
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)
    ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity)
    
    # choose and assign appropriate classifier
    classifier_dict = {'naive_bayes': OneVsRestClassifier(MultinomialNB()),
                       'logistic_regression': OneVsRestClassifier(LogisticRegression(penalty='l2')),
                       'decision_tree': tree.DecisionTreeClassifier()}
    
    clf = classifier_dict[classifier]
    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    score_per_class = []
    score_per_label = []
    for train, test in kf:
        x_train = np.ascontiguousarray(x[train])
        y_train = np.ascontiguousarray(y_new[train])
        x_test = np.ascontiguousarray(x[test])
        y_test = np.ascontiguousarray(y_new[test])
        if clustering: 
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        model = clf.fit(x_train, y_train)
        predicted  = model.predict(x_test)
        predict_prob = model.predict_proba(x_test)
        if isinstance(predict_prob, list):
            predict_prob = np.array(predict_prob)
        cls_scores = utils.score_results(y_test, predicted, predict_prob)
        label_scores = utils.label_scores(y_test, predicted, predict_prob)
        score_per_class.append(cls_scores)
        score_per_label.append(label_scores)
    return (score_per_class,score_per_label)
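A hypothetical call (x and y are placeholders for the term-by-voxel feature matrix and label array loaded elsewhere; clustering=True additionally requires data/MNI152_T1_2mm_brain.nii.gz and the global N_CLUSTERS):

score_per_class, score_per_label = classify(x, y,
                                            classifier='logistic_regression',
                                            clustering=False, n_folds=10)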
Example 30
### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0],
                                   n_y=shape[1],
                                   n_z=shape[2],
                                   mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=1000,
                         connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(fmri_masked)
print("Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start))

# Compute the ward with more clusters, should be faster as we are using
# the caching mechanism
start = time.time()
ward = WardAgglomeration(n_clusters=2000,
                         connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(fmri_masked)
print("Ward agglomeration 2000 clusters: %.2fs" % (time.time() - start))

### Show result ###############################################################
Example 31
fmri_masked = nifti_masker.fit_transform(dataset.func[0])
mask = nifti_masker.mask_img_.get_data().astype(np.bool)

### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(fmri_masked)
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)

# Compute the ward with more clusters, should be faster as we are using
# the caching mechanism
start = time.time()
ward = WardAgglomeration(n_clusters=2000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(fmri_masked)
print "Ward agglomeration 2000 clusters: %.2fs" % (time.time() - start)

### Show result ###############################################################

# Unmask data
# Avoid 0 label
Example 32
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs single-class
    classification with the given classifier using n-fold
    cross-validation. Constructs an OvO classifier for every pair of terms.
    
    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (1 x n_samples) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes'| 'svm' | 'logistic_regression' | 'ensemble'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to do Ward clustering or not. Uses n_clusters = 10,000; change the global
        N_CLUSTERS for a different value. Defaults to True.
    n_folds : int
        the number of cross-validation folds
        
    Returns
    -------
    accuracy : `numpy.ndarray`
        The results are stored as a list of confusion matrices for each fold and saved
        as a numpy array of arrays, for further analysis.
    """
    clf = None
    ward = None
    le = preprocessing.LabelEncoder()
    le.fit(y)
    y_new = le.transform(y)
    
    # choose and assign appropriate classifier
    classifier_dict = { 'naive_bayes' : MultinomialNB(),
                        'logistic_regression' : LogisticRegression(penalty='l2'),
                        'svm' : GridSearchCV(LinearSVC(), [{'C': [1, 10, 100, 1000]}])  
                       }
    if classifier == 'ensemble':
      clf_nb = classifier_dict['naive_bayes']
      clf_svm = classifier_dict['svm']
      clf_lr = classifier_dict['logistic_regression']
    else:
        clf = classifier_dict[classifier]
        
    # perform ward clustering if specified    
    if clustering:
        mask = np.load('data/2mm_brain_mask.npy')
        shape = mask.shape
        connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity)
    
    # actual cross validation    
    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    accuracy = []
    for train, test in kf:
        x_train = x[train]
        y_train  = y_new[train]
        x_test = x[test]
        y_test = y_new[test] 
        if clustering:
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        if classifier != 'ensemble':        
            predicted = clf.fit(x_train, y_train).predict(x_test)
        else:
            predicted_nb = clf_nb.fit(x_train, y_train).predict(x_test)
            predicted_lr = clf_lr.fit(x_train, y_train).predict(x_test)
            predicted_svm = clf_svm.fit(x_train, y_train).predict(x_test)
            predicted = predicted_nb + predicted_lr + predicted_svm
            predicted = np.array(predicted >= 2, dtype=int)
        conf_mat =  confusion_matrix(y_test, predicted, labels=[0,1])
        accuracy.append(conf_mat)
    return accuracy
Example 33
    
    # Load the mask and binarise it
    mask = seed.astype(np.bool)
    shape = mask.shape
    print 'compute adjacency matrix...'
    # compute the adjacency matrix over the target mask
    from sklearn.neighbors import kneighbors_graph
    connectivity = kneighbors_graph(connect_use2, 7)
    print 'connectivity for ward:', connectivity

    print 'ward clustering...'
#   perform a hierarchical clustering considering spatial neighborhood
    ward = WardAgglomeration(n_clusters = nb_cluster, connectivity=connectivity)    
    ward.fit(np.transpose(connect))
    labelsf = ward.labels_
    # the labels are the final labels of each voxels

# OBSOLETE : DON'T USE
elif option_cluster == 2:
# perform the k-means clustering : the labels for each voxel seed are in table : labelsf
    print 'kmeans...'
#
    k_means = KMeans(init = 'k-means++', n_clusters = nb_cluster, n_init = 10)
#
    k_means.fit(connect)
#
    labelsf = k_means.labels_
# USE IT INSTEAD
Example 34
    else:
        tc_group = np.hstack((tc_group, preprocessing.standardize(pca.transform(tc.T))))
    print("Concatenating subject" + sub + "'s timecourses")
#io.savemat(os.path.join(BASE_DIR, "group/tc_rest_pca_vox.mat"), {"tc_group": tc_group})

# Perform parcellation on PCA-ed timecourses
brain_img = as_volume_img("/volatile/bernardng/templates/spm8/rgrey.nii")
brain = brain_img.get_data()
dim = np.shape(brain)
brain = brain > 0.2 # Generate brain mask
brain = mask_utils.largest_cc(brain)
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1], n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=500, connectivity=A, memory=mem)
tc_group = tc_group.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc_group.shape[-1]
for t in np.arange(n_tpts):
    tc_group[:,:,:,t] = gaussian_filter(tc_group[:,:,:,t], sigma=5)
tc_group = tc_group.reshape((-1, n_tpts))
tc_group = tc_group[brain.ravel()==1, :]
print("Performing Ward Clustering")
ward.fit(tc_group.T)
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain==1] = ward.labels_ + 1 # Previously processed data did not include +1

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2)  # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(cachedir=cachedir, verbose=1)

# Ward agglomeration followed by BayesianRidge
A = grid_to_graph(n_x=size, n_y=size)
ward = WardAgglomeration(n_clusters=10, connectivity=A, memory=mem,
                         n_components=1)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
clf.fit(X, y)  # set the best parameters
coef_ = clf.best_estimator_.steps[-1][1].coef_
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
anova = feature_selection.SelectPercentile(f_regression)
clf = Pipeline([('anova', anova), ('ridge', ridge)])
# Select the optimal percentage of features with grid search
clf = GridSearchCV(clf, {'anova__percentile': [5, 10, 20]}, cv=cv)
clf.fit(X, y)  # set the best parameters
Example 36
def _ward_fit_transform(all_subjects_data, fit_samples_indices, connectivity,
                        n_parcels, offset_labels):
    """Ward clustering algorithm on a subsample and apply to the whole dataset.

    Computes a brain parcellation using Ward's clustering algorithm on some
    images, then averages the signal within parcels in order to reduce the
    dimension of the images of the whole dataset.
    This function is used with Randomized Parcellation Based Inference, so we
    need to save the labels to further perform the inverse transformation
    operation. The function therefore needs an offset to be applied on the
    labels so that they are unique across parcellations.

    Parameters
    ----------
    all_subjects_data : array_like, shape=(n_samples, n_voxels)
      Masked subject images as an array.

    fit_samples_indices : array-like,
      Indices of the samples used to compute the parcellation.

    connectivity : scipy.sparse.coo_matrix,
      Graph representing the spatial structure of the images (i.e. connections
      between voxels).

    n_parcels : int,
      Number of parcels for the parcellations.

    offset_labels : int,
      Offset for labels numbering.
      The purpose is to have different labels in all the parcellations that
      can be built by multiple calls to the current function.

    Returns
    -------
    parcelled_data : numpy.ndarray, shape=(n_samples, n_parcels)
      Average signal within each parcel for each subject.

    labels : np.ndarray, shape=(n_voxels,)
      Labels giving the correspondence between voxels and parcels.

    """
    # XXX: Delayed import is a mega hack which is unfortunately
    # required. In scipy versions < 0.11, this import ends up
    # importing matplotlib.pyplot. This sets the matplotlib backend
    # which causes our matplotlib backend setting code in
    # nilearn/plotting/__init__.py to have no effect. In environment
    # without X, e.g. travis-ci, that means the tests will fail with
    # the usual "TclError: no display name and no $DISPLAY environment
    # variable". Note this is dependent on the order of import,
    # whichever comes first has the only shot at setting the
    # matplotlib backend.
    from sklearn.cluster import WardAgglomeration

    # fit part
    data_fit = all_subjects_data[fit_samples_indices]
    ward = WardAgglomeration(n_clusters=n_parcels, connectivity=connectivity)
    ward.fit(data_fit)
    # transform part
    labels = ward.labels_ + offset_labels  # unique labels across parcellations
    parcelled_data = ward.transform(all_subjects_data)
    return parcelled_data, labels
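A synthetic usage sketch (arbitrary shapes; requires the WardAgglomeration-era scikit-learn that the function itself imports):

import numpy as np
from sklearn.feature_extraction.image import grid_to_graph

rng = np.random.RandomState(42)
all_subjects_data = rng.randn(20, 8 * 8)      # 20 subjects, 64 "voxels"
connectivity = grid_to_graph(8, 8)
parcelled, labels = _ward_fit_transform(all_subjects_data,
                                        fit_samples_indices=np.arange(10),
                                        connectivity=connectivity,
                                        n_parcels=16, offset_labels=0)
# parcelled.shape == (20, 16); labels.shape == (64,)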
Example 37
rois = ["V1"]
masks = [cortex.get_roi_mask("MLfs",
                             "20121210ML_auto1",
                             roi=roi)[roi] > 0 for roi in rois]
roimask = reduce(lambda x, y: (x + y), masks)
wardmask = cort_mask - roimask

# Load training, test fMRI data
trndata_roi = np.nan_to_num(data.get_train(masked=roimask)[:numtime])
trndata_ward = np.nan_to_num(data.get_train(masked=wardmask)[:numtime])

connectivity = image.grid_to_graph(n_x=wardmask.shape[0],
                                   n_y=wardmask.shape[1],
                                   n_z=wardmask.shape[2],
                                   mask=wardmask)
ward = WardAgglomeration(n_clusters=numclusters, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(trndata_ward)
labels = ward.labels_
trndata_collapsed = np.array([trndata_ward[:, labels == i].mean(1)
                              for i in range(numclusters)])
trndata = np.hstack((trndata_roi, trndata_collapsed.T))
valdata = data.get_val(masked=roimask)

from ridge import _RidgeGridCV

ridge = _RidgeGridCV(alpha_min=1., alpha_max=1000., n_grid_points=5,
                     n_grid_refinements=2, cv=2)

ridge_coefs = ridge.fit(sdeltrnstim, trndata).coef_.T
Uridge, sridge, VridgeT = np.linalg.svd(ridge_coefs, full_matrices=False)