def simple_sim1(shape, dissims,
                rois_arrangement='circle',
                roi_neighborhood=Sphere(5),
                nruns=1, nsubjects=1,
                # noise components -- we just add normal for now also with
                # spatial smoothing to possibly create difference in noise
                # characteristics across different kinds
                #
                # "Instrumental noise" -- generic nuisance
                noise_independent_std=0.4, noise_independent_smooth=3.,
                # "Intrinsic signal", specific per each subject (due to
                # motion, whatever) -- might be fun for someone to cluster,
                # but irrelevant for us
                noise_subject_n=1, noise_subject_std=0.4,
                noise_subject_smooth=1.5,
                # "Intrinsic common signal" -- probably generalizes across
                # subjects and fun for someone studying veins to get those
                # reproducible clusters.  It will be mixed in also with
                # different weights per each run.
                # Again -- might be fun for someone to cluster, but not for us
                # since it would not be representative of the original signal
                noise_common_n=1, noise_common_std=0.4,
                noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise components
    for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
      sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
      generated by random normal noise with subsequent spatial filtering,
      which are then mixed into each run data with random weights.  They
      are to simulate subject-specific intrinsic signals such as artifacts
      due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of
      them have similar blood distribution networks and other physiological
      parameters, and some intrinsic networks, which although similar in
      space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help
    to identify such common noise components and filter them out.  Also
    methods which iteratively remove non-informative projections (such as
    GLMdenoise) should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights, ideally we
    should color those as well

    Parameters
    ----------
    shape : tuple of int
      Spatial shape of the simulated "picture".  It is concatenated with a
      trailing axis holding the values of one dissimilarity structure.
      The ``ndim == 3`` sanity checks and ``np.rollaxis(..., 2, 0)`` below
      mean that in practice only 2-D shapes are supported.
    dissims : sequence
      One dissimilarity structure per ROI.  Each is converted to a
      similarity (``1 - dissim``), Fisher transformed and passed through
      `vector_form`.  The input is copied, not modified.
    rois_arrangement : str
      Only 'circle' is implemented: ROI centers are placed on a circle
      of radius ``min(shape[:2]) / 4`` around the center.
    roi_neighborhood : callable
      Called with an ROI center; must return an iterable of coordinates
      belonging to that ROI.  Coordinates outside of the volume are
      skipped.
    nruns, nsubjects : int
      Number of runs per subject and number of subjects to simulate.
    noise_independent_std, noise_independent_smooth : float
      Scale of the per-run normal noise and the second argument given to
      `filter_each_2d` for it.
    noise_subject_n, noise_subject_std, noise_subject_smooth
      Passed to `get_intrinsic_noises` as ``n``, ``std`` and ``sigma`` to
      create the subject-specific noise fields (new ones per subject).
    noise_common_n, noise_common_std, noise_common_smooth
      Passed to `get_intrinsic_noises` as ``n``, ``std`` and ``sigma`` to
      create the noise fields shared by all subjects (created once).

    Returns
    -------
    signal_clean : ndarray
      Noise-free picture mapped back with ``tanh``, of shape
      ``shape + (nvalues,)``.
    cluster_truth : ndarray of int
      Array of ``shape`` with 0 for background and ``i + 1`` for voxels of
      the i-th ROI.
    dss : list
      One dataset per subject: the vstack of the flattened per-run
      datasets, carrying 'chunks' (run index) and 'dissimilarity' sample
      attributes and the flattening mapper in ``.a.mapper``.

    Raises
    ------
    ValueError
      If `rois_arrangement` is not 'circle'.
    """
    ndissims = len(dissims)

    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    # only exact +/-1 are remapped -- those are the values for which
    # arctanh would return +/-inf
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    # number of values describing a single dissimilarity structure; kept
    # explicitly instead of relying on a variable leaking out of the ROI loop
    nvalues = len(vector_form(d))
    signal_clean = np.zeros(shape + (nvalues,))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2,) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            angle = 2 * np.pi * i / ndissims
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(angle))
            roi_center[1] += int(radius * np.sin(angle))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                upper = signal_clean.shape[:len(coords)]
                # silently skip coordinates falling outside of the volume
                if np.all(acoords >= 0) and np.all(acoords < upper):
                    signal_clean[coords] = dissim
                    cluster_truth[coords] = i + 1
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(
        signal_clean.shape,
        std=noise_common_std,
        sigma=noise_common_smooth,
        n=noise_common_n)
    # check every field (also tolerates no fields being requested)
    assert all(noise.ndim == 3 for noise in common_noises), \
        "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    dss = []
    # range() instead of xrange(): works on both Python 2 and 3
    for _isubject in range(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with original design/similarity but having spatial
        # structure which repeats through runs with random weights
        # (consider it to be a principal component)

        # generated randomly for each subject separately, but they should
        # have common structure across runs
        subj_specific_noises = get_intrinsic_noises(
            signal_clean.shape,
            std=noise_subject_std,
            sigma=noise_subject_smooth,
            n=noise_subject_n)
        assert all(noise.ndim == 3 for noise in subj_specific_noises), \
            "There should be no time comp"

        dss_subject = []
        # common fields get one random weight per subject ...
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]

        # ... and on top of that per-run mix-in weights
        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)
                * noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(nvalues)  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)

        ds = dsvstack(dss_subject)
        # .a are not transferred by vstack
        ds.a['mapper'] = dss_subject[0].a.mapper
        dss.append(ds)

    # sanity checks on what we have generated
    assert len(dss) == nsubjects
    assert all(len(ds_) == nruns * nvalues for ds_ in dss)

    return np.tanh(signal_clean), cluster_truth, dss
def simple_sim1(shape, dissims,
                rois_arrangement='circle',
                roi_neighborhood=Sphere(5),
                nruns=1, nsubjects=1,
                # noise components -- we just add normal for now also with
                # spatial smoothing to possibly create difference in noise
                # characteristics across different kinds
                #
                # "Instrumental noise" -- generic nuisance
                noise_independent_std=0.4, noise_independent_smooth=3.,
                # "Intrinsic signal", specific per each subject (due to
                # motion, whatever) -- might be fun for someone to cluster,
                # but irrelevant for us
                noise_subject_n=1, noise_subject_std=0.4,
                noise_subject_smooth=1.5,
                # "Intrinsic common signal" -- probably generalizes across
                # subjects and fun for someone studying veins to get those
                # reproducible clusters.  It will be mixed in also with
                # different weights per each run.
                # Again -- might be fun for someone to cluster, but not for us
                # since it would not be representative of the original signal
                noise_common_n=1, noise_common_std=0.4,
                noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise components
    for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
      sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
      generated by random normal noise with subsequent spatial filtering,
      which are then mixed into each run data with random weights.  They
      are to simulate subject-specific intrinsic signals such as artifacts
      due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of
      them have similar blood distribution networks and other physiological
      parameters, and some intrinsic networks, which although similar in
      space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help
    to identify such common noise components and filter them out.  Also
    methods which iteratively remove non-informative projections (such as
    GLMdenoise) should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights, ideally we
    should color those as well

    Parameters
    ----------
    shape : tuple of int
      Spatial shape of the simulated "picture".  It is concatenated with a
      trailing axis holding the values of one dissimilarity structure.
      The ``ndim == 3`` sanity checks and ``np.rollaxis(..., 2, 0)`` below
      mean that in practice only 2-D shapes are supported.
    dissims : sequence
      One dissimilarity structure per ROI.  Each is converted to a
      similarity (``1 - dissim``), Fisher transformed and passed through
      `vector_form`.  The input is copied, not modified.
    rois_arrangement : str
      Only 'circle' is implemented: ROI centers are placed on a circle
      of radius ``min(shape[:2]) / 4`` around the center.
    roi_neighborhood : callable
      Called with an ROI center; must return an iterable of coordinates
      belonging to that ROI.  Coordinates outside of the volume are
      skipped.
    nruns, nsubjects : int
      Number of runs per subject and number of subjects to simulate.
    noise_independent_std, noise_independent_smooth : float
      Scale of the per-run normal noise and the second argument given to
      `filter_each_2d` for it.
    noise_subject_n, noise_subject_std, noise_subject_smooth
      Passed to `get_intrinsic_noises` as ``n``, ``std`` and ``sigma`` to
      create the subject-specific noise fields (new ones per subject).
    noise_common_n, noise_common_std, noise_common_smooth
      Passed to `get_intrinsic_noises` as ``n``, ``std`` and ``sigma`` to
      create the noise fields shared by all subjects (created once).

    Returns
    -------
    signal_clean : ndarray
      Noise-free picture mapped back with ``tanh``, of shape
      ``shape + (nvalues,)``.
    cluster_truth : ndarray of int
      Array of ``shape`` with 0 for background and ``i + 1`` for voxels of
      the i-th ROI.
    dss : list
      One dataset per subject: the vstack of the flattened per-run
      datasets, carrying 'chunks' (run index) and 'dissimilarity' sample
      attributes and the flattening mapper in ``.a.mapper``.

    Raises
    ------
    ValueError
      If `rois_arrangement` is not 'circle'.
    """
    ndissims = len(dissims)

    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    # only exact +/-1 are remapped -- those are the values for which
    # arctanh would return +/-inf
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    # number of values describing a single dissimilarity structure; kept
    # explicitly instead of relying on a variable leaking out of the ROI loop
    nvalues = len(vector_form(d))
    signal_clean = np.zeros(shape + (nvalues,))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2,) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            angle = 2 * np.pi * i / ndissims
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(angle))
            roi_center[1] += int(radius * np.sin(angle))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                upper = signal_clean.shape[:len(coords)]
                # silently skip coordinates falling outside of the volume
                if np.all(acoords >= 0) and np.all(acoords < upper):
                    signal_clean[coords] = dissim
                    cluster_truth[coords] = i + 1
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(
        signal_clean.shape,
        std=noise_common_std,
        sigma=noise_common_smooth,
        n=noise_common_n)
    # check every field (also tolerates no fields being requested)
    assert all(noise.ndim == 3 for noise in common_noises), \
        "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    dss = []
    # range() instead of xrange(): works on both Python 2 and 3
    for _isubject in range(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with original design/similarity but having spatial
        # structure which repeats through runs with random weights
        # (consider it to be a principal component)

        # generated randomly for each subject separately, but they should
        # have common structure across runs
        subj_specific_noises = get_intrinsic_noises(
            signal_clean.shape,
            std=noise_subject_std,
            sigma=noise_subject_smooth,
            n=noise_subject_n)
        assert all(noise.ndim == 3 for noise in subj_specific_noises), \
            "There should be no time comp"

        dss_subject = []
        # common fields get one random weight per subject ...
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]

        # ... and on top of that per-run mix-in weights
        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)
                * noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(nvalues)  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)

        ds = dsvstack(dss_subject)
        # .a are not transferred by vstack
        ds.a['mapper'] = dss_subject[0].a.mapper
        dss.append(ds)

    # sanity checks on what we have generated
    assert len(dss) == nsubjects
    assert all(len(ds_) == nruns * nvalues for ds_ in dss)

    return np.tanh(signal_clean), cluster_truth, dss
nsubjects=2, noise_subject_n=1, noise_subject_std=5, noise_subject_smooth=5, noise_independent_std=4, noise_independent_smooth=1.5, noise_common_n=1, noise_common_std=3) # just a little helper def get2d(ds): return dss[0].a.mapper.reverse(ds) import pylab as pl pl.clf() DS = dsvstack(dss) # Sample plots for s in [0, 1]: ds2 = get2d(dss[0]) for r in [0, 1]: pl.subplot(3, 3, 1 + r + s * 3) pl.imshow(ds2[ds2.sa.chunks == r].samples[0], interpolation='nearest') pl.ylabel('subj%d' % s) pl.xlabel('run1') pl.subplot(3, 3, 3 + s * 3) pl.imshow(get2d(mean_group_sample(['dissimilarity' ])(dss[0]).samples)[0], interpolation='nearest') pl.xlabel('mean')
if __name__ == '__main__':
    # Demo: simulate 2 subjects x 3 runs on a 64x64 grid with 4 ROIs and
    # show a 3x3 panel of plots:
    #   rows 0-1 -- one row per subject: run 0, run 1, mean across runs
    #   row 2    -- mean across subjects: run 0, run 1, global mean
    a_clean, cluster_truth, dss = simple_sim1(
        (64, 64), [[1], [0.8], [0.5], [0.3]],
        roi_neighborhood=Sphere(6),
        nruns=3, nsubjects=2,
        noise_subject_n=1, noise_subject_std=5, noise_subject_smooth=5,
        noise_independent_std=4, noise_independent_smooth=1.5,
        noise_common_n=1, noise_common_std=3)

    # just a little helper
    def get2d(ds):
        """Map flattened data back via the mapper of the first subject."""
        return dss[0].a.mapper.reverse(ds)

    import pylab as pl
    pl.clf()
    # not used by the plots below; kept as a module-level name in case it
    # is inspected interactively or used further down -- TODO confirm
    DS = dsvstack(dss)

    # Sample plots
    for s in [0, 1]:
        # plot the subject this row is labelled with (used to be always
        # the first subject)
        ds2 = get2d(dss[s])
        for r in [0, 1]:
            pl.subplot(3, 3, 1 + r + s * 3)
            pl.imshow(ds2[ds2.sa.chunks == r].samples[0],
                      interpolation='nearest')
            pl.ylabel('subj%d' % s)
            # label with the actual run (used to be a constant 'run1')
            pl.xlabel('run%d' % r)
        # last column: mean across the runs of this subject
        pl.subplot(3, 3, 3 + s * 3)
        subj_mean = mean_group_sample(['dissimilarity'])(dss[s])
        pl.imshow(get2d(subj_mean.samples)[0], interpolation='nearest')
        pl.xlabel('mean')

    # bottom row: averages across subjects
    ds = dsvstack(dss)
    ds.a['mapper'] = dss[0].a.mapper
    ds_mean = mean_group_sample(['dissimilarity', 'chunks'])(ds)
    for r in [0, 1]:
        ds_mean_run = ds.a.mapper.reverse(ds_mean[ds_mean.chunks == r])
        pl.subplot(3, 3, 1 + r + 2 * 3)
        pl.imshow(ds_mean_run.samples[0], interpolation='nearest')
        pl.ylabel('mean(subj)')
        pl.xlabel('run%d' % r)

    # bottom-right: mean across all subjects and runs
    ds_global_mean = mean_group_sample(['dissimilarity'])(ds)
    pl.subplot(3, 3, 3 + 2 * 3)
    pl.imshow(get2d(ds_global_mean).samples[0], interpolation='nearest')
    pl.xlabel('mean')