def test_featuregroup_mapper():
    """Exercise the feature-averaging mappers with and without grouping."""
    roi_labels = [0, 1] * 4

    flat_ds = Dataset(np.arange(24).reshape(3, 8))
    flat_ds.fa['roi'] = roi_labels
    # sample attribute that has to pass through the mapper untouched
    flat_ds.sa['chunks'] = np.arange(3)

    # per-sample means of the even (roi 0) and odd (roi 1) columns
    expected_means = [[3, 4], [11, 12], [19, 20]]

    grouping_mapper = mean_group_feature(['roi'])
    grouped = grouping_mapper.forward(flat_ds)
    assert_equal(grouped.shape, (3, 2))
    assert_array_equal(grouped.samples, expected_means)
    assert_array_equal(grouped.fa.roi, np.unique(roi_labels))
    # sample attributes should simply remain the same
    assert_array_equal(grouped.sa.chunks, np.arange(3))

    # now without grouping
    plain_mapper = mean_feature()
    # a bare samples array and a dataset must be mapped to the same values
    from_array = plain_mapper.forward(flat_ds.samples)
    from_dataset = plain_mapper.forward(flat_ds).samples
    assert_array_equal(from_array, from_dataset)

    # with >1D samples the mean is taken only across the first feature
    # dimension, i.e. axis 1 of the samples array
    nd_ds = Dataset(np.arange(24).reshape(3, 2, 2, 2))
    nd_mapped = nd_ds.get_mapped(plain_mapper)
    assert_array_equal(plain_mapper.forward(nd_ds.samples), nd_mapped.samples)
    assert_array_equal(nd_mapped.samples.shape, (3, 2, 2))
    assert_array_equal(nd_mapped.samples, np.mean(nd_ds.samples, axis=1))
    # reverse mapping is not available ATM; pin that for consistency
    assert_raises(NotImplementedError,
                  nd_mapped.a.mapper.reverse, nd_mapped.samples)

    # standard 2d sample arrays have to work as well
    flat_mapped = Dataset(np.arange(24).reshape(3, 8)).get_mapped(plain_mapper)
    assert_array_equal(flat_mapped.samples.shape, (3, 1))
def test_featuregroup_mapper():
    """Check mean_group_feature / mean_feature on 2D and N-D samples."""
    ds = Dataset(np.arange(24).reshape(3, 8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results: means over the features of roi 0 and roi 1,
    # the unique roi labels, and the unchanged chunks
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, croi)
    # SAs should simply remain the same
    assert_array_equal(mds.sa.chunks, cchunks)

    # now without grouping
    m = mean_feature()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # And when operating on a dataset with >1D samples, then operate
    # only across "features", i.e. 1st dimension
    ds = Dataset(np.arange(24).reshape(3, 2, 2, 2))
    mapped = ds.get_mapped(m)
    assert_array_equal(m.forward(ds.samples),
                       mapped.samples)
    assert_array_equal(mapped.samples.shape, (3, 2, 2))
    assert_array_equal(mapped.samples, np.mean(ds.samples, axis=1))
    # and still could map back? ;) not ATM, so just to ensure consistency
    assert_raises(NotImplementedError,
                  mapped.a.mapper.reverse, mapped.samples)
    # but it should also work with standard 2d sample arrays
    ds = Dataset(np.arange(24).reshape(3, 8))
    mapped = ds.get_mapped(m)
    assert_array_equal(mapped.samples.shape, (3, 1))
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None,):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coords)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * affine of the image (imgaffine)
      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in the
      same space (orientation and dimensions) as the timeseries image
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples
    imgdata, imghdr, img = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is not None:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        space = None
    else:
        space = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient
        # than mapping through a StaticFeatureSelection, so let's use it
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting NIfTI props in the dataset in a more portable way;
    # the `affine` property replaces the removed `get_affine()` accessor
    ds.a['imgaffine'] = img.affine
    ds.a['imgtype'] = img.__class__.__name__
    # stick the header instance in as is, and ...
    ds.a['imghdr'] = imghdr
    # ... let strip_nibabel() be the central place to take care of any header
    # conversion into non-NiBabel dtypes
    strip_nibabel(ds)

    # If there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = \
            np.arange(len(ds), dtype='float') * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
def eeglab_dataset(samples):
    '''Make a Dataset instance from EEGLAB input data

    The first non-empty line of the input holds the channel names. Every
    following non-empty line holds a time point followed by one value per
    channel. A time point smaller than the one on the previous line starts
    a new sample.

    Parameters
    ----------
    samples: str
      Filename of an EEGLAB text file. If the string does not look like a
      filename, it is parsed as the text content itself.

    Returns
    -------
    ds: mvpa2.base.dataset.Dataset
      Dataset with the contents of the input file. The data are arranged
      as (sample X time X channel) and flattened to
      (sample X (time X channel)); the feature attributes 'channelids' and
      'timepoints' carry the channel name and time point of each feature.

    Raises
    ------
    ValueError
      If the input is not a string, the file does not exist, a line has
      the wrong number of values, samples differ in their number of time
      points or in their time points, or time points are not equally
      spaced.
    '''
    if not isinstance(samples, basestring):
        raise ValueError("Samples should be a string")

    text = samples
    if _looks_like_filename(text):
        if not os.path.exists(text):
            raise ValueError("Input looks like a filename, but file"
                             " %s does not exist" % text)
        with open(text) as f:
            text = f.read()

    channel_labels = None
    n_channels = 0
    trials = []        # one list of (time, channel values) tuples per sample
    cur_trial = None
    prev_t = None

    for i, line in enumerate(text.split('\n')):
        fields = line.split()
        if not fields:
            # empty or whitespace-only line
            continue

        if channel_labels is None:
            # first non-empty line contains the channel names
            channel_labels = fields
            n_channels = len(channel_labels)
            continue

        # first value is the time point, the remainders the value
        # for each channel; a real list is needed for indexing/slicing
        values = [float(field) for field in fields]
        t = values[0]       # time
        eeg = values[1:]    # values for each electrode

        if len(eeg) != n_channels:
            raise ValueError("Line %d: expected %d values but found %d" %
                             (i + 1, n_channels, len(eeg)))

        if cur_trial is None or t < prev_t:
            # time went backwards (or very first data line): new sample
            cur_trial = []
            trials.append(cur_trial)

        cur_trial.append((t, eeg))
        prev_t = t

    # get and verify number of elements in each dimension
    n_samples = len(trials)
    n_timepoints_all = [len(trial) for trial in trials]

    n_timepoints_unique = set(n_timepoints_all)
    if len(n_timepoints_unique) != 1:
        raise ValueError("Different number of time points in different "
                         "samples: found %d different lengths" %
                         len(n_timepoints_unique))

    n_timepoints = n_timepoints_all[0]

    shape = (n_samples, n_timepoints, n_channels)

    # allocate space for data
    data = np.zeros(shape)

    # make a list of all channels and timepoints
    channel_array = np.asarray(channel_labels)
    timepoint_array = np.asarray([t for t, _ in trials[0]])

    # with a single time point there are no deltas to compare
    dts = timepoint_array[1:] - timepoint_array[:-1]
    if dts.size and not np.all(dts == dts[0]):
        raise ValueError("Delta time points are different")

    # put the values in the data array
    for i, trial in enumerate(trials):
        for j, (t, eeg) in enumerate(trial):
            # check that the time is the same
            if i > 0 and timepoint_array[j] != t:
                raise ValueError("Sample %d, time point %s is different "
                                 "than the first sample (%s)" %
                                 (i, t, timepoint_array[j]))

            # len(eeg) == n_channels was verified while parsing
            data[i, j, :] = eeg

    trials = None  # and let gc do its job

    # make a Dataset instance with the data
    ds = Dataset(data)

    # append a flatten_mapper to go from 3D (sample X time X channel)
    # to 2D (sample X (time X channel))
    flatten_mapper = FlattenMapper(shape=shape[1:],
                                   space='time_channel_indices')
    ds = ds.get_mapped(flatten_mapper)

    # make this a 3D array of the proper size
    channel_array_3D = np.tile(channel_array, (1, n_timepoints, 1))
    timepoint_array_3D = np.tile(np.reshape(timepoint_array, (-1, 1)),
                                 (1, 1, n_channels))

    # for consistency use the flatten_mapper defined above to
    # flatten channel and timepoint names as well
    ds.fa['channelids'] = flatten_mapper.forward(channel_array_3D).ravel()
    ds.fa['timepoints'] = flatten_mapper.forward(timepoint_array_3D).ravel()

    # make some dynamic properties
    # XXX at the moment we don't have proper 'protection' in case
    # the feature space is sliced in a way so that some channels and/or
    # timepoints occur more often than others
    _eeglab_set_attributes(ds)

    return ds
def fmri_dataset(
        samples,
        targets=None,
        chunks=None,
        mask=None,
        sprefix='voxel',
        tprefix='time',
        add_fa=None,
):
    """Build a dataset out of an fMRI timeseries image.

    Every volume of the timeseries becomes one sample. The 3D volumes are
    flattened into 1D feature vectors and, when a mask is given, reduced
    to the voxels where the mask is non-zero.

    Besides the optional `targets` and `chunks` samples attributes, the
    following attributes are stored (names shown for the default
    prefixes, which can be changed through `sprefix` and `tprefix`):

    Samples attributes (one value per volume):

      * volume index (time_indices)
      * volume acquisition time (time_coords)

    Feature attributes (one value per voxel):

      * voxel indices, a.k.a. ijk (voxel_indices)

    Dataset attributes:

      * image affine (imgaffine)
      * image header data, e.g. of a NIfTI image (imghdr)
      * class name of the image, e.g. Nifti1Image (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The attribute values are only as valid as the settings found in the
    image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      The fMRI timeseries: a filename of a 4D image, a 4D image instance,
      or a list of filenames or image instances with one 3D volume per
      list item.
    targets : scalar or sequence
      Label for each volume, or a single value assigned to all samples.
    chunks : scalar or sequence
      Chunk for each volume, or a single value assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D mask volume. Voxels with
      non-zero mask values are selected. The mask must share the space
      (orientation and dimensions) of the timeseries image.
    sprefix : str or None
      Prefix of the attribute names for spatial properties. With None no
      such attributes are stored.
    tprefix : str or None
      Prefix of the attribute names for temporal properties. With None no
      such attributes are stored.
    add_fa : dict or None
      Additional volumetric data to store as feature attributes, keyed by
      feature attribute name. Each value may be anything the `mask`
      argument supports.

    Returns
    -------
    Dataset
    """
    # load the samples
    imgdata, imghdr, img = _load_anyimg(samples, ensure=True, enforce_dim=4)
    n_volumes = imgdata.shape[0]
    volume_shape = imgdata.shape[1:]

    # Only known mask flavors are resolved here (data only, the header is
    # ignored); anything else is handed to the mapper untouched, which
    # maybe knows more.
    loaded_mask = _load_anyimg(mask)
    if loaded_mask is not None:
        mask = loaded_mask[0]

    # compile the samples attributes
    sa = {}
    for attr_name, attr_value in (('targets', targets), ('chunks', chunks)):
        if attr_value is not None:
            sa[attr_name] = _expand_attribute(attr_value, n_volumes, attr_name)

    # create the dataset and flatten each volume into a feature vector
    space = None if sprefix is None else sprefix + '_indices'
    flattener = FlattenMapper(shape=volume_shape, space=space)
    ds = Dataset(imgdata, sa=sa).get_mapped(flattener)

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible and potentially more efficient than
        # going through a dedicated feature selection mapper
        selected = flatmask != 0
        ds = ds[:, selected]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            volume = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(volume)

    # store interesting NIfTI props in the dataset in a more portable way
    ds.a['imgaffine'] = img.affine
    ds.a['imgtype'] = img.__class__.__name__
    # the header goes in as is; strip_nibabel() is the central place that
    # takes care of converting it into non-NiBabel dtypes
    ds.a['imghdr'] = imghdr
    strip_nibabel(ds)

    # if there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = volume_shape
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit

    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        volume_numbers = np.arange(len(ds), dtype='float')
        ds.sa[tprefix + '_coords'] = volume_numbers * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
def simple_sim1(
        shape,
        dissims,
        rois_arrangement='circle',
        roi_neighborhood=Sphere(5),
        nruns=1,
        nsubjects=1,
        # noise components -- we just add normal for now also with
        # spatial smoothing to possibly create difference in noise
        # characteristics across different kinds
        #
        # "Instrumental noise" -- generic nuisance
        noise_independent_std=0.4,
        noise_independent_smooth=3.,
        # "Intrinsic signal", specific per each subject (due to
        # motion, whatever) -- might be fun for someone to cluster,
        # but irrelevant for us
        noise_subject_n=1,
        noise_subject_std=0.4,
        noise_subject_smooth=1.5,
        # "Intrinsic common signal" -- probably generalizes across
        # subjects and fun for someone studying veins to get those
        # reproducible clusters.  It will be mixed in also with
        # different weights per each run.
        # Again -- might be fun for someone to cluster, but not for us
        # since it would not be representative of the original signal
        noise_common_n=1,
        noise_common_std=0.4,
        noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise components
    for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
      sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
      generated by random normal noise with subsequent spatial filtering,
      which are then mixed into each run data with random weights.  They
      are to simulate subject-specific intrinsic signals such as artifacts
      due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of them
      have similar blood distribution networks and other physiological
      parameters, and some intrinsic networks, which although similar in
      space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights, ideally we
    should color those as well

    Parameters
    ----------
    shape : tuple of int
      Spatial shape of the simulated "picture".  Has to be a tuple, since
      it is concatenated with the length of the dissimilarity vector.
      NOTE(review): the ``ndim == 3`` assertions on the noise fields
      suggest that a 2D shape is expected -- confirm against
      get_intrinsic_noises.
    dissims : sequence of array-like
      One dissimilarity structure per ROI.  They are converted into
      similarities (``1 - dissims``), values of exactly 1 and -1 are
      replaced by 0.99 and -0.99, and the result is Fisher-transformed
      (arctanh) so that normal noise can be added.
    rois_arrangement : str
      Only 'circle' is supported: ROI centers are placed on a circle of a
      quarter of the smaller of the first two `shape` extents around the
      center.
    roi_neighborhood : callable
      Called with an ROI center; has to provide the coordinates which
      belong to that ROI.  Coordinates outside of `shape` are skipped.
    nruns : int
      Number of runs to simulate per subject.
    nsubjects : int
      Number of subjects to simulate.
    noise_independent_std, noise_independent_smooth : float
      Standard deviation of the normal noise added to each run and the
      smoothing argument passed along with it to filter_each_2d.
    noise_subject_n, noise_subject_std, noise_subject_smooth
      ``n``, ``std`` and ``sigma`` passed to get_intrinsic_noises to
      generate the subject-specific noise fields.
    noise_common_n, noise_common_std, noise_common_smooth
      ``n``, ``std`` and ``sigma`` passed to get_intrinsic_noises to
      generate the noise fields shared across subjects.

    Returns
    -------
    signal_clean : array
      Noise-free signal, transformed back with tanh.
    cluster_truth : array of int
      Array of `shape` with ``i + 1`` for the voxels of the i-th ROI and 0
      elsewhere.
    dss : list of Dataset
      One dataset per subject: the vertically stacked, flattened datasets
      of all of its runs.

    Raises
    ------
    ValueError
      If `rois_arrangement` is anything but 'circle'.
    """
    ndissims = len(dissims)
    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    # number of values per voxel; the same for every entry of the
    # (regular) dissims array
    n_values = len(vector_form(d))
    signal_clean = np.zeros(shape + (n_values, ))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2, ) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            roi_values = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2 * np.pi * i / ndissims))
            roi_center[1] += int(radius * np.sin(2 * np.pi * i / ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                # skip coordinates falling outside of the picture
                if np.all(acoords >= [0] * len(coords)) and \
                   np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean[coords] = roi_values
                    cluster_truth[coords] = i + 1
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(signal_clean.shape,
                                         std=noise_common_std,
                                         sigma=noise_common_smooth,
                                         n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    dss = []
    for _ in range(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with original design/similarity but having spatial
        # structure which repeats through runs with random weights
        # (consider it to be a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(
            signal_clean.shape,
            std=noise_subject_std,
            sigma=noise_subject_smooth,
            n=noise_subject_n)
        assert subj_specific_noises[0].ndim == 3, \
            "There should be no time comp"

        dss_subject = []
        # common noise fields get a random weight per subject
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)
                * noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(n_values)  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)

        ds = dsvstack(dss_subject)
        # .a are not transferred by vstack
        ds.a['mapper'] = dss_subject[0].a.mapper
        dss.append(ds)

    # sanity checks on the generated datasets
    assert len(dss) == nsubjects
    assert len(dss[0]) == nruns * n_values

    return np.tanh(signal_clean), cluster_truth, dss
def simple_sim1(shape, dissims,
                rois_arrangement='circle',
                roi_neighborhood=Sphere(5),
                nruns=1, nsubjects=1,
                # noise components -- we just add normal for now also with
                # spatial smoothing to possibly create difference in noise
                # characteristics across different kinds
                #
                # "Instrumental noise" -- generic nuisance
                noise_independent_std=0.4, noise_independent_smooth=3.,
                # "Intrinsic signal", specific per each subject (due to
                # motion, whatever) -- might be fun for someone to cluster,
                # but irrelevant for us
                noise_subject_n=1, noise_subject_std=0.4,
                noise_subject_smooth=1.5,
                # "Intrinsic common signal" -- probably generalizes across
                # subjects and fun for someone studying veins to get those
                # reproducible clusters.  It will be mixed in also with
                # different weights per each run.
                # Again -- might be fun for someone to cluster, but not for us
                # since it would not be representative of the original signal
                noise_common_n=1, noise_common_std=0.4,
                noise_common_smooth=2.
                ):
    """Simulate "data" containing similarity matrices with 3 noise components
    for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
      sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
      generated by random normal noise with subsequent spatial filtering,
      which are then mixed into each run data with random weights.  They
      are to simulate subject-specific intrinsic signals such as artifacts
      due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of them
      have similar blood distribution networks and other physiological
      parameters, and some intrinsic networks, which although similar in
      space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights, ideally we
    should color those as well

    Parameters
    ----------
    shape : tuple of int
      Spatial shape of the simulated "picture"; a tuple, because the
      number of values per voxel is appended to it.
      NOTE(review): the noise fields are asserted to have ``ndim == 3``,
      which presumably means `shape` has two elements -- verify against
      get_intrinsic_noises.
    dissims : sequence of array-like
      Dissimilarities, one entry per ROI.  Turned into similarities via
      ``1 - dissims``; exact values of 1 / -1 become 0.99 / -0.99 before
      the Fisher transform (arctanh).
    rois_arrangement : str
      Has to be 'circle', which spreads the ROI centers over a circle
      around the center of the picture.
    roi_neighborhood : callable
      Given an ROI center, provides the coordinates of that ROI.
      Coordinates beyond the picture are ignored.
    nruns : int
      Runs per subject.
    nsubjects : int
      Number of subjects.
    noise_independent_std, noise_independent_smooth : float
      Scale of the per-run normal noise and the smoothing argument handed
      to filter_each_2d.
    noise_subject_n, noise_subject_std, noise_subject_smooth
      Given to get_intrinsic_noises as ``n``, ``std`` and ``sigma`` for
      the per-subject noise fields.
    noise_common_n, noise_common_std, noise_common_smooth
      Given to get_intrinsic_noises as ``n``, ``std`` and ``sigma`` for
      the noise fields common to all subjects.

    Returns
    -------
    tuple
      ``(signal_clean, cluster_truth, dss)``: the noise-free signal after
      the inverse Fisher transform (tanh), the integer ROI labels per
      voxel (0 where there is no ROI, ``i + 1`` for the i-th ROI), and a
      list with one dataset per subject holding all of its runs stacked.

    Raises
    ------
    ValueError
      For any `rois_arrangement` other than 'circle'.
    """
    ndissims = len(dissims)
    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    # length of the vector form; identical for all entries of the regular
    # dissims array, so it is computed once and reused below
    ndissim_values = len(vector_form(d))
    signal_clean = np.zeros(shape + (ndissim_values,))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement != 'circle':
        raise ValueError("I know only circle")

    radius = min(shape[:2]) / 4.
    center = np.array((radius * 2,) * len(shape)).astype(int)
    # arrange at quarter distance from center
    for i, dissim in enumerate(dissims):
        dissim_vector = vector_form(dissim)
        # that is kinda boring -- the same dissimilarity to each
        # voxel???
        #
        # TODO: come up with a better arrangement/idea, e.g. to
        # generate an MVPA pattern which would satisfy the
        # dissimilarity (not exactly but at least close).  That
        # would make more sense
        angle = 2 * np.pi * i / ndissims
        roi_center = center.copy()
        roi_center[0] += int(radius * np.cos(angle))
        roi_center[1] += int(radius * np.sin(angle))
        for coords in roi_neighborhood(roi_center):
            acoords = np.asanyarray(coords)
            # only coordinates inside of the picture get assigned
            if np.all(acoords >= [0] * len(coords)) and \
               np.all(acoords < signal_clean.shape[:len(coords)]):
                signal_clean[coords] = dissim_vector
                cluster_truth[coords] = i + 1

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(
        signal_clean.shape,
        std=noise_common_std,
        sigma=noise_common_smooth,
        n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    dss = []
    for _ in range(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with original design/similarity but having spatial
        # structure which repeats through runs with random weights
        # (consider it to be a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(
            signal_clean.shape,
            std=noise_subject_std,
            sigma=noise_subject_smooth,
            n=noise_subject_n)
        assert subj_specific_noises[0].ndim == 3, \
            "There should be no time comp"

        dss_subject = []
        # every subject weights the common noise fields randomly
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)
                * noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            # Lame one for now
            ds.sa['dissimilarity'] = np.arange(ndissim_values)
            ds_flat = ds.get_mapped(FlattenMapper(shape=ds.shape[1:],
                                                  space='pixel_indices'))
            dss_subject.append(ds_flat)

        ds = dsvstack(dss_subject)
        # .a are not transferred by vstack
        ds.a['mapper'] = dss_subject[0].a.mapper
        dss.append(ds)

    # sanity checks on what got generated
    assert len(dss) == nsubjects
    assert len(dss[0]) == nruns * ndissim_values

    return np.tanh(signal_clean), cluster_truth, dss