def test_hyper_ref_ds_range_checks(self):
    """Hyperalignment must validate that ref_ds fits into a non-negative int."""
    # A ref_ds that truncates to a negative int must be rejected outright.
    self.assertRaises(ValueError, Hyperalignment, ref_ds=-1.5)
    # Values truncating to a valid non-negative int are accepted:
    # int(0.5) == 0, int(3.5) == 3.
    for acceptable in (0.5, 3.5):
        aligner = Hyperalignment(ref_ds=acceptable)
    # With only 3 datasets, the last aligner's ref_ds (3) is out of range,
    # so calling it must raise.
    subjects = [datasets['uni4small'] for _ in range(3)]
    with self.assertRaises(ValueError):
        aligner(subjects)
def hyperalignment(input_data, output_data, mask, output_suffix,
                   training_runs, testing_runs, **kwargs):
    """Train Hyperalignment on training runs and project both splits.

    Parameters
    ----------
    input_data: Input data path.
    output_data: Output data path.
    mask: Path to mask file.
    output_suffix: Filename suffix for saving aligned data.
    training_runs: List of runs to be used for training.
    testing_runs: List of runs to be used for testing.
    kwargs: Passed onto Hyperalignment

    Returns
    -------
    dict mapping 'train'/'test' to lists of aligned datasets.
    """
    # XXX TODO Use mask to load from nifti file
    splits = {
        'train': load_data(input_data, training_runs),  # , mask)
        'test': load_data(input_data, testing_runs),  # , mask)
    }
    # Mappers are derived from the training split only.
    aligner = Hyperalignment(**kwargs)
    mappers = aligner(splits['train'])
    # Project each split through its subject's mapper; optionally persist.
    aligned = {}
    for split_name, split_dss in splits.items():
        projected = [mapper.forward(ds)
                     for mapper, ds in zip(mappers, split_dss)]
        if output_data is not None:
            save_data(projected, output_suffix + split_name)
        aligned[split_name] = projected
    return aligned
def test_hyper_input_dataset_check(self):
    """Hyperalignment must reject bare datasets and survive 1-dataset training."""
    aligner = Hyperalignment()
    all_dss = [datasets['uni4small'] for _ in range(3)]
    first = all_dss[0]
    # Passing a single dataset instead of a list must raise TypeError
    for callee in (aligner, aligner.train):
        self.assertRaises(TypeError, callee, first)
    # Training on a single dataset must not crash; afterwards the common
    # space equals that dataset's zscored samples.
    aligner.train([first])
    zscore(first, chunks_attr=None)
    assert_array_equal(aligner.commonspace, first.samples)
    # A tuple of datasets is acceptable input as well ...
    m = Hyperalignment()(tuple(all_dss))
    # ... and so is an object ndarray holding the datasets.
    boxed = np.empty(len(all_dss), dtype=object)
    for idx, ds in enumerate(all_dss):
        boxed[idx] = ds
    m = Hyperalignment()(boxed)
def fa(s_list):
    """Hyperalign the given datasets, falling back to identity mappers.

    Parameters
    ----------
    s_list : list of datasets to hyperalign (uses ``alpha`` from the
        enclosing scope).

    Returns
    -------
    A list of mappers — the trained hyperalignment mappers, or identity
    StaticProjectionMappers if hyperalignment raised.
    """
    try:
        return Hyperalignment(alpha=alpha)(s_list)
    # BUG FIX: was a bare ``except:`` which also swallowed
    # KeyboardInterrupt/SystemExit; narrowed to Exception.
    except Exception:
        logger.warning('Hyperalignment failed for {hemi} {roi}.'.format(
            hemi=s_list[0].fa.hemi[0],
            roi=s_list[0].fa.annotation[0]))
        logger.warning('Inserting identity mappers.')
        return [StaticProjectionMapper(numpy.eye(s.fa.attr_length))
                for s in s_list]
def test_hpal_joblib(self): skip_if_no_external('joblib') # get seed dataset ds4l = datasets['uni4large'] dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10) for i in range(4)] ha = Hyperalignment(nproc=1, enable_ca=['residual_errors']) ha.train(dss_rotated[:2]) mappers = ha(dss_rotated) ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors']) ha_proc.train(dss_rotated[:2]) mappers_nproc = ha_proc(dss_rotated) self.assertTrue( np.all([np.array_equal(m.proj, mp.proj) for m, mp in zip(mappers, mappers_nproc)]), msg="Mappers differ when using nproc>1.") assert_array_equal(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples) # smoke test ha = Hyperalignment(nproc=0) mappers = ha(dss_rotated)
def test_hpal_svd_combo(self):
    """Hyperalignment with output_dim (SVD) should match hpal followed by SVD."""
    # get seed dataset
    ds4l = datasets['uni4large']
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    # XXX Is this SVD mapping required?
    svm = SVDMapper()
    svm.train(ds_orig)
    ds_svs = svm.forward(ds_orig)
    ds_orig.samples = ds_svs.samples
    nf_true = ds_orig.nfeatures
    n = 4  # # of datasets to generate
    # Adding non-shared dimensions for each subject
    dss_rotated = [[]] * n
    for i in range(n):
        dss_rotated[i] = hstack(
            (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
    # rotate data
    nf = dss_rotated[0].nfeatures
    dss_rotated = [random_affine_transformation(dss_rotated[i])
                   for i in xrange(n)]
    # Test if it is close to doing hpal+SVD in sequence outside hpal
    # First, as we do in sequence outside hpal
    ha = Hyperalignment()
    mappers_orig = ha(dss_rotated)
    dss_back = [m.forward(ds_)
                for m, ds_ in zip(mappers_orig, dss_rotated)]
    dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
    svm = SVDMapper()
    svm.train(dss_mean)
    dss_sv = [svm.forward(sd) for sd in dss_back]
    # Test for SVD dimensionality reduction even with 2 training subjects
    for output_dim in [1, 4]:
        ha = Hyperalignment(output_dim=output_dim)
        ha.train(dss_rotated[:2])
        mappers = ha(dss_rotated)
        dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
        # every aligned dataset must be reduced to the requested dimensionality
        for sd in dss_back:
            assert (sd.nfeatures == output_dim)
    # Check if combined hpal+SVD works as expected
    sv_corrs = []
    for sd1, sd2 in zip(dss_sv, dss_back):
        ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                       k=0)
        sv_corrs.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs)) >= 0.95),
        msg="Hyperalignment with dimensionality reduction should have "
            "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
    # Check if it recovers original SVs
    sv_corrs_orig = []
    for sd in dss_back:
        ndcs = np.diag(np.corrcoef(sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true],
                       k=0)
        sv_corrs_orig.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
        msg="Expected original dimensions after "
            "SVD. Got correlations %s." % sv_corrs_orig)
def _get_hypesvs(self, sl_connectomes, local_common_model=None): ''' Hyperalign connectomes and return mapppers and trained SVDMapper of common space. Parameters ---------- sl_connectomes: a list of connectomes to hyperalign local_common_model: a reference common model to be used. Returns ------- a tuple (sl_hmappers, svm, local_common_model) sl_hmappers: a list of mappers corresponding to input list in that order. svm: a svm mapper based on the input data. if given a common model, this is None. local_common_model: If local_common_model is provided as input, this will be None. Otherwise, local_common_model will be computed here and returned. ''' # TODO Should we z-score sl_connectomes? return_model = False if self.params.save_model is None else True if local_common_model is not None: ha = Hyperalignment(level2_niter=0) if not is_datasetlike(local_common_model): local_common_model = Dataset(samples=local_common_model) ha.train([local_common_model]) sl_hmappers = ha(sl_connectomes) return sl_hmappers, None, None ha = Hyperalignment() sl_hmappers = ha(sl_connectomes) sl_connectomes = [slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)] _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes] sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1) svm = SVDMapper(force_train=True) svm.train(sl_connectomes) if return_model: local_common_model = svm.forward(sl_connectomes) else: local_common_model = None return sl_hmappers, svm, local_common_model
def test_hpal_joblib(self):
    """Parallel (nproc>1) hyperalignment must reproduce the serial result."""
    skip_if_no_external('joblib')
    # seed dataset, rotated four ways
    seed_ds = datasets['uni4large']
    rotated = [random_affine_transformation(seed_ds, scale_fac=100, shift_fac=10)
               for _ in range(4)]
    serial = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
    serial.train(rotated[:2])
    serial_mappers = serial(rotated)
    parallel = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
    parallel.train(rotated[:2])
    parallel_mappers = parallel(rotated)
    # not sure yet why on windows only is not precise
    if on_windows:
        compare = assert_array_almost_equal
    else:
        compare = assert_array_equal
    # "Mappers differ when using nproc>1."
    for m_serial, m_parallel in zip(serial_mappers, parallel_mappers):
        compare(m_serial.proj, m_parallel.proj)
    compare(serial.ca.residual_errors.samples,
            parallel.ca.residual_errors.samples)
    # smoke test
    mappers = Hyperalignment(nproc=0)(rotated)
def __init__(self, ref_ds=0, hyperalignment=None,
             featsel=1.0, full_matrix=True, use_same_features=False,
             exclude_from_model=None, dtype='float32', **kwargs):
    """
    For description of parameters see :class:`SearchlightHyperalignment`
    """
    super(FeatureSelectionHyperalignment, self).__init__(**kwargs)
    self.ref_ds = ref_ds
    # BUG FIX: the default used to be ``hyperalignment=Hyperalignment(ref_ds=0)``
    # evaluated once at definition time, so every instance constructed without
    # an explicit argument shared one stateful Hyperalignment object (its
    # params can be mutated by callers).  Create a fresh instance per
    # construction instead; the effective default is unchanged.
    self.hyperalignment = Hyperalignment(ref_ds=0) if hyperalignment is None \
        else hyperalignment
    self.featsel = featsel
    self.use_same_features = use_same_features
    # Normalize None to an empty exclusion list
    self.exclude_from_model = exclude_from_model
    if self.exclude_from_model is None:
        self.exclude_from_model = []
    self.full_matrix = full_matrix
    self.dtype = dtype
def test_timesegments_classification():
    """Time-segment classification: identity, noisy, and hyperaligned cases."""
    # TODO: RF our construction of fake datasets for testing hyperalignment
    # so we could reuse it here and test classification performance
    ds_orig = datasets['uni4large']
    n = 3
    dss = [ds_orig.copy(deep=True) for i in range(n)]

    def nohyper(dss):
        # stand-in "alignment" that does nothing
        return [IdentityMapper() for ds in dss]

    # clean case, assume "nohyper" which would be by default
    errors = timesegments_classification(dss)
    for ds in dss:
        # must not add any attribute, such as subjects
        assert ('subjects' not in ds.sa)
    assert_array_equal(errors, 0)
    # very noisy case -- we must not be able to classify anything reasonably
    dss_noisy = [ds.copy() for ds in dss]
    for ds in dss_noisy:
        ds.samples = np.random.normal(size=ds.samples.shape)
    errors_nonoverlapping = timesegments_classification(
        dss_noisy, nohyper, overlapping_windows=False)
    assert (np.all(errors_nonoverlapping <= 1.))
    assert (np.all(0.75 <= errors_nonoverlapping))
    errors_overlapping = timesegments_classification(dss_noisy, nohyper)
    # nononverlapping error should be less for random result
    assert_array_lequal(np.mean(errors_nonoverlapping),
                        np.mean(errors_overlapping))
    # now the ultimate test with real hyperalignment on when we don't need much
    # of it anyways
    dss_rotated = [
        random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
        for _ in dss
    ]
    errors_hyper = timesegments_classification(dss_rotated, Hyperalignment())
    # Hyperalignment must not screw up and rotated and classify perfectly
    # since we didn't add any noise whatsoever
    # BUG FIX: previously asserted on the stale ``errors`` from the clean
    # no-hyperalignment case above, so the hyperalignment result was never
    # actually checked.
    assert_array_equal(errors_hyper, 0)
def test_hypal_michael_caused_problem(self):
    """Regression test: heavily biased probe data must not come out correlated."""
    from mvpa2.misc import data_generators
    from mvpa2.mappers.zscore import zscore
    # Fake data: one base dataset and three random affine views of it
    base = data_generators.normal_feature_dataset(nfeatures=20)
    subjects = [data_generators.random_affine_transformation(base)
                for _ in range(3)]
    for subj in subjects:
        zscore(subj, chunks_attr=None)
    # Random per-subject probe data, with a huge additive bias on subject 0
    probes = [np.random.rand(1, base.nfeatures) for _ in subjects]
    probes[0] += np.arange(1, base.nfeatures + 1) * 100
    # that would have been ridiculous if it was
    assert (np.corrcoef(probes[2], probes[1])[0, 1] < 0.99)
    # Test with varying alpha so we for sure to not have that issue now
    for alpha in (0, 0.01, 0.5, 0.99, 1.0):
        mappers = Hyperalignment(alpha=alpha)([sd for sd in subjects])
        forwarded = [m.forward(p) for m, p in zip(mappers, probes)]
        round_tripped = [mappers[0].reverse(p) for p in forwarded]
        corr = np.corrcoef(round_tripped[2], round_tripped[1])[0, 1]
        assert (corr < 0.99)
def test_hpal_joblib(self): skip_if_no_external('joblib') # get seed dataset ds4l = datasets['uni4large'] dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10) for i in range(4)] ha = Hyperalignment(nproc=1, enable_ca=['residual_errors']) ha.train(dss_rotated[:2]) mappers = ha(dss_rotated) ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors']) ha_proc.train(dss_rotated[:2]) mappers_nproc = ha_proc(dss_rotated) # not sure yet why on windows only is not precise cmp_ = assert_array_equal if (not on_windows) else assert_array_almost_equal [cmp_(m.proj, mp.proj) for m, mp in zip(mappers, mappers_nproc)] # "Mappers differ when using nproc>1." cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples) # smoke test ha = Hyperalignment(nproc=0) mappers = ha(dss_rotated)
def _get_hypesvs(self, sl_connectomes, local_common_model=None): ''' Hyperalign connectomes and return mapppers and trained SVDMapper of common space. Parameters ---------- sl_connectomes: a list of connectomes to hyperalign local_common_model: a reference common model to be used. Returns ------- a tuple (sl_hmappers, svm, local_common_model) sl_hmappers: a list of mappers corresponding to input list in that order. svm: a svm mapper based on the input data. if given a common model, this is None. local_common_model: If local_common_model is provided as input, this will be None. Otherwise, local_common_model will be computed here and returned. ''' # TODO Should we z-score sl_connectomes? return_model = False if self.params.save_model is None else True if local_common_model is not None: ha = Hyperalignment(level2_niter=0) if not is_datasetlike(local_common_model): local_common_model = Dataset(samples=local_common_model) ha.train([local_common_model]) sl_hmappers = ha(sl_connectomes) return sl_hmappers, None, None ha = Hyperalignment() sl_hmappers = ha(sl_connectomes) sl_connectomes = [ slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes) ] _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes] sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1) svm = SVDMapper(force_train=True) svm.train(sl_connectomes) if return_model: local_common_model = svm.forward(sl_connectomes) else: local_common_model = None return sl_hmappers, svm, local_common_model
class SearchlightHyperalignment(ClassWithCollections):
    """
    Given a list of datasets, provide a list of mappers
    into common space using searchlight based hyperalignment.
    :ref:`Guntupalli et al., Cerebral Cortex (2016)`

    1) Input datasets should all be of the same size in terms of nsamples
       and nfeatures, and be coarsely aligned (using anatomy).
    2) All features in all datasets should be zscored.
    3) Datasets should have feature attribute `voxel_indices` containing
       spatial coordinates of all features
    """

    # TODO: add {training_,}residual_errors .ca ?

    ## Parameters common with Hyperalignment but overriden

    ref_ds = Parameter(
        0,
        constraints=EnsureInt() & EnsureRange(min=0),
        doc="""Index of a dataset to use as a reference. First dataset is used
        as default. If you supply exclude_from_model list, you should supply
        the ref_ds index as index before you remove those excluded datasets.
        Note that unlike regular Hyperalignment, there is no automagic
        choosing of the "best" ref_ds by default.""")

    ## Parameters specific to SearchlightHyperalignment

    queryengine = Parameter(
        None,
        doc="""A single (or a list of query engines, one per each dataset) to be
        used. If not provided, volumetric searchlight, with spherical
        neighborhood as instructed by radius parameter will be used.""")

    radius = Parameter(
        3,
        constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Radius of a searchlight sphere in number of voxels to be used if
        no `queryengine` argument was provided.""")

    nproc = Parameter(
        1,
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        doc="""Number of cores to use.""")

    nblocks = Parameter(
        None,
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        doc="""Number of blocks to divide to process. Higher number results in
        smaller memory consumption.""")

    sparse_radius = Parameter(
        None,
        constraints=(EnsureRange(min=1) & EnsureInt() | EnsureNone()),
        doc="""Radius supplied to scatter_neighborhoods in units of voxels.
        This is effectively the distance between the centers where
        hyperalignment is performed in searchlights. ATM applicable only
        if no custom queryengine was provided. If None, hyperalignment
        is performed at every voxel (default).""")

    hyperalignment = Parameter(
        Hyperalignment(ref_ds=None),
        doc="""Hyperalignment instance to be used in each searchlight sphere.
        Default is just the Hyperalignment instance with default parameters.
        Its `ref_ds` parameter would be overridden by the `ref_ds` parameter
        of this SearchlightHyperalignment instance because we want to be
        consistent and only need one `ref_ds`.""")

    combine_neighbormappers = Parameter(
        True,
        constraints=EnsureBool(),
        doc="""This param determines whether to combine mappers for each voxel
        from its neighborhood searchlights or just use the mapper for which
        it is the center voxel. This will not be applicable for certain
        queryengines whose ids and neighborhoods are from different spaces,
        such as for SurfaceVerticesQueryEngine""")

    compute_recon = Parameter(
        True,
        constraints=EnsureBool(),
        doc="""This param determines whether to compute reverse mappers for each
        subject from common-space to subject space. These will be stored in
        the StaticProjectionMapper() and used when reverse() is called.
        Enabling it will double the size of the mappers returned.""")

    featsel = Parameter(
        1.0,
        constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0) |
            EnsureInt() & EnsureRange(min=2),
        doc="""Determines if feature selection will be performed in each searchlight.
        1.0: Use all features. < 1.0 is understood as selecting that
        proportion of features in each searchlight of ref_ds using feature
        scores; > 1.0 is understood as selecting at most that many features
        in each searchlight.""")

    # TODO: Should we get rid of this feature?
    use_same_features = Parameter(
        False,
        constraints=EnsureBool(),
        doc="""Select the same (best) features when doing feature selection for
        all datasets.""")

    exclude_from_model = Parameter(
        [],
        constraints=EnsureListOf(int),
        doc="""List of dataset indices that will not participate in building
        common model. These will still get mappers back but they don't
        influence the model or voxel selection.""")

    mask_node_ids = Parameter(
        None,
        constraints=EnsureListOf(int) | EnsureNone(),
        doc="""You can specify a mask to compute searchlight hyperalignment only
        within this mask. These would be a list of voxel indices.""")

    dtype = Parameter(
        'float32',
        constraints='str',
        doc="""dtype of elements transformation matrices to save on memory for
        big datasets""")

    results_backend = Parameter(
        'hdf5',
        constraints=EnsureChoice('hdf5', 'native'),
        doc="""'hdf5' or 'native'. See Searchlight documentation.""")

    tmp_prefix = Parameter(
        'tmpsl',
        constraints='str',
        doc="""Prefix for temporary files. See Searchlight documentation.""")

    def __init__(self, **kwargs):
        """Initialize state and verify required externals are available."""
        _shpaldebug("Initializing.")
        ClassWithCollections.__init__(self, **kwargs)
        self.ndatasets = 0
        self.nfeatures = 0
        self.projections = None
        # This option makes the roi_seed in each SL to be selected during feature selection
        self.force_roi_seed = True
        if self.params.nproc is not None and self.params.nproc > 1 \
                and not externals.exists('pprocess'):
            raise RuntimeError("The 'pprocess' module is required for "
                               "multiprocess searchlights. Please either "
                               "install python-pprocess, or reduce `nproc` "
                               "to 1 (got nproc=%i) or set to default None"
                               % self.params.nproc)
        if not externals.exists('scipy'):
            raise RuntimeError("The 'scipy' module is required for "
                               "searchlight hyperalignment.")
        if self.params.results_backend == 'native':
            raise NotImplementedError("'native' mode to handle results is still a "
                                      "work in progress.")
            #warning("results_backend is set to 'native'. This has been known"
            #        "to result in longer run time when working with big datasets.")
        if self.params.results_backend == 'hdf5' and \
                not externals.exists('h5py'):
            raise RuntimeError("The 'hdf5' module is required for "
                               "when results_backend is set to 'hdf5'")

    def _proc_block(self, block, datasets, featselhyper, queryengines,
                    seed=None, iblock='main'):
        """Run hyperalignment in every searchlight of one block of center ids.

        Returns the accumulated per-subject sparse projections, either
        directly ('native' backend) or as a temporary hdf5 filename.
        """
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC', 'Starting computing block for %i elements' % len(block))
        bar = ProgressBar()
        # one sparse accumulator matrix per subject
        projections = [
            csc_matrix((self.nfeatures, self.nfeatures),
                       dtype=self.params.dtype)
            for isub in range(self.ndatasets)]
        for i, node_id in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine
            # Find the neighborhood for that selected nearest node
            roi_feature_ids_all = [qe[node_id] for qe in queryengines]
            # handling queryengines that return AttrDatasets
            for isub in range(len(roi_feature_ids_all)):
                if is_datasetlike(roi_feature_ids_all[isub]):
                    # making sure queryengine returned proper shaped output
                    assert (roi_feature_ids_all[isub].nsamples == 1)
                    roi_feature_ids_all[isub] = roi_feature_ids_all[
                        isub].samples[0, :].tolist()
            if len(roi_feature_ids_all) == 1:
                # just one was provided to be "broadcasted"
                roi_feature_ids_all *= len(datasets)
            # if qe returns zero-sized ROI for any subject, pass...
            if any(len(x) == 0 for x in roi_feature_ids_all):
                continue
            # selecting neighborhood for all subject for hyperalignment
            ds_temp = [sd[:, ids]
                       for sd, ids in zip(datasets, roi_feature_ids_all)]
            if self.force_roi_seed:
                # flag the center voxel of the reference dataset's ROI
                roi_seed = np.array(
                    roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (
                    i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert (len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                if not self.params.combine_neighbormappers:
                    # keep only the column for the center voxel
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                else:
                    # scatter the full ROI-to-ROI mapping into (I, J, V) triplets
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(
                            roi_feature_ids_ref_ds):
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                    shape=(max(self.nfeatures, max(I) + 1),
                           max(self.nfeatures, max(J) + 1)),
                    dtype=self.params.dtype)
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
        else:
            raise RuntimeError("Must not reach this point")

    def __handle_results(self, results):
        """Accumulate one block's projections (loaded from hdf5) into self.projections."""
        if self.params.results_backend == 'hdf5':
            # 'results' must be just a filename
            assert (isinstance(results, str))
            if __debug__:
                debug('SLC', "Loading results from %s" % results)
            results_data = h5load(results)
            os.unlink(results)
            if __debug__:
                debug('SLC_', "Loaded results of len=%d from"
                      % len(results_data))
            for isub, res in enumerate(results_data):
                self.projections[isub] = self.projections[isub] + res
            if __debug__:
                debug('SLC_', "Finished adding results")
            return

    def __handle_all_results(self, results):
        """Helper generator to decorate passing the results out to
        results_fx
        """
        for r in results:
            yield self.__handle_results(r)

    @due.dcite(
        Doi('10.1093/cercor/bhw068'),
        description="Full cortex hyperalignment of data to a common space",
        tags=["implementation"])
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
        datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """
        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (params.ref_ds, self.ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets
        _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                    % (self, self.ndatasets))

        # translate ref_ds index into the post-exclusion indexing used for training
        selected = [_ for _ in range(ndatasets)
                    if _ not in params.exclude_from_model]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning('%d %s are too different (max diff=%g) from %d in '
                                'dataset %d to come from a zscored dataset. '
                                'Please zscore datasets first for correct operation '
                                '(unless if was intentional)'
                                % (np.sum(vals_comp), fname,
                                   np.max(np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features where comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError("Mapping from voxels to surface nodes is not "
                                          "implmented yet. Try setting combine_neighbormappers to True.")
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off %s child processes for nblocks=%i"
                      % (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, datasets, copy.copy(hmeasure), queryengines,
                        seed=seed, iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [self._proc_block(block, datasets, hmeasure,
                                          queryengines)
                         for block in node_blocks]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon
            else StaticProjectionMapper(proj=proj)
            for proj in self.projections]
        return self.projections

    def _get_verified_ids(self, queryengines):
        """Helper to return ids of queryengines, verifying that they are the same"""
        qe0 = queryengines[0]
        roi_ids = qe0.ids
        for qe in queryengines:
            if qe is not qe0:
                # if a different query engine (so wasn't just replicated)
                if np.any(qe.ids != qe0.ids):
                    raise RuntimeError(
                        "Query engine %s provided different ids than %s. Not supported"
                        % (qe0, qe))
        return roi_ids

    def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s), either list of one or one per each dataset

        if queryengine is None then IndexQueryEngine based on radius is
        created
        """
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                    qe.train(ds)
            else:
                queryengine.train(datasets[ref_ds])
                queryengines = [queryengine]
        else:
            _shpaldebug('No custom query engines were provided. Setting up the '
                        'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
        return queryengines
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        """Check that Hyperalignment recovers randomly rotated/scaled/shifted
        copies of a dataset, does not mutate its inputs, and reports the
        chosen reference dataset and residual errors correctly.

        Parameters are swept externally (presumably via ``sweepargs`` --
        TODO confirm): ``ref_ds`` may be None (defaults to 0 inside
        Hyperalignment), ``zscore_common``/``zscore_all`` toggle zscoring.
        """
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0  # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # lets select for now only meaningful features
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4  # # of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now lets compose derived datasets by using some random
        # rotation(s)
        for i in xrange(n):
            ## if False:  # i == ref_ds:
            ##     # Do not rotate the target space so we could check later on
            ##     # if we transform back nicely
            ##     R = np.eye(ds_orig.nfeatures)
            ## else:
            ds_ = random_affine_transformation(ds_orig, scale_fac=100,
                                               shift_fac=10)
            # keep the applied transform parameters so reconstruction can be
            # verified against the known ground truth later on
            Rs.append(ds_.a.random_rotation)
            # reusing random data from dataset itself
            random_scales += [ds_.a.random_scale]
            random_shifts += [ds_.a.random_shift]
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)
            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Lets test two scenarios -- in one with no noise -- we should get
        # close to perfect reconstruction.  If noise was added -- not so good
        for noisy, dss in ((False, dss_rotated_clean),
                           (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(
                idhashes, idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(
                idhashes_targets, idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.chosen_ref_ds)

            # Map data back
            dss_clean_back = [
                m.forward(ds_)
                for m, ds_ in zip(mappers, dss_rotated_clean)]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []
            ndcss = []
            # ground truth: the original data transformed exactly like the
            # reference dataset was
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                * random_scales[ref_ds] \
                + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if we used zscoring of common, we cannot rely
                # that range/offset could be matched, so lets use
                # corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf], k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(
                    np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
                    msg="Should have reconstructed original dataset more or"
                    " less. Got correlations %s in %s case."
                    % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(
                        np.all(nddss[ref_ds] <= .09),
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))
                    # yoh: and leave 5% of difference for a chance and numerical
                    # fluctuations ;)
                    self.assertTrue(
                        np.all(np.array(nddss) >= 0.95 * nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))

        # Lets see how well we do if asked to compute residuals
        ha = Hyperalignment(
            ref_ds=ref_ds, level2_niter=2,
            enable_ca=['training_residual_errors', 'residual_errors'])
        mappers = ha(dss_rotated_clean)
        self.assertTrue(
            np.all(ha.ca.training_residual_errors.sa.levels ==
                   ['1', '2:0', '2:1']))
        rterrors = ha.ca.training_residual_errors.samples
        # just basic tests:
        self.assertEqual(rterrors[0, ref_ds], 0)
        self.assertEqual(rterrors.shape, (3, n))
        rerrors = ha.ca.residual_errors.samples
        self.assertEqual(rerrors.shape, (1, n))
    def _test_on_swaroop_data(self):  # pragma: no cover
        """Disabled (leading underscore) end-to-end check on external fMRI
        data that is not shipped with the test suite: between-subject
        feature scoring, feature selection, hyperalignment, then
        within-subject vs between-subject classification accuracy.

        Left as Python 2 code with hard-coded file names; runnable only in
        an environment that provides the subject NIfTI files and labels.txt.
        """
        # print "Running swaroops test on data we don't have"
        #from mvpa2.datasets.miscfx import zscore
        #from mvpa2.featsel.helpers import FixedNElementTailSelector
        # or just for lazy ones like yarik atm
        #enable to test from mvpa2.suite import *
        subj = ['cb', 'dm', 'hj', 'kd', 'kl', 'mh', 'ph', 'rb', 'se', 'sm']
        ds = []
        for sub in subj:
            ds.append(
                fmri_dataset(samples=sub + '_movie.nii.gz',
                             mask=sub + '_mask_vt.nii.gz'))
        '''
        Compute feature ranks in each dataset
        based on correlation with other datasets
        '''
        feature_scores = [np.zeros(d.nfeatures) for d in ds]
        '''
        for i in range(len(subj)):
            ds_temp = ds[i].samples - np.mean(ds[i].samples, axis=0)
            ds_temp = ds_temp / np.sqrt( np.sum( np.square(ds_temp), axis=0) )
            for j in range(i+1,len(subj)):
                ds_temp2 = ds[j].samples - np.mean(ds[j].samples, axis=0)
                ds_temp2 = ds_temp2 / np.sqrt( np.sum( np.square(ds_temp2), axis=0) )
                corr_temp= np.asarray(np.mat(np.transpose(ds_temp))*np.mat(ds_temp2))
                feature_scores[i] = feature_scores[i] + np.max(corr_temp, axis = 1)
                feature_scores[j] = feature_scores[j] + np.max(corr_temp, axis = 0)
        '''
        # each feature is scored by its best correlation with any feature of
        # every other (zscored) dataset, accumulated over all pairs
        for i, sd in enumerate(ds):
            ds_temp = sd.copy()
            zscore(ds_temp, chunks_attr=None)
            for j, sd2 in enumerate(ds[i + 1:]):
                ds_temp2 = sd2.copy()
                zscore(ds_temp2, chunks_attr=None)
                corr_temp = np.dot(ds_temp.samples.T, ds_temp2.samples)
                feature_scores[i] = feature_scores[i] + \
                    np.max(corr_temp, axis = 1)
                feature_scores[j+i+1] = feature_scores[j+i+1] + \
                    np.max(corr_temp, axis = 0)
        for i, sd in enumerate(ds):
            sd.fa['bsc_scores'] = feature_scores[i]

        # keep the 2000 best-scoring features per subject
        fselector = FixedNElementTailSelector(2000, tail='upper', mode='select')

        ds_fs = [sd[:, fselector(sd.fa.bsc_scores)] for sd in ds]

        hyper = Hyperalignment()
        mapper_results = hyper(ds_fs)

        md_cd = ColumnData('labels.txt', header=['label'])
        md_labels = [int(x) for x in md_cd['label']]
        # mark the first 3 volumes of each of the 8 runs as to-be-discarded
        for run in range(8):
            md_labels[192 * run:192 * run + 3] = [-1] * 3
        mkdg_ds = []
        for sub in subj:
            mkdg_ds.append(
                fmri_dataset(samples=sub + '_mkdg.nii.gz',
                             targets=md_labels,
                             chunks=np.repeat(range(8), 192),
                             mask=sub + '_mask_vt.nii.gz'))

        m = mean_group_sample(['targets', 'chunks'])

        mkdg_ds = [ds_.get_mapped(m) for ds_ in mkdg_ds]
        mkdg_ds = [ds_[ds_.sa.targets != -1] for ds_ in mkdg_ds]
        # zscore against the rest condition (target 0), then drop it
        [zscore(ds_, param_est=('targets', [0])) for ds_ in mkdg_ds]
        mkdg_ds = [ds_[ds_.sa.targets != 0] for ds_ in mkdg_ds]

        for i, sd in enumerate(mkdg_ds):
            sd.fa['bsc_scores'] = feature_scores[i]

        mkdg_ds_fs = [sd[:, fselector(sd.fa.bsc_scores)] for sd in mkdg_ds]
        mkdg_ds_mapped = [
            sd.get_mapped(mapper_results[i])
            for i, sd in enumerate(mkdg_ds_fs)]

        # within-subject classification
        within_acc = []
        clf = clfswh['multiclass', 'linear', 'NU_SVC'][0]
        cvterr = CrossValidation(clf, NFoldPartitioner(),
                                 enable_ca=['confusion'])
        for sd in mkdg_ds_fs:
            wsc = cvterr(sd)
            within_acc.append(1 - np.mean(wsc))

        within_acc_mapped = []
        for sd in mkdg_ds_mapped:
            wsc = cvterr(sd)
            within_acc_mapped.append(1 - np.mean(wsc))

        print np.mean(within_acc)
        print np.mean(within_acc_mapped)

        # between-subject classification: chunks become subjects so the CV
        # partitions leave whole subjects out
        mkdg_ds_all = vstack(mkdg_ds_mapped)
        mkdg_ds_all.sa['subject'] = np.repeat(range(10), 56)
        mkdg_ds_all.sa['chunks'] = mkdg_ds_all.sa['subject']

        bsc = cvterr(mkdg_ds_all)
        print 1 - np.mean(bsc)
        mkdg_all = vstack(mkdg_ds_fs)
        mkdg_all.sa['chunks'] = np.repeat(range(10), 56)
        bsc_orig = cvterr(mkdg_all)
        print 1 - np.mean(bsc_orig)
        pass
# Train hyperalignment for one parcel on all subjects' 'sponpain' data,
# then apply the trained mappers to test data in parallel and save the
# aligned time series plus the per-subject mappers.
aligned_dirname = os.path.join(utils.common_space_dir,
                               'parcel_{n:03d}'.format(n=parcel_num))
mapper_dirname = os.path.join(utils.trans_matrices,
                              'parcel_{n:03d}'.format(n=parcel_num))
for d in [aligned_dirname, mapper_dirname]:
    if not os.path.exists(d):
        os.makedirs(d)
train_dss = [utils.prep_parcelwise_data(sub, parcel_num, 'sponpain')
             for sub in sub_list]
print('-------- size of training data sets {A} -------------'.format(A=train_dss[0].shape))
print('-------- beginning hyperalignment parcel {A} --------'.format(A=parcel_num))
# train hyperalignment model on all subject's sponpain data for this parcel
print('-------- length of train subjects={A} '.format(A=str(len(train_dss))))
ha = Hyperalignment(nproc=NPROC, joblib_backend='multiprocessing')
debug.active += ['HPAL']
t0 = time.time()
ha.train(train_dss)
mappers = ha(train_dss)
t1 = time.time()
print('-------- done training hyperalignment at {B} --------'.format(B=str(timedelta(seconds=t1-t0))))
# training data is no longer needed; free the memory before forking workers
del train_dss
pool = mp.Pool(NPROC)
data_fns = [os.path.join(aligned_dirname, '{s}_aligned_cleaned_bladder_ts_noZ.hdf5'.format(s=s))
            for s in sub_list]
# NOTE(review): '.hdf5noZ.gz' looks like a typo (suffix glued onto the
# extension) -- confirm intended filename before changing it
mapper_fns = [os.path.join(mapper_dirname, '{s}_trained_mapper.hdf5noZ.gz'.format(s=s))
              for s in sub_list]
iterable = zip(data_fns, mapper_fns, sub_list, mappers,
               np.repeat(parcel_num, len(mappers)))
pool.map(apply_mappers, iterable)
# fix: the pool was never shut down, leaking worker processes; map() is
# synchronous, so it is safe to close and join right after it returns
pool.close()
pool.join()
t2 = time.time()
print('-------- done aligning & saving test data at {B} --------'.format(B=str(timedelta(seconds=t2-t1))))
def __call__(self, datasets): """Estimate mappers for each dataset using searchlight-based hyperalignment. Parameters ---------- datasets : list or tuple of datasets Returns ------- A list of trained StaticProjectionMappers of the same length as datasets """ # Perform some checks first before modifying internal state params = self.params ndatasets = len(datasets) if len(datasets) <= 1: raise ValueError("SearchlightHyperalignment needs > 1 dataset to " "operate on. Got: %d" % self.ndatasets) if params.ref_ds in params.exclude_from_model: raise ValueError("Requested reference dataset %i is also " "in the exclude list." % params.ref_ds) if params.ref_ds >= ndatasets: raise ValueError("Requested reference dataset %i is out of " "bounds. We have only %i datasets provided" % (params.ref_ds, self.ndatasets)) # The rest of the checks are just warnings self.ndatasets = ndatasets _shpaldebug("SearchlightHyperalignment %s for %i datasets" % (self, self.ndatasets)) if params.ref_ds != params.hyperalignment.params.ref_ds: warning( 'Supplied ref_ds & hyperalignment instance ref_ds:%d differ.' % params.hyperalignment.params.ref_ds) warning('Using default hyperalignment instance with ref_ds: %d' % params.ref_ds) params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds) if len(params.exclude_from_model) > 0: warning("These datasets will not participate in building common " "model: %s" % params.exclude_from_model) if __debug__: # verify that datasets were zscored prior the alignment since it is # assumed/required preprocessing step for ids, ds in enumerate(datasets): for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds', 1)): vals = f(ds, axis=0) vals_comp = np.abs(vals - tval) > 1e-5 if np.any(vals_comp): warning( '%d %s are too different (max diff=%g) from %d in ' 'dataset %d to come from a zscored dataset. 
' 'Please zscore datasets first for correct operation ' '(unless if was intentional)' % (np.sum(vals_comp), fname, np.max( np.abs(vals)), tval, ids)) # Setting up SearchlightHyperalignment # we need to know which original features where comprising the # individual SL ROIs _shpaldebug('Initializing FeatureSelectionHyperalignment.') hmeasure = FeatureSelectionHyperalignment( featsel=params.featsel, hyperalignment=params.hyperalignment, full_matrix=params.combine_neighbormappers, use_same_features=params.use_same_features, exclude_from_model=params.exclude_from_model, dtype=params.dtype) # Performing SL processing manually _shpaldebug("Setting up for searchlights") if params.nproc is None and externals.exists('pprocess'): import pprocess try: params.nproc = pprocess.get_number_of_cores() or 1 except AttributeError: warning("pprocess version %s has no API to figure out maximal " "number of cores. Using 1" % externals.versions['pprocess']) params.nproc = 1 # XXX I think this class should already accept a single dataset only. # It should have a ``space`` setting that names a sample attribute that # can be used to identify individual/original datasets. # Taking a single dataset as argument would be cleaner, because the # algorithm relies on the assumption that there is a coarse feature # alignment, i.e. the SL ROIs cover roughly the same area queryengines = self._get_trained_queryengines(datasets, params.queryengine, params.radius, params.ref_ds) # For surface nodes to voxels queryengines, roi_seed hardly makes sense if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine): self.force_roi_seed = False if not self.params.combine_neighbormappers: raise NotImplementedError( "Mapping from voxels to surface nodes is not " "implmented yet. Try setting combine_neighbormappers to True." 
) self.nfeatures = datasets[params.ref_ds].nfeatures _shpaldebug("Performing Hyperalignment in searchlights") # Setting up centers for running SL Hyperalignment if params.sparse_radius is None: roi_ids = self._get_verified_ids(queryengines) \ if params.mask_node_ids is None \ else params.mask_node_ids else: if params.queryengine is not None: raise NotImplementedError( "using sparse_radius whenever custom queryengine is " "provided is not yet supported.") _shpaldebug("Setting up sparse neighborhood") from mvpa2.misc.neighborhood import scatter_neighborhoods if params.mask_node_ids is None: scoords, sidx = scatter_neighborhoods( Sphere(params.sparse_radius), datasets[params.ref_ds].fa.voxel_indices, deterministic=True) roi_ids = sidx else: scoords, sidx = scatter_neighborhoods( Sphere(params.sparse_radius), datasets[params.ref_ds].fa.voxel_indices[ params.mask_node_ids], deterministic=True) roi_ids = [params.mask_node_ids[sid] for sid in sidx] # Initialize projections _shpaldebug('Initializing projection matrices') self.projections = [ csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype) for isub in range(self.ndatasets) ] # compute if params.nproc is not None and params.nproc > 1: # split all target ROIs centers into `nproc` equally sized blocks nproc_needed = min(len(roi_ids), params.nproc) params.nblocks = nproc_needed \ if params.nblocks is None else params.nblocks params.nblocks = min(len(roi_ids), params.nblocks) node_blocks = np.array_split(roi_ids, params.nblocks) # the next block sets up the infrastructure for parallel computing # this can easily be changed into a ParallelPython loop, if we # decide to have a PP job server in PyMVPA import pprocess p_results = pprocess.Map(limit=nproc_needed) if __debug__: debug( 'SLC', "Starting off %s child processes for nblocks=%i" % (nproc_needed, params.nblocks)) compute = p_results.manage(pprocess.MakeParallel(self._proc_block)) seed = mvpa2.get_random_seed() for iblock, block in enumerate(node_blocks): # 
should we maybe deepcopy the measure to have a unique and # independent one per process? compute(block, datasets, copy.copy(hmeasure), queryengines, seed=seed, iblock=iblock) else: # otherwise collect the results in an 1-item list _shpaldebug('Using 1 process to compute mappers.') if params.nblocks is None: params.nblocks = 1 params.nblocks = min(len(roi_ids), params.nblocks) node_blocks = np.array_split(roi_ids, params.nblocks) p_results = [ self._proc_block(block, datasets, hmeasure, queryengines) for block in node_blocks ] results_ds = self.__handle_all_results(p_results) # Dummy iterator for, you know, iteration list(results_ds) _shpaldebug( 'Wrapping projection matrices into StaticProjectionMappers') self.projections = [ StaticProjectionMapper(proj=proj, recon=proj.T) if params.compute_recon else StaticProjectionMapper(proj=proj) for proj in self.projections ] return self.projections
# Select this parcel's voxels, z-score each subject's time series, train
# hyperalignment on them, and record anatomical-alignment (AA) vs
# hyperalignment (HA) inter-subject correlation for the training data.
# NOTE(review): 'parcel' is used as the results index while 'PARCEL_NUMBER'
# indexes the parcel mask -- presumably they refer to the same parcel;
# confirm they stay in sync in the enclosing loop.
df_results.loc[parcel, 'Voxels_in_parcel'] = sum(indices[PARCEL_NUMBER])
myvoxels = np.nonzero(indices[PARCEL_NUMBER])
dss = []
for sub in range(len(mats)):
    # restrict each subject's matrix to this parcel's voxels
    ds = mats[sub][:, myvoxels[0]]
    ds = mv.Dataset(ds)
    ds.fa['voxel_indices'] = range(ds.shape[1])
    # z-score over all samples (no chunk structure)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)

print('Size of Training data sets: {0}'.format(dss[0].shape))
print('Beginning Hyperalignment.')

# create hyperalignment instance
hyper = Hyperalignment(nproc=1, )
hyper.train(dss)

# get mappers to common space created by hyper.train (2x procrustes iteration)
mappers = hyper(dss)

# apply mappers back onto training data
ds_hyper = [h.forward(sd) for h, sd in zip(mappers, dss)]

# compare inter-subject similarity before vs after alignment
train_aa_isc = compute_average_similarity(dss)
train_ha_isc = compute_average_similarity(ds_hyper)
df_results.loc[parcel, 'Train_AA_ISC'] = np.mean(train_aa_isc)
df_results.loc[parcel, 'Train_HA_ISC'] = np.mean(train_ha_isc)

# create test dss
def run(args):
    """Command-line entry point: load datasets, run Hyperalignment, and
    write the requested outputs (common space, conditional attributes,
    mappers, transformed datasets).

    Parameters
    ----------
    args : argparse.Namespace
      Parsed command-line arguments; relies on ``data``, ``output_prefix``,
      ``hdf5_compression``, ``commonspace``, ``store_transformation``,
      ``transform`` and the attributes named in ``_supported_parameters``
      and ``_supported_cas``.
    """
    # fix: removed debug leftovers -- a stray print(args.data) and a
    # [:, :100] slice that silently truncated every dataset to its first
    # 100 features
    dss = [arg2ds(d) for d in args.data]
    verbose(1, "Loaded %i input datasets" % len(dss))
    if __debug__:
        for i, ds in enumerate(dss):
            debug('CMDLINE', "dataset %i: %s" % (i, str(ds)))
    # TODO at this point more check could be done, e.g. ref_ds > len(dss)
    # assemble parameters
    params = dict([(param, getattr(args, param))
                   for param in _supported_parameters])
    if __debug__:
        debug('CMDLINE', "configured parameters: '%s'" % params)
    # assemble CAs
    enabled_ca = [ca for ca in _supported_cas if getattr(args, ca)]
    if __debug__:
        debug('CMDLINE', "enabled conditional attributes: '%s'" % enabled_ca)
    hyper = Hyperalignment(
        enable_ca=enabled_ca,
        alignment=ProcrusteanMapper(svd='dgesvd', space='commonspace'),
        **params)
    verbose(1, "Running hyperalignment")
    promappers = hyper(dss)
    verbose(2, "Alignment reference is dataset %i" % hyper.ca.chosen_ref_ds)
    verbose(1, "Writing output")
    # save on memory and remove the training data
    del dss
    if args.commonspace:
        if __debug__:
            debug('CMDLINE', "write commonspace as hdf5")
        h5save('%s%s.hdf5'
               % (args.output_prefix,
                  _output_specs['commonspace']['output_suffix']),
               hyper.commonspace,
               compression=args.hdf5_compression)
    for ca in _supported_cas:
        if __debug__:
            debug('CMDLINE', "check conditional attribute: '%s'" % ca)
        if getattr(args, ca):
            if __debug__:
                debug('CMDLINE', "store conditional attribute: '%s'" % ca)
            np.savetxt('%s%s' % (args.output_prefix,
                                 _supported_cas[ca]['output_suffix']),
                       hyper.ca[ca].value.samples)
    if args.store_transformation:
        for i, pm in enumerate(promappers):
            if __debug__:
                debug('CMDLINE', "store mapper %i: %s" % (i, str(pm)))
            h5save('%s%s.hdf5' % (args.output_prefix, '_map%.3i' % i),
                   pm, compression=args.hdf5_compression)
    if args.transform:
        tdss, dss = _transform_dss(args.transform, promappers, args)
        del dss
        verbose(1, "Store transformed datasets")
        for i, td in enumerate(tdss):
            if __debug__:
                debug('CMDLINE', "store transformed data %i: %s"
                      % (i, str(td)))
            h5save('%s%s.hdf5' % (args.output_prefix,
                                  '_transformed%.3i' % i),
                   td, compression=args.hdf5_compression)
    def test_hpal_svd_combo(self):
        """Check Hyperalignment's built-in SVD dimensionality reduction
        (``output_dim``) against running hyperalignment followed by an
        explicit SVDMapper, and that the original SV dimensions are
        recovered.
        """
        # get seed dataset
        ds4l = datasets['uni4large']
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        # XXX Is this SVD mapping required?
        svm = SVDMapper()
        svm.train(ds_orig)
        ds_svs = svm.forward(ds_orig)
        ds_orig.samples = ds_svs.samples
        nf_true = ds_orig.nfeatures
        n = 4  # # of datasets to generate
        # Adding non-shared dimensions for each subject
        # (the [[]]*n shared-list idiom is safe here: every slot is
        # reassigned in the loop below)
        dss_rotated = [[]]*n
        for i in range(n):
            dss_rotated[i] = hstack(
                (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]))
        # rotate data
        nf = dss_rotated[0].nfeatures
        dss_rotated = [random_affine_transformation(dss_rotated[i])
                       for i in xrange(n)]
        # Test if it is close to doing hpal+SVD in sequence outside hpal
        # First, as we do in sequence outside hpal
        ha = Hyperalignment()
        mappers_orig = ha(dss_rotated)
        dss_back = [m.forward(ds_)
                    for m, ds_ in zip(mappers_orig, dss_rotated)]
        dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
        svm = SVDMapper()
        svm.train(dss_mean)
        dss_sv = [svm.forward(sd) for sd in dss_back]
        # Test for SVD dimensionality reduction even with 2 training subjects
        for output_dim in [1, 4]:
            ha = Hyperalignment(output_dim=output_dim)
            ha.train(dss_rotated[:2])
            mappers = ha(dss_rotated)
            dss_back = [m.forward(ds_)
                        for m, ds_ in zip(mappers, dss_rotated)]
            for sd in dss_back:
                assert (sd.nfeatures == output_dim)
        # Check if combined hpal+SVD works as expected
        # (dss_back now holds the output_dim=4 mapping from the last loop
        # iteration)
        sv_corrs = []
        for sd1, sd2 in zip(dss_sv, dss_back):
            ndcs = np.diag(np.corrcoef(sd1.samples.T,
                                       sd2.samples.T)[nf:, :nf], k=0)
            sv_corrs.append(ndcs)
        self.assertTrue(
            np.all(np.abs(np.array(sv_corrs)) >= 0.95),
            msg="Hyperalignment with dimensionality reduction should have "
            "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
        # Check if it recovers original SVs
        sv_corrs_orig = []
        for sd in dss_back:
            ndcs = np.diag(np.corrcoef(sd.samples.T,
                                       ds_orig.samples.T)[nf_true:, :nf_true],
                           k=0)
            sv_corrs_orig.append(ndcs)
        self.assertTrue(
            np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
            msg="Expected original dimensions after "
            "SVD. Got correlations %s." % sv_corrs_orig)