def test_verbose_below(self):
    """Test that output is emitted at lower levels and indented
    by default with spaces
    """
    verbose(2, self.msg)
    self.assertEqual(self.sout.getvalue(), "  %s\n" % self.msg)
def bxplot(stats, label):
    stats = concat_ts_boxplot_stats(stats)
    # XXX need some way to expose whether there were missing subjects and
    #     report proper IDs -- for now resort to whining
    verbose(0, "List of outlier time series follows (if any) [note, subject "
               "IDs are enumerations and may differ from dataset subject IDs "
               "in case of missing subjects]")
    for i, run in enumerate([np.where(np.sum(np.logical_not(o.mask), axis=0))
                             for o in stats[1]]):
        sids = run[0]
        if len(sids):
            verbose(0, "%s r%.3i: %s" % (label, i + 1, [s + 1 for s in sids]))
    timeseries_boxplot(stats[0]['median'],
                       mean=stats[0]['mean'], std=stats[0]['std'],
                       n=stats[0]['n'], min=stats[0]['min'],
                       max=stats[0]['max'], p25=stats[0]['p25'],
                       p75=stats[0]['p75'], outlierd=stats[1],
                       segment_sizes=segment_sizes)
    pl.title(label)
    xp, xl = pl.xticks()
    pl.xticks(xp, ['' for i in xl])
    pl.xlim((0, len(stats[0]['n'])))
    pl.ylabel(plt_props[label])
def test_verbose_indent(self):
    """Test indent symbol"""
    verbose.indent = "."
    verbose(2, self.msg)
    self.assertEqual(self.sout.getvalue(), "..%s\n" % self.msg)
    verbose.indent = " "            # restore
def run(args): """Run it""" verbose(1, "Loading %d result files" % len(args.data)) filetype_in = guess_backend(args.data[0]) if filetype_in == 'nifti': dss = [fmri_dataset(f) for f in args.data] elif filetype_in == 'hdf5': dss = [h5load(f) for f in args.data] data = np.asarray([d.samples[args.isample] for d in dss]) if args.mask: filetype_mask = guess_backend(args.mask) if filetype_mask == 'nifti': mask = nib.load(args.mask).get_data() elif filetype_mask == 'hdf5': mask = h5load(args.mask).samples out_of_mask = mask == 0 else: # just take where no voxel had a value out_of_mask = np.sum(data != 0, axis=0) == 0 t, p = ttest_1samp(data, popmean=args.chance_level, axis=0, alternative=args.alternative) if args.stat == 'z': if args.alternative == 'two-sided': s = stats.norm.isf(p / 2) else: s = stats.norm.isf(p) # take the sign of the original t s = np.abs(s) * np.sign(t) elif args.stat == 'p': s = p elif args.stat == 't': s = t else: raise ValueError('WTF you gave me? have no clue about %r' % (args.stat, )) if s.shape != out_of_mask.shape: try: out_of_mask = out_of_mask.reshape(s.shape) except ValueError: raise ValueError('Cannot use mask of shape {0} with ' 'data of shape {1}'.format( out_of_mask.shape, s.shape)) s[out_of_mask] = 0 verbose(1, "Saving to %s" % args.output) filetype_out = guess_backend(args.output) if filetype_out == 'nifti': map2nifti(dss[0], data=s).to_filename(args.output) else: # filetype_out is hdf5 s = Dataset(np.atleast_2d(s), fa=dss[0].fa, a=dss[0].a) h5save(args.output, s) return s
def run(args):
    dss = [arg2ds(d) for d in args.data]
    verbose(1, "Loaded %i input datasets" % len(dss))
    if __debug__:
        for i, ds in enumerate(dss):
            debug('CMDLINE', "dataset %i: %s" % (i, str(ds)))
    # TODO at this point more checks could be done, e.g. ref_ds > len(dss)
    # assemble parameters
    params = dict([(param, getattr(args, param))
                   for param in _supported_parameters])
    if __debug__:
        debug('CMDLINE', "configured parameters: '%s'" % params)
    # assemble conditional attributes
    enabled_ca = [ca for ca in _supported_cas if getattr(args, ca)]
    if __debug__:
        debug('CMDLINE', "enabled conditional attributes: '%s'" % enabled_ca)
    hyper = Hyperalignment(
        enable_ca=enabled_ca,
        alignment=ProcrusteanMapper(svd='dgesvd', space='commonspace'),
        **params)
    verbose(1, "Running hyperalignment")
    promappers = hyper(dss)
    verbose(2, "Alignment reference is dataset %i" % hyper.ca.chosen_ref_ds)
    verbose(1, "Writing output")
    # save on memory and remove the training data
    del dss
    if args.commonspace:
        if __debug__:
            debug('CMDLINE', "write commonspace as hdf5")
        h5save('%s%s.hdf5' % (args.output_prefix,
                              _output_specs['commonspace']['output_suffix']),
               hyper.commonspace,
               compression=args.hdf5_compression)
    for ca in _supported_cas:
        if __debug__:
            debug('CMDLINE', "check conditional attribute: '%s'" % ca)
        if getattr(args, ca):
            if __debug__:
                debug('CMDLINE', "store conditional attribute: '%s'" % ca)
            np.savetxt('%s%s' % (args.output_prefix,
                                 _supported_cas[ca]['output_suffix']),
                       hyper.ca[ca].value.samples)
    if args.store_transformation:
        for i, pm in enumerate(promappers):
            if __debug__:
                debug('CMDLINE', "store mapper %i: %s" % (i, str(pm)))
            h5save('%s%s.hdf5' % (args.output_prefix, '_map%.3i' % i),
                   pm, compression=args.hdf5_compression)
    if args.transform:
        tdss, dss = _transform_dss(args.transform, promappers, args)
        del dss
        verbose(1, "Store transformed datasets")
        for i, td in enumerate(tdss):
            if __debug__:
                debug('CMDLINE', "store transformed data %i: %s" % (i, str(td)))
            h5save('%s%s.hdf5' % (args.output_prefix, '_transformed%.3i' % i),
                   td, compression=args.hdf5_compression)
def _transform_dss(srcs, mappers, args):
    if __debug__:
        debug('CMDLINE', "loading to-be-transformed data from %s" % srcs)
    dss = [arg2ds(d) for d in srcs]
    verbose(1, "Loaded %i to-be-transformed datasets" % len(dss))
    if __debug__:
        debug('CMDLINE', "transform datasets")
    tdss = [mappers[i].forward(td) for i, td in enumerate(dss)]
    return tdss, dss
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    if args.numpy_xfm is not None:
        from mvpa2.mappers.fx import FxMapper
        fx, axis = args.numpy_xfm
        mapper = FxMapper(axis, fx)
        ds = ds.get_mapped(mapper)
    info_fx[args.report](ds, args)
def run(args): """Run it""" verbose(1, "Loading %d result files" % len(args.data)) filetype_in = guess_backend(args.data[0]) if filetype_in == 'nifti': dss = [fmri_dataset(f) for f in args.data] elif filetype_in == 'hdf5': dss = [h5load(f) for f in args.data] data = np.asarray([d.samples[args.isample] for d in dss]) if args.mask: filetype_mask = guess_backend(args.mask) if filetype_mask == 'nifti': mask = nib.load(args.mask).get_data() elif filetype_mask == 'hdf5': mask = h5load(args.mask).samples out_of_mask = mask == 0 else: # just take where no voxel had a value out_of_mask = np.sum(data != 0, axis=0)==0 t, p = ttest_1samp(data, popmean=args.chance_level, axis=0, alternative=args.alternative) if args.stat == 'z': if args.alternative == 'two-sided': s = stats.norm.isf(p/2) else: s = stats.norm.isf(p) # take the sign of the original t s = np.abs(s) * np.sign(t) elif args.stat == 'p': s = p elif args.stat == 't': s = t else: raise ValueError('WTF you gave me? have no clue about %r' % (args.stat,)) if s.shape != out_of_mask.shape: try: out_of_mask = out_of_mask.reshape(s.shape) except ValueError: raise ValueError('Cannot use mask of shape {0} with ' 'data of shape {1}'.format(out_of_mask.shape, s.shape)) s[out_of_mask] = 0 verbose(1, "Saving to %s" % args.output) filetype_out = guess_backend(args.output) if filetype_out == 'nifti': map2nifti(dss[0], data=s).to_filename(args.output) else: # filetype_out is hdf5 s = Dataset(np.atleast_2d(s), fa=dss[0].fa, a=dss[0].a) h5save(args.output, s) return s
def test_cr(self):
    """Test that it works fine with the carriage return (CR) symbol"""
    verbose(2, self.msg, cr=True)
    verbose(2, "rewrite", cr=True)
    verbose(1, "rewrite 2", cr=True)
    verbose(1, " add", cr=False, lf=False)
    verbose(1, " finish")
    target = '\r %s\r \rrewrite' % self.msg + \
             '\r \rrewrite 2 add finish\n'
    self.assertEqual(self.sout.getvalue(), target)
def _set_active(self, active):
    """Set the active logging set"""
    # just unique entries... we could have simply stored a set I guess,
    # but then smth like debug.active += ["BLAH"] would not work
    from mvpa2.base import verbose
    self.__active = []
    registered_keys = list(self.__registered.keys())
    for item in list(set(active)):
        if item == '':
            continue
        if isinstance(item, string_types):
            if item in ['?', 'list', 'help']:
                self.print_registered(detailed=(item != '?'))
                raise SystemExit(0)
            if item.upper() == "ALL":
                verbose(2, "Enabling all registered debug handlers")
                self.__active = registered_keys
                break
            # try to match the item as a regexp
            regexp_str = "^%s$" % item
            try:
                regexp = re.compile(regexp_str)
            except re.error:
                raise ValueError(
                    "Unable to create regular expression out of %s" % item)
            matching_keys = list(filter(regexp.match, registered_keys))
            toactivate = matching_keys
            if len(toactivate) == 0:
                ids = sorted(self.registered.keys())
                raise ValueError(
                    "Unknown debug ID '%s' was asked to become active,"
                    " or regular expression '%s' did not get any match"
                    " among known ids: %s"
                    % (item, regexp_str, ids))
        else:
            toactivate = [item]
        # Check that the requested items are known
        for item_ in toactivate:
            if not (item_ in registered_keys):
                raise ValueError(
                    "Unknown debug ID %s was asked to become active" % item_)
        self.__active += toactivate

    self.__active = list(set(self.__active))  # select just unique ones
    self.__maxstrlength = max([len(str(x)) for x in self.__active] + [0])
    if len(self.__active):
        verbose(2, "Enabling debug handlers: %s" % repr(self.__active))
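# A short sketch of the activation semantics implemented above (assuming the
# mvpa2 debug singleton; 'CMDLINE' is a target used elsewhere in this code,
# while 'SLC.*' is just an illustrative regexp):
from mvpa2.base import debug

debug.active = ['ALL']         # enable every registered handler
debug.active = ['CMDLINE']     # enable a single known ID
debug.active += ['SLC.*']      # regexps are anchored as ^SLC.*$ and expanded
# an ID with no literal and no regexp match raises ValueError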
def run(args):
    import pylab as pl
    from mvpa2.base import verbose

    # segments x [subjects x timepoints x properties]
    data = [np.array(s) for s in args.segment]
    # put in standard property order: first translation, then rotation
    if args.estimate_order == 'rottrans':
        data = [d[:, :, (3, 4, 5, 0, 1, 2)] for d in data]
    # convert rotations, now known to be last
    if args.rad2deg:
        for d in data:
            v = d[:, :, 3:]
            np.rad2deg(v, v)

    # and plot
    # figure setup
    fig = pl.figure(figsize=(12, 5))
    # translation
    ax = pl.subplot(211)
    outlier = motionqc_plot(
        [d[..., :3] for d in data],
        args.outlier_minthresh,
        args.outlier_stdthresh,
        "translation\nestimate L2-norm")
    if outlier:
        verbose(0,
                "Detected per-segment translation outlier input samples "
                "{0} (zero-based)".format(outlier))
    # rotation
    ax = pl.subplot(212)
    outlier = motionqc_plot(
        [d[..., 3:] for d in data],
        args.outlier_minthresh,
        args.outlier_stdthresh,
        "rotation\nestimate L2-norm")
    if outlier:
        verbose(0,
                "Detected per-segment rotation outlier input samples "
                "{0} (zero-based)".format(outlier))

    if args.savefig is None:
        pl.show()
    else:
        pl.savefig(args.savefig[0])
def test_no_lf(self):
    """Test if it works fine with no newline (LF) symbol"""
    verbose(2, self.msg, lf=False)
    verbose(2, " continue ", lf=False)
    verbose(2, "end")
    verbose(0, "new %s" % self.msg)
    self.assertEqual(self.sout.getvalue(),
                     "  %s continue end\nnew %s\n" % (self.msg, self.msg))
def ds2hdf5(ds, fname, compression=None):
    """Save one or more datasets into an HDF5 file.

    Parameters
    ----------
    ds : Dataset or list(Dataset)
      One or more datasets to store
    fname : str
      Filename of the output file. If it doesn't end with '.hdf5', such an
      extension will be appended.
    compression : {'gzip','lzf','szip'} or 1-9
      Compression type for HDF5 storage. Available values depend on the
      specific HDF5 installation.
    """
    # this one doesn't actually check what it stores
    from mvpa2.base.hdf5 import h5save
    if not fname.endswith('.hdf5'):
        fname = '%s.hdf5' % fname
    verbose(1, "Save dataset to '%s'" % fname)
    h5save(fname, ds, mkdir=True, compression=compression)
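# A quick usage sketch for the helper above (toy dataset; any mvpa2 Dataset,
# or a list of them, would do):
import numpy as np
from mvpa2.datasets import Dataset

toy = Dataset(np.random.randn(4, 10))        # 4 samples x 10 features
ds2hdf5(toy, 'results', compression='gzip')  # writes results.hdf5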
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # get CV instance
    cv = get_crossvalidation_instance(
        args.learner, args.partitioner, args.errorfx,
        args.sampling_repetitions, args.learner_space,
        args.balance_training, args.permutations,
        args.avg_datafold_results, args.prob_tail)
    res = cv(ds)
    # some meaningful output
    # XXX make condition on classification analysis only?
    print(cv.ca.stats)
    print('Results\n-------')
    if args.permutations > 0:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure
        if args.permutations > 0:
            p = ', p-value (%s tail)' % args.prob_tail
        else:
            p = ''
        print('Fold, Result%s' % p)
        for i in range(len(res)):
            if args.permutations > 0:
                p = ', %f' % nprob[i, 0]
            else:
                p = ''
            print('%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], p))
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if args.permutations > 0:
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob, '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # slicing
    sliceme = {'samples': slice(None), 'features': slice(None)}
    # indices
    for opt, col, which in ((args.samples_by_index, ds.sa, 'samples'),
                            (args.features_by_index, ds.fa, 'features')):
        if opt is None:
            continue
        if len(opt) == 1 and opt[0].count(':'):
            # slice spec
            arg = opt[0].split(':')
            spec = []
            for a in arg:
                if not len(a):
                    spec.append(None)
                else:
                    spec.append(int(a))
            sliceme[which] = slice(*spec)
        else:
            # actual indices
            sliceme[which] = [int(o) for o in opt]
    # attribute evaluation
    for opt, col, which in ((args.samples_by_attr, ds.sa, 'samples'),
                            (args.features_by_attr, ds.fa, 'features')):
        if opt is None:
            continue
        sliceme[which] = _eval_attr_expr(opt, col)
    # apply selection
    ds = ds.__getitem__((sliceme['samples'], sliceme['features']))
    verbose(1, 'Selected %i samples with %i features' % ds.shape)
    # strip attributes
    for attrarg, col, descr in ((args.strip_sa, ds.sa, 'sample '),
                                (args.strip_fa, ds.fa, 'feature '),
                                (args.strip_da, ds.a, '')):
        if attrarg is not None:
            for attr in attrarg:
                try:
                    del col[attr]
                except KeyError:
                    warning("dataset has no %sattribute '%s' to remove"
                            % (descr, attr))
    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
def process_common_dsattr_opts(ds, args):
    """Go through an argument namespace and process attribute options"""
    # legacy support
    if args.add_sa_attr is not None:
        from mvpa2.misc.io.base import SampleAttributes
        smpl_attrs = SampleAttributes(args.add_sa_attr)
        for a in ('targets', 'chunks'):
            verbose(2, "Add sample attribute '%s' from sample attributes file"
                       % a)
            ds.sa[a] = getattr(smpl_attrs, a)
    # loop over all attribute configurations that we know
    attr_cfgs = (  # var, dst_collection, loader
        ('--add-sa', args.add_sa, ds.sa, _load_from_cmdline),
        ('--add-fa', args.add_fa, ds.fa, _load_from_cmdline),
        ('--add-sa-txt', args.add_sa_txt, ds.sa, _load_from_txt),
        ('--add-fa-txt', args.add_fa_txt, ds.fa, _load_from_txt),
        ('--add-sa-npy', args.add_sa_npy, ds.sa, _load_from_npy),
        ('--add-fa-npy', args.add_fa_npy, ds.fa, _load_from_npy),
    )
    for varid, srcvar, dst_collection, loader in attr_cfgs:
        if srcvar is not None:
            for spec in srcvar:
                attr_name = spec[0]
                if not len(spec) > 1:
                    raise argparse.ArgumentTypeError(
                        "%s option needs at least two values " % varid
                        + "(attribute name and source filename (got: %s)"
                        % spec)
                if dst_collection is ds.sa:
                    verbose(2, "Add sample attribute '%s' from '%s'"
                               % (attr_name, spec[1]))
                else:
                    verbose(2, "Add feature attribute '%s' from '%s'"
                               % (attr_name, spec[1]))
                attr = loader(spec[1:])
                try:
                    dst_collection[attr_name] = attr
                except ValueError as e:
                    # try making the exception more readable
                    e_str = str(e)
                    if e_str.startswith('Collectable'):
                        raise ValueError('attribute %s' % e_str[12:])
                    else:
                        raise e
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # build list of events
    events = []
    timebased_events = False
    if args.event_attrs is not None:
        def_attrs = dict([(k, ds.sa[k].value) for k in args.event_attrs])
        events = find_events(**def_attrs)
    elif args.csv_events is not None:
        if args.csv_events == '-':
            csv = sys.stdin.read()
            import io
            csv = io.StringIO(csv)
        else:
            csv = open(args.csv_events, 'r')
        csvt = _load_csv_table(csv)
        if not len(csvt):
            raise ValueError("no CSV columns found")
        if args.onset_column:
            csvt['onset'] = csvt[args.onset_column]
        nevents = len(csvt[list(csvt.keys())[0]])
        events = []
        for ev in range(nevents):
            events.append(dict([(k, v[ev]) for k, v in csvt.items()]))
    elif args.onsets is not None:
        if not len(args.onsets):
            args.onsets = [i for i in sys.stdin]
        # time or sample-based?
        if args.time_attr is None:
            oconv = int
        else:
            oconv = float
        events = [{'onset': oconv(o)} for o in args.onsets]
    elif args.fsl_ev3 is not None:
        timebased_events = True
        from mvpa2.misc.fsl import FslEV3
        events = []
        for evsrc in args.fsl_ev3:
            events.extend(FslEV3(evsrc).to_events())
    if not len(events):
        raise ValueError("no events defined")
    verbose(2, 'Extracting %i events' % len(events))
    if args.event_compression is None:
        evmap = None
    elif args.event_compression == 'mean':
        evmap = FxMapper('features', np.mean, attrfx=merge2first)
    elif args.event_compression == 'median':
        evmap = FxMapper('features', np.median, attrfx=merge2first)
    elif args.event_compression == 'min':
        evmap = FxMapper('features', np.min, attrfx=merge2first)
    elif args.event_compression == 'max':
        evmap = FxMapper('features', np.max, attrfx=merge2first)
    # convert to event-related ds
    evds = eventrelated_dataset(ds, events,
                                time_attr=args.time_attr,
                                match=args.match_strategy,
                                event_offset=args.offset,
                                event_duration=args.duration,
                                event_mapper=evmap)
    # act on all attribute options
    evds = process_common_dsattr_opts(evds, args)
    # and store
    ds2hdf5(evds, args.output, compression=args.hdf5_compression)
    return evds
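# The event specifications assembled above are plain dictionaries.  A minimal
# hand-built example of the same structure (attribute names and values are
# illustrative, not prescribed):
example_events = [
    {'onset': 12.0, 'duration': 6.0, 'targets': 'face'},
    {'onset': 24.0, 'duration': 6.0, 'targets': 'house'},
]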
def run(args):
    #atlas.relativeToOrigin = args.coordRelativeToOrigin
    fileIn = None
    coordT = None
    niftiInput = None
    # define data type for coordinates
    if args.input_voxels:
        ctype = int
        query_voxel = True
    else:
        ctype = float
        query_voxel = False

    # Setup coordinates read-in
    volQForm = None

    # compatibility with older talairachlabel
    if args.inputCoordFile:
        fileIn = open(args.inputCoordFile)
        coordsIterator = parsed_coordinates_iterator(
            args.inputLineFormat, fileIn, ctype=ctype)
    elif args.inputVolFile:
        infile = args.inputVolFile
        # got a volume/file to process
        if __debug__:
            debug('ATL', "Testing if 0th element in the list a volume")
        niftiInput = None
        try:
            niftiInput = nb.load(infile)
            if __debug__:
                debug('ATL', "Yes it is")
        except Exception as e:
            if __debug__:
                debug('ATL', "No it is not due to %s. "
                             "Trying to parse the file" % e)

        if niftiInput:
            # if we got here -- it is a proper volume
            # XXX ask Michael to remove nasty warning message
            coordsIterator = select_from_volume_iterator(
                infile, args.lowerThreshold, args.upperThreshold)
            assert(coordT is None)
            coordT = Linear(niftiInput.header.get_qform())
            # lets store volumeQForm for possible conversion of voxels
            # into coordinates
            volQForm = coordT
            # previous iterator returns space coordinates
            args.coordRelativeToOrigin = True
        else:
            raise ValueError('could not open volumetric input file')
    else:
        # input is stdin
        coordsIterator = parsed_coordinates_iterator(
            args.inputLineFormat, ctype=ctype)

    # Open and initialize atlas lookup
    if args.atlasFile is None:
        if args.atlasPath is None:
            args.atlasPath = KNOWN_ATLASES[args.atlasName]
        args.atlasFile = args.atlasPath % ({'name': args.atlasName})

    akwargs_common = {}
    if args.atlasImageFile:
        akwargs_common['image_file'] = args.atlasImageFile

    if not args.forbidDirectMapping \
            and niftiInput is not None and not args.transformationFile:
        akwargs = {'resolution': niftiInput.header.get_zooms()[0]}
        query_voxel = True  # if we can query directly by voxel, do so

        akwargs.update(akwargs_common)
        verbose(1, "Will attempt direct mapping from input voxels into atlas "
                   "voxels at resolution %.2f" % akwargs['resolution'])

        atlas = Atlas(args.atlasFile, **akwargs)

        # verify that we got the same qforms in atlas and in the data file
        if atlas.space != args.inputSpace:
            verbose(0,
                    "Cannot do direct mapping between input image in %s space"
                    " and atlas in %s space. Use -I switch to override input"
                    " space if it is misspecified, or use -T to provide a"
                    " transformation. Trying to proceed"
                    % (args.inputSpace, atlas.space))
            query_voxel = False
        elif not (niftiInput.header.get_qform() ==
                  atlas._image.header.get_qform()).all():
            if args.atlasImageFile is None:
                warning(
                    "Cannot do direct mapping between files with different"
                    " qforms. Please provide original transformation (-T)."
                    "\n Input qform:\n%s\n Atlas qform: \n%s"
                    % (niftiInput.header.get_qform(),
                       atlas._image.header.get_qform()), 1)
                # reset ability to query by voxels
                query_voxel = False
            else:
                warning(
                    "QForms are different between input image and "
                    "provided atlas image."
                    "\n Input qform of %s:\n%s\n Atlas qform of %s:\n%s"
                    % (infile, niftiInput.header.get_qform(),
                       args.atlasImageFile,
                       atlas._image.header.get_qform()), 1)
        else:
            coordT = None
    else:
        atlas = Atlas(args.atlasFile, **akwargs_common)

    if isinstance(atlas, ReferencesAtlas):
        args.referenceLevel = args.referenceLevel.replace('/', ' ')
        atlas.set_reference_level(args.referenceLevel)
        atlas.distance = args.maxDistance
    else:
        args.showReferencedCoordinates = False

    if isinstance(atlas, FSLProbabilisticAtlas):
        atlas.strategy = args.probStrategy
        atlas.thr = args.probThr

    ## If not in Talairach -- in MNI with voxel size 2x2x2
    # Original talairachlabel assumed that if respective to origin --
    # voxels were scaled already.
    #if args.coordInTalairachSpace:
    #    voxelSizeOriginal = np.array([1, 1, 1])
    #else:
    #    voxelSizeOriginal = np.array([2, 2, 2])

    if args.coordInTalairachSpace:
        args.inputSpace = "Talairach"

    if not (args.inputSpace == atlas.space or
            (args.inputSpace in ["MNI", "Talairach"] and
             atlas.space == "Talairach")):
        raise XMLAtlasException(
            "Unknown space '%s' which is not the same as atlas space '%s'"
            " either" % (args.inputSpace, atlas.space))

    if query_voxel:
        # we do direct mapping
        coordT = None
    else:
        verbose(2, "Chaining needed transformations")
        # by default -- no transformation
        if args.transformationFile:
            #externals.exists('scipy', raise_=True)
            # scipy.io.read_array was deprecated a while back (around 0.8.0)
            from numpy import loadtxt
            transfMatrix = loadtxt(args.transformationFile)
            coordT = Linear(transfMatrix, previous=coordT)
            verbose(2, "coordT got linear transformation from file %s"
                       % args.transformationFile)

        voxelOriginOriginal = None
        voxelSizeOriginal = None

        if not args.coordRelativeToOrigin:
            if args.inputSpace == "Talairach":
                # assume that atlas is in Talairach space already
                voxelOriginOriginal = atlas.origin
                voxelSizeOriginal = np.array([1, 1, 1])
            elif args.inputSpace == "MNI":
                # need to adjust for the MNI origin in terms of voxels
                #voxelOriginOriginal = np.array([46, 64, 37])
                voxelOriginOriginal = np.array([45, 63, 36])
                voxelSizeOriginal = np.array([2.0, 2.0, 2.0])
                warning("Assuming elderly sizes for MNI volumes with"
                        " origin %s and sizes %s"
                        % (repr(voxelOriginOriginal),
                           repr(voxelSizeOriginal)))

        if not (voxelOriginOriginal is None and voxelSizeOriginal is None):
            verbose(2, "Assigning origin adjusting transformation with"
                       " origin=%s and voxelSize=%s"
                       % (repr(voxelOriginOriginal),
                          repr(voxelSizeOriginal)))
            coordT = SpaceTransformation(origin=voxelOriginOriginal,
                                         voxelSize=voxelSizeOriginal,
                                         to_real_space=True,
                                         previous=coordT)

        # besides adjusting for different origin we need to transform into
        # Talairach space
        if args.inputSpace == "MNI" and atlas.space == "Talairach":
            verbose(2, "Assigning transformation %s"
                       % args.MNI2TalTransformation)
            # What transformation to use
            coordT = {"matthewbrett": MNI2Tal_MatthewBrett,
                      "lancaster07fsl": mni_to_tal_lancaster07_fsl,
                      "lancaster07pooled": mni_to_tal_lancaster07pooled,
                      "meyerlindenberg98": mni_to_tal_meyer_lindenberg98,
                      "yohflirt": mni_to_tal_yohflirt
                      }[args.MNI2TalTransformation](previous=coordT)

        if args.inputSpace == "MNI" and args.halfVoxelCorrection:
            originCorrection = np.array([0.5, 0.5, 0.5])
        else:
            # perform transformation anyway to convert to voxel space (integers)
            originCorrection = None

        # To be closer to what original talairachlabel did -- add 0.5 to each coord
        coordT = SpaceTransformation(origin=originCorrection, voxelSize=None,
                                     to_real_space=False, previous=coordT)

    if args.createSummary:
        summary = {}
        if args.levels is None:
            args.levels = str(min(4, atlas.nlevels - 1))
    if args.levels is None:
        args.levels = list(range(atlas.nlevels))
    elif isinstance(args.levels, str):
        if args.levels == 'list':
            print("Known levels and their indices:\n" + atlas.levels_listing())
            sys.exit(0)
        slevels = args.levels.split(',')
        args.levels = []
        for level in slevels:
            try:
                int_level = int(level)
            except ValueError:
                if level in atlas.levels:
                    int_level = atlas.levels[level].index
                else:
                    raise RuntimeError(
                        "Unknown level '%s'. " % level
                        + "Known levels and their indices:\n"
                        + atlas.levels_listing())
            args.levels += [int_level]
    else:
        raise ValueError("Don't know how to handle list of levels %s."
                         " Example is '1,2,3'" % (args.levels,))

    verbose(3, "Operating on the following levels: %s" % args.levels)
    # assign levels to the atlas
    atlas.default_levels = args.levels

    if args.outputFile:
        output = open(args.outputFile, 'w')
    else:
        output = sys.stdout

    # validity check
    if args.dumpmapFile:
        if niftiInput is None:
            raise RuntimeError("You asked to dump indexes into the volume, "
                               "but input wasn't a volume")
        ni_dump = nb.load(infile)
        ni_dump_data = np.zeros(ni_dump.header.get_data_shape()[:3] +
                                (len(args.levels),))

    # Also check if we have provided voxels but are not querying by voxels
    if args.input_voxels:
        if coordT is not None:
            raise NotImplementedError(
                "Cannot perform voxels querying having coordT defined")
        if not query_voxel:
            raise NotImplementedError(
                "query_voxel was reset to False, can't do queries by voxel")

    # Read coordinates
    numVoxels = 0
    for c in coordsIterator:
        value, coord_orig, t = c[0], c[1:4], c[4]
        if __debug__:
            debug('ATL', "Obtained coord_orig=%s with value %s"
                         % (repr(coord_orig), value))

        lt, ut = args.lowerThreshold, args.upperThreshold
        if lt is not None and value < lt:
            verbose(5, "Value %s is less than lower threshold %s, thus voxel "
                       "is skipped" % (value, args.lowerThreshold))
            continue
        if ut is not None and value > ut:
            verbose(5, "Value %s is greater than upper threshold %s, thus "
                       "voxel is skipped" % (value, args.upperThreshold))
            continue

        numVoxels += 1

        # Apply necessary transformations
        coord = coord_orig = np.array(coord_orig)

        if coordT:
            coord = coordT[coord_orig]

        # Query label
        if query_voxel:
            voxel = atlas[coord]
        else:
            voxel = atlas(coord)
        voxel['coord_orig'] = coord_orig
        voxel['value'] = value
        voxel['t'] = t

        if args.createSummary:
            summaryIndex = ""
            voxel_labels = voxel["labels"]
            for i, ind in enumerate(args.levels):
                voxel_label = voxel_labels[i]
                text = present_labels(args, voxel_label)
                #if len(voxel_label):
                #    assert(voxel_label['index'] == ind)
                summaryIndex += text + " / "
            if summaryIndex not in summary:
                summary[summaryIndex] = {'values': [], 'max': value,
                                         'maxcoord': coord_orig}
                if 'voxel_referenced' in voxel:
                    summary[summaryIndex]['distances'] = []
            summary_ = summary[summaryIndex]
            summary_['values'].append(value)
            if summary_['max'] < value:
                summary_['max'] = value
                summary_['maxcoord'] = coord_orig
            if 'voxel_referenced' in voxel:
                if voxel['voxel_referenced'] and voxel['distance'] >= 1e-3:
                    verbose(5, 'Appending distance %e for voxel at %s'
                               % (voxel['distance'], voxel['coord_orig']))
                    summary_['distances'].append(voxel['distance'])
        else:
            # Display while reading/processing
            first, out = True, ""

            if args.showValues:
                out += "%(value)5.2f "
            if args.showOriginalCoordinates:
                out += "%(coord_orig)s ->"
            if args.showReferencedCoordinates:
                out += " %(voxel_referenced)s=>%(distance).2f" \
                       "=>%(voxel_queried)s ->"
            if args.showTargetCoordinates:
                out += " %(coord_queried)s: "
                #out += "(%d,%d,%d): " % tuple(map(lambda x: int(round(x)), coord))
            if args.showTargetVoxel:
                out += " %(voxel_queried)s ->"

            if args.levels is None:
                args.levels = list(range(len(voxel['labels'])))

            labels = [present_labels(args, voxel['labels'][i])
                      for i in args.levels]
            out += ','.join(labels)
            #if args.abbreviatedLabels:
            #    out += ','.join([l.abbr for l in labels])
            #else:
            #    out += ','.join([l.text for l in labels])
            output.write(out % voxel + "\n")

        if args.dumpmapFile:
            try:
                ni_dump_data[coord_orig[0], coord_orig[1], coord_orig[2]] = \
                    [voxel['labels'][i]['label'].index
                     for i, ind in enumerate(args.levels)]
            except Exception as e:
                import pydb
                pydb.debugger()

    # if we opened any file -- close it
    if fileIn:
        fileIn.close()

    if args.dumpmapFile:
        ni_dump = nb.Nifti1Image(ni_dump_data, None, ni_dump.header)
        ni_dump.to_filename(args.dumpmapFile)

    if args.createSummary:
        if numVoxels == 0:
            verbose(1, "No matching voxels were found.")
        else:
            get_summary(args, summary, output)

    if args.outputFile:
        output.close()
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError('cross-validation payload requires --learner '
                             'and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)

    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))

    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scattering among available rids,
            # so we need to get the original ids back
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on the entire feature set
            roi_ids = seed_ids

    verbose(3, 'Attempting %i ROI analyses'
               % ((roi_ids is None) and ds.nfeatures or len(roi_ids)))

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    # add_center_fa
    # tmp_prefix
    # nblocks
    # null_dist

    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain,
        # which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]

    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
def aux_basic(self, dirname, rc):
    """Helper function -- to assure that all filehandlers get closed
    so we could remove the trash directory.  Otherwise -- .nfs* files
    on NFS-mounted drives cause problems.
    """
    report = rc('UnitTest report',
                title="Sample report for testing",
                path=dirname)
    isdummy = isinstance(report, DummyReport)

    verbose.handlers = [report]
    verbose.level = 3
    verbose(1, "Starting")
    verbose(2, "Level 2")
    if not isdummy:
        self.assertTrue(len(report._story) == 2,
                        msg="We should have got some lines from verbose")

    if __debug__:
        odhandlers = debug.handlers
        debug.handlers = [report]
        oactive = debug.active
        debug.active = ['TEST'] + debug.active
        debug('TEST', "Testing report as handler for debug")
        if not isdummy:
            self.assertTrue(len(report._story) == 4,
                            msg="We should have got some lines from debug")
        debug.active = oactive
        debug.handlers = odhandlers

    os.makedirs(dirname)

    if externals.exists('pylab plottable'):
        if not isdummy:
            clen = len(report._story)
        import pylab as pl
        pl.ioff()
        pl.close('all')
        pl.figure()
        pl.plot([1, 2], [3, 2])
        pl.figure()
        pl.plot([2, 10], [3, 2])
        pl.title("Figure 2 must be it")
        report.figures()

        if not isdummy:
            self.assertTrue(
                len(report._story) == clen + 2,
                msg="We should have got some lines from figures")

    report.text("Dugi bugi")
    # make sure we don't puke on xml like text with crap
    report.text("<kaj>$lkj&*()^$%#%</kaj>")
    report.text("locals:\n%s globals:\n%s" % (repr(locals()), repr(globals())))
    # bloody XML - just to check that there is no puke
    report.xml("<b>Dugi bugi</b>")
    report.save()

    if externals.exists('pylab'):
        import pylab as pl
        pl.close('all')
        pl.ion()
def plot_scatter(dataXd, mask=None, masked_opacity=0.,
                 labels=None, colors=True, dimcolor=1, title=None,
                 limits='auto', thresholds=None, hint_opacity=0.9,
                 x_jitter=None, y_jitter=None,
                 fig=None, ax_scatter=None, ax_hist_x=None, ax_hist_y=None,
                 bp_location='scatter', xlim=None, ylim=None,
                 rasterized=None, uniq=False, include_stats=False):
    """
    Parameters
    ----------
    dataXd: array
      The volumetric (or not) data to plot where first dimension
      should only have 2 items
    mask: array, optional
      Additional mask to specify which values not to consider for
      plotting.  By default values with 0s in both dimensions are not
      plotted.
    masked_opacity: float, optional
      By default masked out values are not plotted at all.  A value in
      (0,1] will make them visible with this specified opacity
    labels: list of str, optional
      Labels to place for x and y axes
    colors: bool or string or colormap, optional
      Whether to use colors to associate with physical location and
      what colormap to use (jet by default if colors=True)
    dimcolor: int
      If `colors`, then which dimension (within given 3D volume) to
      "track"
    limits: 'auto', 'same', 'per-axis' or (min, max)
      Limits for axes: when 'auto', if data ranges overlap by more than
      50% of the union range, 'same' is used.  When 'same' -- the same
      limits on both axes as determined by data.  If a two-element
      tuple or list is provided, that range is applied to both axes.
    hint_opacity: float, optional
      If `colors` is True, a "slice" of the volumetric data is plotted
      with this opacity to hint about the location of points in the
      original Xd data in the `dimcolor` dimension
    x_jitter: float, optional
      Half-width of uniform noise added to x values.  Might be useful
      if data is quantized so it is valuable to jitter points a bit.
    y_jitter: float, optional
      Half-width of uniform noise added to y values.  Might be useful
      if data is quantized so it is valuable to jitter points a bit.
    fig : Figure, optional
      Figure to plot on, otherwise a new one is created
    ax_*: axes, optional
      Axes for the scatter plot and histograms. If none of them is
      specified (which is the default) then the 'classical' plot is
      rendered with histograms above and to the right
    bp_location: ('scatter', 'hist', None), optional
      Where to place boxplots depicting data range
    xlim: tuple, optional
    ylim: tuple, optional
      To fix the plotted range
    rasterized: bool, optional
      Passed to the scatter call, to allow rasterization of heavy
      scatter plots
    uniq: bool, optional
      Plot unique values (those present in one but not in the other)
      along each axis with crosses
    include_stats: bool, optional
      Whether to report additional statistics on the data.  Stats are
      also reported via verbose at level 2
    """
    if len(dataXd) != 2:
        raise ValueError("First axis of dataXd can only have two dimensions, "
                         "got {0}".format(len(dataXd)))
    # TODO: allow to operate on list of arrays to not waste RAM/cycles
    dataXd = np.asanyarray(dataXd)
    data = dataXd.reshape((2, -1))
    if dataXd.ndim < 5:
        ntimepoints = 1
    elif dataXd.ndim == 5:
        ntimepoints = dataXd.shape[-1]
    else:
        raise ValueError("Do not know how to handle data with %d dimensions"
                         % (dataXd.ndim - 1))
    if x_jitter or y_jitter:
        data = data.copy()  # lazy and wasteful

        def jitter_me(x, w):
            x += np.random.uniform(-w, w, size=data.shape[-1])

        if x_jitter:
            jitter_me(data[0, :], x_jitter)
        if y_jitter:
            jitter_me(data[1, :], y_jitter)

    finites = np.isfinite(data)
    nz = np.logical_and(data != 0, finites)
    # TODO : avoid doing data != 0 and just use provided utter mask
    #nz[:, 80000:] = False  # for quick testing

    nzsum = np.sum(nz, axis=0)

    intersection = nzsum == 2
    # for coloring we would need to know all the indices
    union = nzsum > 0
    x, y = datainter = data[:, intersection]

    if mask is not None:
        if mask.size * ntimepoints == intersection.size:
            # we have got a single mask applicable to both x and y
            pass
        elif mask.size * ntimepoints == 2 * intersection.size:
            # we have got a mask per each, let's get an intersection
            assert mask.shape[0] == 2, "had to get 1 for x, 1 for y"
            mask = np.logical_and(mask[0], mask[1])
        else:
            raise ValueError(
                "mask of shape %s. data of shape %s. ntimepoints=%d. "
                "Teach me how to apply it"
                % (mask.shape, data.shape, ntimepoints))
        # replicate mask ntimepoints times
        mask = np.repeat(mask.ravel(), ntimepoints)[intersection] != 0
        x_masked = x[mask]
        y_masked = y[mask]

    xnoty = (nz[0].astype(int) - nz[1].astype(int)) > 0
    ynotx = (nz[1].astype(int) - nz[0].astype(int)) > 0

    msg = ''
    if not np.all(finites):
        msg = " non-finite x: %d, y: %d" % (np.sum(~finites[0]),
                                            np.sum(~finites[1]))

    verbose(1, "total: %d union: %d%s intersection: %d x_only: %d y_only: %d%s"
               % (len(nzsum),
                  np.sum(union),
                  mask is not None and ' masked: %d' % np.sum(mask) or '',
                  np.sum(intersection),
                  np.sum(xnoty),
                  np.sum(ynotx),
                  msg))

    if include_stats:
        # report some statistics as well
        import scipy.stats as ss
        r, p = ss.pearsonr(x, y)
        d = np.linalg.norm(x - y)
        statsline = "r=%.2f p=%.4g ||x-y||=%.4g" % (r, p, d)
        try:
            from mvpa2.misc.dcov import dcorcoef
            nmax = min(1000, len(x))
            idx = np.random.permutation(np.arange(len(x)))[:nmax]
            dcor = dcorcoef(x[idx], y[idx])
            dcor_s = '' if len(x) == nmax else '[%d random]' % nmax
            statsline += ' dcorr%s=%.4g' % (dcor_s, dcor)
        except ImportError:
            pass
        verbose(2, statsline)
    else:
        statsline = ''

    #fig = pl.figure()
    #pl.plot(datainter[0], datainter[1], '.')
    #fig.show()

    nullfmt = pl.NullFormatter()  # no labels

    # definitions for the axes
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02

    if not (bool(ax_scatter) or bool(ax_hist_x) or bool(ax_hist_y)):
        # no custom axes specified -- use our default setup
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]

        # start with a rectangular Figure
        if fig is None:
            fig = pl.figure(figsize=(10, 10))

        ax_scatter = pl.axes(rect_scatter)
        ax_hist_x = pl.axes(rect_histx)
        ax_hist_y = pl.axes(rect_histy)
    else:
        # check if all not None?
        # assert(len(axes) == 3)
        ax_bp_x, ax_bp_y = None, None
        if ax_scatter is None:
            raise ValueError("Makes no sense not to have a scatter plot")

    ax_bp_x = ax_bp_y = None
    if bp_location is not None:
        ax_bp_x_parent = ax_bp_y_parent = None
        if bp_location == 'scatter':
            # place boxplots into the scatter plot
            ax_bp_x_parent = ax_scatter
            ax_bp_y_parent = ax_scatter
        elif bp_location == 'hist':
            # place boxplots into the histogram plots
            ax_bp_x_parent = ax_hist_x
            ax_bp_y_parent = ax_hist_y
        else:
            raise ValueError(
                "bp_location needs to be from (None, 'scatter', 'hist')")

        if ax_bp_x_parent:
            hist_x_pos = ax_bp_x_parent.get_position()
            ax_bp_x = pl_axes([hist_x_pos.x0,
                               hist_x_pos.y0 + hist_x_pos.height * 0.9,
                               hist_x_pos.width,
                               hist_x_pos.height * 0.1],
                              facecolor='y')

        if ax_bp_y_parent:
            hist_y_pos = ax_bp_y_parent.get_position()
            ax_bp_y = pl_axes([hist_y_pos.x0 + hist_y_pos.width * 0.9,
                               hist_y_pos.y0,
                               hist_y_pos.width * 0.1,
                               hist_y_pos.height],
                              facecolor='y')
            # ax_bp_y = pl_axes([left + width * 0.9, bottom, width / 10,
            #                    height], facecolor='y') if ax_hist_y else None

    sc_kwargs = dict(facecolors='none', s=1, rasterized=rasterized)

    # let's use a colormap to get non-boring colors
    cm = colors  # e.g. if it is None
    if colors is True:
        cm = pl.matplotlib.cm.get_cmap('jet')
    elif isinstance(colors, str):
        cm = pl.matplotlib.cm.get_cmap(colors)
    if cm and len(dataXd.shape) > dimcolor + 1:
        cm.set_under((1, 1, 1, 0.1))  # transparent what is not in range
        # we need to get our indices back for those we are going to plot.
        # probably this is the least efficient way:
        ndindices_all = np.array(list(np.ndindex(dataXd.shape[1:])))
        ndindices_nz = ndindices_all[intersection]
        # choose color based on dimcolor
        dimcolor_len = float(dataXd.shape[1 + dimcolor])
        edgecolors = cm(((cm.N - 1) *
                         ndindices_nz[:, dimcolor] / dimcolor_len).astype(int))
        if mask is not None:
            # Plot first those which might be masked out
            if masked_opacity:
                mask_inv = np.logical_not(mask)
                mask_edgecolors = edgecolors[mask_inv].copy()
                # Adjust alpha value
                mask_edgecolors[:, -1] *= masked_opacity
                ax_scatter.scatter(x[mask_inv], y[mask_inv],
                                   edgecolors=mask_edgecolors,
                                   alpha=masked_opacity,
                                   **sc_kwargs)
            # Plot (on top) those which are not masked-out
            if mask.size:
                x_plot, y_plot, edgecolors_plot = \
                    x[mask], y[mask], edgecolors[mask]
            else:
                # older numpys blow here
                x_plot, y_plot, edgecolors_plot = (np.array([]),) * 3
        else:
            # Just plot all of them at once
            x_plot, y_plot, edgecolors_plot = x, y, edgecolors

        if len(x_plot):
            ax_scatter.scatter(x_plot, y_plot, edgecolors=edgecolors_plot,
                               **sc_kwargs)

        # for orientation we need to plot 1 slice... assume that the last
        # dimension is z -- figure out a slice with max # of non-zeros
        zdim_entries = ndindices_nz[:, -1]
        if np.size(zdim_entries):
            zdim_counts, _ = np.histogram(
                zdim_entries, bins=np.arange(0, np.max(zdim_entries) + 1))
            zdim_max = np.argmax(zdim_counts)

            if hint_opacity:
                # now we need to plot that zdim_max slice taking into account
                # our colormap -- create new axes
                axslice = pl_axes([left, bottom + height * 0.72,
                                   width / 4., height / 5.],
                                  facecolor='y')
                axslice.axis('off')
                # XXX hardcoded assumption on dimcolor=1
                sslice = np.zeros(dataXd.shape[1:3])
                sslice[:, :] = np.arange(dimcolor_len)[None, :]
                # if there is a time dimension -- choose the minimal value
                # across all timepoints
                dataXd_mint = np.min(dataXd, axis=-1) \
                    if dataXd.ndim == 5 else dataXd
                # reset those not in the picture to be "under" range
                sslice[dataXd_mint[0, ..., zdim_max] == 0] = -1
                axslice.imshow(sslice, alpha=hint_opacity, cmap=cm)
    else:
        # the scatter plot without colors to distinguish location
        ax_scatter.scatter(x, y, **sc_kwargs)

    if labels:
        ax_scatter.set_xlabel(labels[0])
        ax_scatter.set_ylabel(labels[1])

    # "unique" points on each of the axes
    if uniq:
        if np.sum(xnoty):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(xnoty)[0]]),
                               fill_nonfinites(data[1, np.where(xnoty)[0]]),
                               edgecolor='b', **sc_kwargs)
        if np.sum(ynotx):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(ynotx)[0]]),
                               fill_nonfinites(data[1, np.where(ynotx)[0]]),
                               edgecolor='g', **sc_kwargs)

    # Axes
    if np.size(x):
        ax_scatter.plot((np.min(x), np.max(x)), (0, 0), 'r', alpha=0.5)
    else:
        warning("There is nothing to plot, returning early")
        return pl.gcf()

    ax_scatter.plot((0, 0), (np.min(y), np.max(y)), 'r', alpha=0.5)

    if (mask is not None and not masked_opacity and np.sum(mask)):
        # if there is a non-degenerate mask which was not intended to be
        # plotted, take those values away while estimating min/max range
        _ = x[mask]
        minx, maxx = np.min(_), np.max(_)
        _ = y[mask]
        miny, maxy = np.min(_), np.max(_)
        del _  # no need to consume RAM
        #print "Here y range", miny, maxy
    else:
        minx, maxx = np.min(x), np.max(x)
        miny, maxy = np.min(y), np.max(y)

    # Process 'limits' option
    if isinstance(limits, str):
        limits = limits.lower()
        if limits == 'auto':
            overlap = min(maxx, maxy) - max(minx, miny)
            range_ = max(maxx, maxy) - min(minx, miny)
            limits = {True: 'same',
                      False: 'per-axis'}[not range_
                                         or overlap / float(range_) > 0.5]
        if limits == 'per-axis':
            same_range = False
            if xlim is None:
                # add some white border
                dx = (maxx - minx) / 20.
                xlim = (minx - dx, maxx + dx)
            if ylim is None:
                dy = (maxy - miny) / 20.
                ylim = (miny - dy, maxy + dy)
        elif limits == 'same':
            same_range = True
            # assign limits the numerical range
            limits = (np.min([minx, miny]), np.max([maxx, maxy]))
        else:
            raise ValueError("Do not know how to handle same_range=%r"
                             % (limits,))
    else:
        same_range = True

    # Let's now plot threshold lines if provided
    if thresholds is not None:
        stylekwargs = dict(colors='k', linestyles='dotted')
        if len(thresholds):
            ax_scatter.vlines(thresholds[0],
                              ax_scatter.get_xlim()[0] * 0.9,
                              ax_scatter.get_xlim()[1] * 0.9,
                              **stylekwargs)
        if len(thresholds) > 1:
            ax_scatter.hlines(thresholds[1],
                              ax_scatter.get_ylim()[0] * 0.9,
                              ax_scatter.get_ylim()[1] * 0.9,
                              **stylekwargs)

    if same_range:
        # now determine nice limits by hand:
        binwidthx = binwidthy = binwidth = np.max(datainter) / 51.  # 0.25
        minxy, maxxy = limits
        sgn = np.sign(minxy)
        xyrange = maxxy - minxy
        xyamax = np.max([np.max(np.fabs(x)), np.max(np.fabs(y))])
        limn = sgn * (int(sgn * minxy / binwidth) - sgn) * binwidth
        limp = (int(maxxy / binwidth) + 1) * binwidth

        ax_scatter.plot((limn * 0.9, limp * 0.9),
                        (limn * 0.9, limp * 0.9), 'y--')
        if xlim is None:
            xlim = (limn, limp)
        if ylim is None:
            ylim = (limn, limp)

        binsx = binsy = bins = np.arange(limn, limp + binwidth, binwidth)
    else:
        binwidthx = (maxx - minx) / 51.
        binwidthy = (maxy - miny) / 51.

        try:
            binsx = np.arange(minx, maxx + binwidthx, binwidthx)
            binsy = np.arange(miny, maxy + binwidthy, binwidthy)
        except Exception as exc:
            warning(
                "Received following exception while trying to get bins for "
                "minx=%(minx)f maxx=%(maxx)f binwidthx=%(binwidthx)s "
                "miny=%(miny)f maxy=%(maxy)f binwidthy=%(binwidthy)s: %(exc)s. "
                "Returning early" % locals())
            return pl.gcf()

    if xlim is not None:
        ax_scatter.set_xlim(xlim)
    if ylim is not None:
        ax_scatter.set_ylim(ylim)

    # get values to plot for histogram and boxplot
    x_hist, y_hist = (x, y) if (mask is None or not np.sum(mask)) \
        else (x_masked, y_masked)

    if np.any(binsx) and ax_hist_x is not None:
        ax_hist_x.xaxis.set_major_formatter(nullfmt)
        histx = ax_hist_x.hist(x_hist, bins=binsx, facecolor='b')
        ax_hist_x.set_xlim(ax_scatter.get_xlim())
        ax_hist_x.vlines(0, 0, 0.9 * np.max(histx[0]), 'r')

    if np.any(binsy) and ax_hist_y is not None:
        ax_hist_y.yaxis.set_major_formatter(nullfmt)
        histy = ax_hist_y.hist(y_hist, bins=binsy,
                               orientation='horizontal', facecolor='g')
        ax_hist_y.set_ylim(ax_scatter.get_ylim())
        ax_hist_y.hlines(0, 0, 0.9 * np.max(histy[0]), 'r')

    rect_scatter = [left, bottom, width, height]

    # Box plots
    if ax_bp_x is not None:
        ax_bp_x.axis('off')
        bpx = ax_bp_x.boxplot(x_hist, vert=0)  #'r', 0)
        ax_bp_x.set_xlim(ax_scatter.get_xlim())

    if ax_bp_y is not None:
        ax_bp_y.axis('off')
        bpy = ax_bp_y.boxplot(y_hist, sym='g+')
        ax_bp_y.set_ylim(ax_scatter.get_ylim())

    if statsline:
        # draw the text based on gca
        y1, y2 = ax_scatter.get_ylim()
        x1, x2 = ax_scatter.get_xlim()
        ax_scatter.text(0.5 * (x1 + x2),  # center
                        y2 - 0.02 * (y2 - y1),
                        statsline,
                        verticalalignment="top",
                        horizontalalignment="center")

    if title:
        pl.title(title)

    return pl.gcf()
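# A minimal usage sketch for plot_scatter (synthetic volume pair; shapes are
# illustrative -- the first axis must have length 2):
import numpy as np
import pylab as pl

vols = np.random.randn(2, 8, 8, 4)   # two aligned toy "volumes"
plot_scatter(vols, labels=['run 1', 'run 2'], limits='same',
             include_stats=True, title='toy comparison')
pl.show()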
if args.atlasFile is None: if args.atlasPath is None: args.atlasPath = KNOWN_ATLASES[args.atlasName] args.atlasFile = args.atlasPath % ( {'name': args.atlasName} ) akwargs_common = {} if args.atlasImageFile: akwargs_common['image_file'] = args.atlasImageFile if not args.forbidDirectMapping \ and niftiInput is not None and not args.transformationFile: akwargs = {'resolution': niftiInput.get_header().get_zooms()[0]} query_voxel = True # if we can query directly by voxel, do so akwargs.update(akwargs_common) verbose(1, "Will attempt direct mapping from input voxels into atlas " "voxels at resolution %.2f" % akwargs['resolution']) atlas = Atlas(args.atlasFile, **akwargs) # verify that we got the same qforms in atlas and in the data file if atlas.space != args.inputSpace: verbose(0, "Cannot do direct mapping between input image in %s space and" " atlas in %s space. Use -I switch to override input space if" " it is misspecified, or use -T to provide transformation. Trying" " to proceed" % (args.inputSpace, atlas.space)) query_voxel = False elif not (niftiInput.get_header().get_qform() == atlas._image.get_header().get_qform()).all(): if args.atlasImageFile is None: warning( "Cannot do direct mapping between files with different qforms."
f for i, f in enumerate(match_files) if i in set(match_files_select) ] def get_idx(fname): return int(basename(fname).replace('.jpeg', '')) assert (get_idx('/123/0012.jpeg') == 12) assert (get_idx('/123/0000.jpeg') == 0) target_times = np.array(map(get_idx, target_files)).astype(float) / FPS match_times = np.array(map(get_idx, match_files)).astype(float) / FPS verbose( 1, "There are %d target files for %d files to match" % (len(target_files), len(match_files))) # <codecell> def imread_(f): verbose(3, f) return imread(f).mean(axis=2)[::2, ::2] verbose(1, "Loading %d targets" % len(target_files)) targets = [imread_(f) for f in target_files] # <codecell>
def run(args): dss = [arg2ds(d) for d in args.data] verbose(1, "Loaded %i input datasets" % len(dss)) if __debug__: for i, ds in enumerate(dss): debug('CMDLINE', "dataset %i: %s" % (i, str(ds))) # TODO at this point more checks could be done, e.g. ref_ds > len(dss) # assemble parameters params = dict([(param, getattr(args, param)) for param in _supported_parameters]) if __debug__: debug('CMDLINE', "configured parameters: '%s'" % params) # assemble CAs enabled_ca = [ca for ca in _supported_cas if getattr(args, ca)] if __debug__: debug('CMDLINE', "enabled conditional attributes: '%s'" % enabled_ca) hyper = Hyperalignment(enable_ca=enabled_ca, alignment=ProcrusteanMapper(svd='dgesvd', space='commonspace'), **params) verbose(1, "Running hyperalignment") promappers = hyper(dss) verbose(2, "Alignment reference is dataset %i" % hyper.ca.chosen_ref_ds) verbose(1, "Writing output") # save on memory and remove the training data del dss if args.commonspace: if __debug__: debug('CMDLINE', "write commonspace as hdf5") h5save('%s%s.hdf5' % (args.output_prefix, _output_specs['commonspace']['output_suffix']), hyper.commonspace, compression=args.hdf5_compression) for ca in _supported_cas: if __debug__: debug('CMDLINE', "check conditional attribute: '%s'" % ca) if getattr(args, ca): if __debug__: debug('CMDLINE', "store conditional attribute: '%s'" % ca) np.savetxt( '%s%s' % (args.output_prefix, _supported_cas[ca]['output_suffix']), hyper.ca[ca].value.samples) if args.store_transformation: for i, pm in enumerate(promappers): if __debug__: debug('CMDLINE', "store mapper %i: %s" % (i, str(pm))) h5save('%s%s.hdf5' % (args.output_prefix, '_map%.3i' % i), pm, compression=args.hdf5_compression) if args.transform: tdss, dss = _transform_dss(args.transform, promappers, args) del dss verbose(1, "Store transformed datasets") for i, td in enumerate(tdss): if __debug__: debug('CMDLINE', "store transformed data %i: %s" % (i, str(td))) h5save('%s%s.hdf5' % (args.output_prefix, '_transformed%.3i' % i), td, compression=args.hdf5_compression)
def run(args): from mvpa2.base.hdf5 import h5save ds = None vol_attr = dict() if args.add_vol_attr is not None: # XXX add a way to use the mapper of an existing dataset to # add a volume attribute without having to load the entire # mri data again vol_attr = dict(args.add_vol_attr) if not len(args.add_vol_attr) == len(vol_attr): warning("--vol-attr option with duplicate attribute name: " "check arguments!") verbose(2, "Prepare to add volumetric feature attributes: %s" % vol_attr) if args.txt_data is not None: verbose(1, "Load data from TXT file '%s'" % args.txt_data) samples = _load_from_txt(args.txt_data) ds = Dataset(samples) elif args.npy_data is not None: verbose(1, "Load data from NPY file '%s'" % args.npy_data) samples = _load_from_npy(args.npy_data) ds = Dataset(samples) elif args.mri_data is not None: verbose(1, "Load data from MRI image(s) %s" % args.mri_data) from mvpa2.datasets.mri import fmri_dataset ds = fmri_dataset(args.mri_data, mask=args.mask, add_fa=vol_attr) elif args.openfmri_modelbold is not None: verbose(1, "Load data from OpenFMRI model specification %s" % args.openfmri_modelbold) if not len(args.openfmri_modelbold[3]): args.openfmri_modelbold[3] = None # load openfmri dataset from mvpa2.datasets.sources.openfmri import OpenFMRIDataset of = OpenFMRIDataset(args.openfmri_modelbold[0]) ds = of.get_model_bold_dataset( int(args.openfmri_modelbold[1]), int(args.openfmri_modelbold[2]), flavor=args.openfmri_modelbold[3], mask=args.mask, add_fa=vol_attr, add_sa=args.add_fsl_mcpar, ) if ds is None: if args.data is None: raise RuntimeError("no data source specified") else: ds = hdf2ds(args.data)[0] else: if args.data is not None: verbose(1, "ignoring dataset input in favor of other data source -- remove either one to disambiguate") # act on all attribute options ds = process_common_dsattr_opts(ds, args) if args.openfmri_modelbold is None and args.add_fsl_mcpar is not None: from mvpa2.misc.fsl.base import McFlirtParams mc_par = McFlirtParams(args.add_fsl_mcpar) for param in mc_par: verbose(2, "Add motion regressor as sample attribute '%s'" % ("mc_" + param)) ds.sa["mc_" + param] = mc_par[param] verbose(3, "Dataset summary %s" % (ds.summary())) # and store outfilename = args.output if not outfilename.endswith(".hdf5"): outfilename += ".hdf5" verbose(1, "Save dataset to '%s'" % outfilename) h5save(outfilename, ds, mkdir=True, compression=args.hdf5_compression)
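The storage step at the end of run() goes through PyMVPA's HDF5 layer. As a minimal round-trip sketch (the array content is invented; Dataset, h5save, and h5load are the actual PyMVPA entry points):

import numpy as np
from mvpa2.datasets import Dataset
from mvpa2.base.hdf5 import h5save, h5load

# toy dataset: 4 samples x 10 features of noise
ds = Dataset(np.random.randn(4, 10))
h5save('example.hdf5', ds, compression='gzip')  # mirrors the call run() ends with
ds2 = h5load('example.hdf5')
assert ds2.shape == (4, 10)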
def test_verbose_above(self): """Test if it doesn't output at higher levels""" verbose(5, self.msg) self.assertEqual(self.sout.getvalue(), "")
def extract_lsall(data, TR, time_res, hrf_gen, F, good_ons, good_evs, desmat, extract_evs=None): ntp, nvox = data.shape hrf = hrf_gen(time_res) # Set up the high time-resolution design matrix time_up = N.arange(0, TR*ntp+time_res, time_res) all_onsets = [] all_durations = [] all_conds = [] # condition marker if extract_evs is None: extract_evs = range(len(good_evs)) nuisance_evs = sorted(list(set(range(desmat.mat.shape[1])).difference( [good_evs[e] for e in extract_evs]))) for e in extract_evs: ev = good_evs[e] all_onsets = N.hstack((all_onsets, good_ons[e].onsets)) all_durations = N.hstack((all_durations, good_ons[e].durations)) # yoh: ad-hoc warning -- it is marking with (ev/2)+1 (I guess) # assuming presence of derivatives EVs all_conds = N.hstack((all_conds, N.ones(len(good_ons[e].onsets))*((ev/2)+1))) #all_onsets=N.round(all_onsets/TR) # round to nearest TR number ntrials = len(all_onsets) glm_res_full = N.zeros((nvox, ntrials)) dm_trials = N.zeros((ntp, ntrials)) dm_full = [] for t in range(ntrials): verbose(2, "Estimating for trial %d" % t) ## yoh: TODO -- filter outside ## if all_onsets[t] > max_evtime: ## continue # build model for each trial dm_trial = N.zeros(len(time_up)) window_ons = [N.where(time_up==x)[0][0] for x in time_up if all_onsets[t] <= x < all_onsets[t] + all_durations[t]] dm_trial[window_ons] = 1 dm_trial_up = N.convolve(dm_trial, hrf) dm_trial_down = dm_trial_up[0:ntp/time_res*TR:(TR/time_res)] dm_trials[:, t] = dm_trial_down # filter the desmtx, except for the nuisance part (which is already filtered) # since it is taken from a loaded FSL dm_full = N.dot(F, dm_trials) # mean center trials models dm_trials -= dm_trials.mean(0) if len(nuisance_evs) > 0: # and stick nuisance evs if any to the back dm_full = N.hstack((dm_full, desmat.mat[:, nuisance_evs])) dm_full = N.hstack((dm_full, N.ones((ntp, 1)))) glm_res_full = N.dot(N.linalg.pinv(dm_full), data.samples) glm_res_full = glm_res_full[:ntrials] return all_conds, glm_res_full
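The heart of the LS-All design construction above is building a boxcar at high temporal resolution, convolving it with the HRF, and downsampling back to one sample per TR. A self-contained sketch of that step, with invented settings and a gamma-density stand-in for spm_hrf:

import numpy as np
from scipy.stats import gamma

# hypothetical toy settings (not taken from the surrounding code)
TR, time_res, ntp = 2.0, 0.1, 100  # TR [s], model resolution [s], volumes

# high-resolution time axis, as in extract_lsall
time_up = np.arange(0, TR * ntp + time_res, time_res)

# toy HRF: a gamma density sampled at time_res (a stand-in for spm_hrf)
hrf = gamma.pdf(np.arange(0, 30, time_res), a=6)

# boxcar for a single trial with onset 10 s and duration 3 s
boxcar = ((time_up >= 10.0) & (time_up < 13.0)).astype(float)

# convolve at high resolution, then keep every (TR/time_res)-th sample
n_keep = int(round(ntp * TR / time_res))  # number of high-res samples spanning the run
step = int(round(TR / time_res))          # one retained sample per TR
regressor = np.convolve(boxcar, hrf)[:n_keep:step]
assert regressor.shape == (ntp,)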
def pybetaseries(fsfdir, methods=['lsall', 'lsone'], time_res=0.1, modeldir=None, outdir=None, designdir=None, design_fsf_file='design.fsf', design_mat_file='design.mat', data_file=None, mask_file=None, extract_evs=None, collapse_other_conditions=True): """Compute beta-series regression on a feat directory Required arguments: fsfdir: full path of a feat directory Optional arguments: methods: list of methods to be used, can include: 'lsone': single-trial iterative least squares estimation from Turner & Ashby 'lsall': standard beta-series regression from Rissman et al. time_res: time resolution of the model used to generate the convolved design matrix outdir: where to store the results designdir: location of design_mat_file (e.g. design.mat). if None -- the same as fsfdir collapse_other_conditions: collapse all other conditions into a single regressor for the lsone model. Jeanette's analyses suggest that it's better than leaving them separate. data_file: allows one to override the path of the 4D data file instead of the one specified in design.fsf 'feat_files(1)' """ known_methods = ['lsall', 'lsone'] assert set(methods).issubset(set(known_methods)), \ "Unknown method(s): %s" % (set(methods).difference(set(known_methods))) if not os.path.exists(fsfdir): print 'ERROR: %s does not exist!' % fsfdir #return if not fsfdir.endswith('/'): fsfdir = ''.join([fsfdir, '/']) if modeldir is None: modeldir = fsfdir # load design using pymvpa tools fsffile = pjoin(fsfdir, design_fsf_file) desmatfile = pjoin(modeldir, design_mat_file) verbose(1, "Loading design") design = read_fsl_design(fsffile) desmat = FslGLMDesign(desmatfile) ntp, nevs = desmat.mat.shape TR = design['fmri(tr)'] # yoh: theoretically it should be identical to the one read from # the nifti file, but in this sample data those manage to differ: # bold_mcf_brain.nii.gz int16 [ 64, 64, 30, 182] 3.12x3.12x5.00x1.00 sform # filtered_func_data.nii.gz float32 [ 64, 64, 30, 182] 3.12x3.12x5.00x2.00 sform #assert(abs(data.a.imghdr.get_zooms()[-1] - TR) < 0.001) # it is the filtered_func_data.nii.gz which was used for analysis, # and it differs from bold_mcf_brain.nii.gz ... # exclude events that occur within two TRs of the end of the run, due to the # inability to accurately estimate the response to them. max_evtime = TR*ntp - 2*TR # TODO: filter out here the trials jumping outside good_evs = [] nuisance_evs = [] # yoh: ev_td marks temporal derivatives (of good EVs or of nuisance -- all) # replacing with deriv_evs for consistency withderiv_evs = [] # ev_td = N.zeros(design['fmri(evs_real)']) good_ons = [] if outdir is None: outdir = pjoin(fsfdir, 'betaseries') if not os.path.exists(outdir): os.mkdir(outdir) # create smoothing kernel for design cutoff = design['fmri(paradigm_hp)']/TR verbose(1, "Creating smoothing kernel based on the original analysis cutoff %.2f" % cutoff) # yoh: Verify that the kernel is correct since it looks # quite ... F = get_smoothing_kernel(cutoff, ntp) verbose(1, "Determining non-motion conditions") # loop through and find the good (non-motion) conditions # NB: this assumes that the title of a motion EV starts with "mot" # (e.g. "motpar..." ala the openfmri convention).
# TODO: add ability to manually specify motion regressors (currently assumes # that any EV whose title starts with "mot" is a motion regressor) evctr = 0 for ev in range(1, design['fmri(evs_orig)']+1): # filter out motion parameters evtitle = design['fmri(evtitle%d)' % ev] verbose(2, "Loading EV %s" % evtitle) if not evtitle.startswith('mot'): good_evs.append(evctr) evctr += 1 if design['fmri(deriv_yn%d)' % ev] == 1: withderiv_evs.append(evctr-1) # skip temporal derivative evctr += 1 ev_events = FslEV3(pjoin(fsfdir, design['fmri(custom%d)' % ev])) good_ons.append(ev_events) else: nuisance_evs.append(evctr) evctr += 1 if design['fmri(deriv_yn%d)' % ev] == 1: # skip temporal derivative withderiv_evs.append(evctr) nuisance_evs.append(evctr) evctr += 1 # load data verbose(1, "Loading data") maskimg = pjoin(fsfdir, mask_file or 'mask.nii.gz') # yoh: TODO design['feat_files'] is not the one "of interest" since it is # the input file, while we would like to operate on pre-processed version # which is usually stored as filtered_func_data.nii.gz data_file_fullname = complete_filename( pjoin(fsfdir, data_file or "filtered_func_data.nii.gz")) data = fmri_dataset(data_file_fullname, mask=maskimg) assert(len(data) == ntp) for method in methods: verbose(1, 'Estimating %(method)s model...' % locals()) if method == 'lsone': all_conds, glm_res_full = extract_lsone( data, TR, time_res, spm_hrf, F, good_ons, good_evs, nuisance_evs, withderiv_evs, desmat, extract_evs=extract_evs, collapse_other_conditions=collapse_other_conditions) elif method == 'lsall': all_conds, glm_res_full = extract_lsall( data, TR, time_res, spm_hrf, F, good_ons, good_evs, desmat, extract_evs=extract_evs, ) else: raise ValueError(method) all_conds = N.asanyarray(all_conds) # assure array here # map the data into images and save to betaseries directory for e in range(1, len(good_evs)+1): ni = map2nifti(data, data=glm_res_full[N.where(all_conds==e)[0], :]) ni.to_filename(pjoin(outdir, 'ev%d_%s.nii.gz' % (e, method)))
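For orientation, a hypothetical invocation of pybetaseries (the FEAT path and EV indices are made up; keyword names match the signature above):

# hypothetical FEAT directory; results land in <fsfdir>/betaseries/ as ev<N>_<method>.nii.gz
pybetaseries('/data/sub01.feat/',
             methods=['lsall'],              # standard Rissman-style beta series
             time_res=0.1,                   # model resolution in seconds
             extract_evs=[0, 1],             # restrict to the first two non-motion EVs
             collapse_other_conditions=True) # only relevant for 'lsone'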
pymvpa_datadbroot = \ cfg.get('datadb', 'root', default=pathjoin(os.getcwd(), 'datadb')) # # Debugging and optimization # if not __debug__: try: import psyco psyco.profile() except ImportError: from mvpa2.base import verbose verbose(2, "Psyco online compilation is not enabled") else: # Controllable seeding of random number generator from mvpa2.base import debug debug('INIT', 'mvpa') # # RNGs control # from mvpa2._random import _random_seed, seed, get_random_seed # # Testing #
def extract_lsone(data, TR, time_res, hrf_gen, F, good_ons, good_evs, nuisance_evs, withderiv_evs, desmat, extract_evs=None, collapse_other_conditions=True): # loop through the good evs and build the ls-one model # design matrix for each trial/ev ntp, nvox = data.shape hrf = hrf_gen(time_res) # Set up the high time-resolution design matrix time_up = N.arange(0, TR*ntp+time_res, time_res) n_up = len(time_up) dm_nuisanceevs = desmat.mat[:, nuisance_evs] ntrials_total = sum(len(o['onsets']) for o in good_ons) verbose(1, "Have %d trials total to process" % ntrials_total) trial_ctr = 0 all_conds = [] beta_maker = N.zeros((ntrials_total, ntp)) if extract_evs is None: extract_evs = range(len(good_evs)) for e in extract_evs: # range(len(good_evs)): ev = good_evs[e] # first, take the original desmtx and remove the ev of interest other_good_evs = [x for x in good_evs if x != ev] # put the temporal derivatives into other_good_evs # start with its own derivative. This accounts for # a significant amount of divergence from matlab implementation if ev in withderiv_evs: other_good_evs.append(ev+1) for x in other_good_evs: if x in withderiv_evs: other_good_evs.append(x+1) dm_otherevs = desmat.mat[:, other_good_evs] cond_ons = N.array(good_ons[e].onsets) cond_dur = N.array(good_ons[e].durations) ntrials = len(cond_ons) glm_res_full = N.zeros((nvox, ntrials)) verbose(2, 'processing ev %d: %d trials' % (e+1, ntrials)) for t in range(ntrials): verbose(3, "processing trial %d" % t) ## ad-hoc warning -- assumes interleaved presence of ## derivatives' EVs all_conds.append((ev/2)+1) ## yoh: handle outside ## if cond_ons[t] > max_evtime: ## verbose(1, 'TOI: skipping ev %d trial %d: %f %f' ## % (ev, t, cond_ons[t], max_evtime)) ## trial_ctr += 1 ## continue # first build model for the trial of interest at high resolution dm_toi = N.zeros(n_up) window_ons = [N.where(time_up==x)[0][0] for x in time_up if (x >= cond_ons[t]) & (x < cond_ons[t] + cond_dur[t])] dm_toi[window_ons] = 1 dm_toi = N.convolve(dm_toi, hrf)[0:ntp/time_res*TR:(TR/time_res)] other_trial_ons = cond_ons[N.where(cond_ons!=cond_ons[t])[0]] other_trial_dur = cond_dur[N.where(cond_ons!=cond_ons[t])[0]] dm_other = N.zeros(n_up) # process the other trials for o in other_trial_ons: ## yoh: handle outside ## if o > max_evtime: ## continue # find the timepoints that fall within the window b/w onset and onset + duration window_ons = [N.where(time_up==x)[0][0] for x in time_up if o <= x < o + other_trial_dur[N.where(other_trial_ons==o)[0][0]]] dm_other[window_ons] = 1 # Put together the design matrix dm_other = N.convolve(dm_other, hrf)[0:ntp/time_res*TR:(TR/time_res)] if collapse_other_conditions: dm_other = N.hstack((N.dot(F, dm_other[0:ntp, N.newaxis]), dm_otherevs)) dm_other = N.sum(dm_other, 1) dm_full = N.hstack((N.dot(F, dm_toi[0:ntp, N.newaxis]), dm_other[:, N.newaxis], dm_nuisanceevs)) else: dm_full = N.hstack((N.dot(F, dm_toi[0:ntp, N.newaxis]), N.dot(F, dm_other[0:ntp, N.newaxis]), dm_otherevs, dm_nuisanceevs)) dm_full -= dm_full.mean(0) dm_full = N.hstack((dm_full, N.ones((ntp, 1)))) beta_maker_loop = N.linalg.pinv(dm_full) beta_maker[trial_ctr, :] = beta_maker_loop[0, :] trial_ctr += 1 # this uses Jeanette's trick of extracting the beta-forming vector for each # trial and putting them together, which allows estimation for all trials # at once glm_res_full = N.dot(beta_maker, data.samples) return all_conds, glm_res_full
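The closing N.dot(beta_maker, data.samples) exploits the trick credited to Jeanette above: only the first row of each trial's design pseudoinverse is needed to form that trial's beta, so stacking those rows lets a single matrix product estimate all trials at once. A small numeric sketch with random stand-in designs and invented sizes:

import numpy as np

np.random.seed(0)
ntp, nvox, ntrials = 50, 10, 5            # invented sizes
data = np.random.randn(ntp, nvox)         # stand-in for data.samples

beta_maker = np.zeros((ntrials, ntp))
for t in range(ntrials):
    # stand-in per-trial design: [trial of interest, other regressors, constant]
    dm_full = np.hstack((np.random.randn(ntp, 3), np.ones((ntp, 1))))
    # row 0 of the pseudoinverse is the vector that forms this trial's beta
    beta_maker[t, :] = np.linalg.pinv(dm_full)[0, :]

# one matrix product then estimates every trial's beta for all voxels at once
betas = np.dot(beta_maker, data)          # shape: (ntrials, nvox)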
def run(args): if args.chunks is not None: # apply global "chunks" setting for cattr in ("detrend_chunks", "zscore_chunks"): if getattr(args, cattr) is None: # only overwrite if individual option is not given args.__setattr__(cattr, args.chunks) ds = arg2ds(args.data) if args.poly_detrend is not None: if args.detrend_chunks is not None and not args.detrend_chunks in ds.sa: raise ValueError("--detrend-chunks attribute '%s' not found in dataset" % args.detrend_chunks) from mvpa2.mappers.detrend import poly_detrend verbose(1, "Detrend") poly_detrend( ds, polyord=args.poly_detrend, chunks_attr=args.detrend_chunks, opt_regs=args.detrend_regrs, space=args.detrend_coords, ) if args.filter_passband is not None: from mvpa2.mappers.filters import iir_filter from scipy.signal import butter, buttord if args.sampling_rate is None or args.filter_stopband is None: raise ValueError("spectral filtering requires specification of " "--filter-stopband and --sampling-rate") # determine filter type nyquist = args.sampling_rate / 2.0 if len(args.filter_passband) > 1: btype = "bandpass" if not len(args.filter_passband) == len(args.filter_stopband): raise ValueError("passband and stopband specifications have to " "match in size") wp = [v / nyquist for v in args.filter_passband] ws = [v / nyquist for v in args.filter_stopband] elif args.filter_passband[0] < args.filter_stopband[0]: btype = "lowpass" wp = args.filter_passband[0] / nyquist ws = args.filter_stopband[0] / nyquist elif args.filter_passband[0] > args.filter_stopband[0]: btype = "highpass" wp = args.filter_passband[0] / nyquist ws = args.filter_stopband[0] / nyquist else: raise ValueError("invalid specification of Butterworth filter") # create filter verbose(1, "Spectral filtering (%s)" % (btype,)) try: ord, wn = buttord(wp, ws, args.filter_passloss, args.filter_stopattenuation, analog=False) b, a = butter(ord, wn, btype=btype) except OverflowError: raise ValueError("cannot construct Butterworth filter for the given " "specification") ds = iir_filter(ds, b, a) if args.zscore: from mvpa2.mappers.zscore import zscore verbose(1, "Z-score") zscore(ds, chunks_attr=args.zscore_chunks, params=args.zscore_params) verbose(3, "Dataset summary %s" % (ds.summary())) # invariants? if args.strip_invariant_features is not None: from mvpa2.datasets.miscfx import remove_invariant_features ds = remove_invariant_features(ds) # and store ds2hdf5(ds, args.output, compression=args.hdf5_compression) return ds
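The filter setup above normalizes the passband/stopband edges by the Nyquist frequency before handing them to buttord/butter. A standalone sketch with invented band edges (SciPy's actual API; lfilter is a stand-in for iir_filter(ds, b, a)):

import numpy as np
from scipy.signal import butter, buttord, lfilter

# hypothetical band-pass specification: keep 0.01-0.1 Hz at 0.5 Hz sampling
sampling_rate = 0.5
nyquist = sampling_rate / 2.0
wp = [0.01 / nyquist, 0.1 / nyquist]    # passband edges, normalized as above
ws = [0.005 / nyquist, 0.15 / nyquist]  # stopband edges, normalized

ord_, wn = buttord(wp, ws, gpass=1.0, gstop=30.0, analog=False)
b, a = butter(ord_, wn, btype='bandpass')

signal = np.random.randn(200)           # toy time series
filtered = lfilter(b, a, signal)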
def plot_scatter( dataXd, mask=None, masked_opacity=0., labels=None, colors=True, dimcolor=1, title=None, limits='auto', thresholds=None, hint_opacity=0.9, x_jitter=None, y_jitter=None, fig=None, ax_scatter=None, ax_hist_x=None, ax_hist_y=None, bp_location='scatter', xlim=None, ylim=None, rasterized=None, uniq=False, include_stats=False, ): """ Parameters ---------- dataXd: array The volumetric (or not) data to plot; its first dimension must have exactly 2 items mask: array, optional Additional mask to specify which values should not be plotted. By default values with 0s in both dimensions are not plotted. masked_opacity: float, optional By default masked-out values are not plotted at all. A value in (0, 1] makes them visible with the specified opacity labels: list of str, optional Labels to place on the x and y axes colors: bool or string or colormap, optional Whether to use colors to encode physical location, and which colormap to use (jet by default if colors=True) dimcolor: int If `colors`, then which dimension (within the given 3D volume) to "track" limits: 'auto', 'same', 'per-axis' or (min, max) Limits for the axes: 'auto' resolves to 'same' if the overlap of the data ranges exceeds 50% of their union, and to 'per-axis' otherwise. When 'same' -- the same limits on both axes as determined by the data. If a two-element tuple or list is provided, then that range is applied to both axes. hint_opacity: float, optional If `colors` is True, a "slice" of the volumetric data is plotted with the specified opacity to hint at the location of points in the original Xd data along the `dimcolor` dimension x_jitter: float, optional Half-width of uniform noise added to x values. Might be useful if data is quantized so it is valuable to jitter points a bit. y_jitter: float, optional Half-width of uniform noise added to y values. Might be useful if data is quantized so it is valuable to jitter points a bit fig : Figure, optional Figure to plot on, otherwise a new one is created ax_*: axes, optional Axes for the scatter plot and histograms. If none of them is specified (which is the default) then a 'classical' plot is rendered with histograms above and to the right bp_location: ('scatter', 'hist', None), optional Where to place boxplots depicting the data range xlim: tuple, optional ylim: tuple, optional To fix the plotted range rasterized: bool, optional Passed to the scatter call, to allow rasterization of heavy scatter plots uniq: bool, optional Plot unique values (those present in one but not in the other) along each axis with crosses include_stats: bool, optional Whether to report additional statistics on the data.
Stats are also reported via verbose at level 2 """ if len(dataXd) != 2: raise ValueError("First axis of dataXd can only have two dimensions, " "got {0}".format(len(dataXd))) dataXd = np.asanyarray( dataXd ) # TODO: allow to operate on list of arrays to not waste RAM/cycles data = dataXd.reshape((2, -1)) if dataXd.ndim < 5: ntimepoints = 1 elif dataXd.ndim == 5: ntimepoints = dataXd.shape[-1] else: raise ValueError("Do not know how to handle data with %d dimensions" % (dataXd.ndim - 1)) if x_jitter or y_jitter: data = data.copy() # lazy and wasteful def jitter_me(x, w): x += np.random.uniform(-w, w, size=data.shape[-1]) if x_jitter: jitter_me(data[0, :], x_jitter) if y_jitter: jitter_me(data[1, :], y_jitter) finites = np.isfinite(data) nz = np.logical_and(data != 0, finites) # TODO : avoid doing data !=0 and just use provided utter mask #nz[:, 80000:] = False # for quick testing nzsum = np.sum(nz, axis=0) intersection = nzsum == 2 # for coloring we would need to know all the indices union = nzsum > 0 x, y = datainter = data[:, intersection] if mask is not None: if mask.size * ntimepoints == intersection.size: # we have got a single mask applicable to both x and y pass elif mask.size * ntimepoints == 2 * intersection.size: # we have got a mask per each, let's get an intersection assert mask.shape[0] == 2, "had to get 1 for x, 1 for y" mask = np.logical_and(mask[0], mask[1]) else: raise ValueError( "mask of shape %s. data of shape %s. ntimepoints=%d. " "Teach me how to apply it" % (mask.shape, data.shape, ntimepoints)) # replicate mask ntimepoints times mask = np.repeat(mask.ravel(), ntimepoints)[intersection] != 0 x_masked = x[mask] y_masked = y[mask] xnoty = (nz[0].astype(int) - nz[1].astype(int)) > 0 ynotx = (nz[1].astype(int) - nz[0].astype(int)) > 0 msg = '' if not np.all(finites): msg = " non-finite x: %d, y: %d" % (np.sum(~finites[0]), np.sum(~finites[1])) verbose( 1, "total: %d union: %d%s intersection: %d x_only: %d y_only: %d%s" % (len(nzsum), np.sum(union), mask is not None and ' masked: %d' % np.sum(mask) or '', np.sum(intersection), np.sum(xnoty), np.sum(ynotx), msg)) if include_stats: # report some statistics as well import scipy.stats as ss r, p = ss.pearsonr(x, y) d = np.linalg.norm(x - y) statsline = "r=%.2f p=%.4g ||x-y||=%.4g" % (r, p, d) try: from mvpa2.misc.dcov import dcorcoef nmax = min(1000, len(x)) idx = np.random.permutation(np.arange(len(x)))[:nmax] dcor = dcorcoef(x[idx], y[idx]) dcor_s = '' if len(x) == nmax else '[%d random]' % nmax statsline += ' dcorr%s=%.4g' % (dcor_s, dcor) except ImportError: pass verbose(2, statsline) else: statsline = '' #fig=pl.figure() #pl.plot(datainter[0], datainter[1], '.') #fig.show() nullfmt = pl.NullFormatter() # no labels # definitions for the axes left, width = 0.1, 0.65 bottom, height = 0.1, 0.65 bottom_h = left_h = left + width + 0.02 if not (bool(ax_scatter) or bool(ax_hist_x) or bool(ax_hist_y)): # no custom axes specified # our default setup rect_scatter = [left, bottom, width, height] rect_histx = [left, bottom_h, width, 0.2] rect_histy = [left_h, bottom, 0.2, height] # start with a rectangular Figure if fig is None: fig = pl.figure(figsize=(10, 10)) ax_scatter = pl.axes(rect_scatter) ax_hist_x = pl.axes(rect_histx) ax_hist_y = pl.axes(rect_histy) else: # check if all not None? 
# assert(len(axes) == 3) ax_bp_x, ax_bp_y = None, None if ax_scatter is None: raise ValueError("Makes no sense to not have a scatter plot") if bp_location is not None: ax_bp_x_parent = ax_bp_y_parent = None if bp_location == 'scatter': # place boxplots into histogram plots ax_bp_x_parent = ax_scatter ax_bp_y_parent = ax_scatter elif bp_location == 'hist': ax_bp_x_parent = ax_hist_x ax_bp_y_parent = ax_hist_y else: raise ValueError( "bp_location needs to be from (None, 'scatter', 'hist')") if ax_bp_x_parent: hist_x_pos = ax_bp_x_parent.get_position() ax_bp_x = pl_axes([ hist_x_pos.x0, hist_x_pos.y0 + hist_x_pos.height * 0.9, hist_x_pos.width, hist_x_pos.height * 0.1 ], facecolor='y') if ax_bp_y_parent: hist_y_pos = ax_bp_y_parent.get_position() ax_bp_y = pl_axes([ hist_y_pos.x0 + hist_y_pos.width * 0.9, hist_y_pos.y0, hist_y_pos.width * 0.1, hist_y_pos.height ], facecolor='y') # ax_bp_y = pl_axes( [left + width * 0.9, bottom, width/10, height], facecolor='y' ) if ax_hist_y else None sc_kwargs = dict(facecolors='none', s=1, rasterized=rasterized) # common kwargs # let's use colormap to get non-boring colors cm = colors # e.g. if it is None if colors is True: cm = pl.matplotlib.cm.get_cmap('jet') elif isinstance(colors, str): cm = pl.matplotlib.cm.get_cmap(colors) if cm and len(dataXd.shape) > dimcolor + 1: cm.set_under((1, 1, 1, 0.1)) # transparent what is not in range # we need to get our indices back for those we are going to plot. probably this is the least efficient way: ndindices_all = np.array(list(np.ndindex(dataXd.shape[1:]))) ndindices_nz = ndindices_all[intersection] # choose color based on dimcolor dimcolor_len = float(dataXd.shape[1 + dimcolor]) edgecolors = cm(((cm.N - 1) * ndindices_nz[:, dimcolor] / dimcolor_len).astype(int)) if mask is not None: # Plot first those which might be masked out if masked_opacity: mask_inv = np.logical_not(mask) mask_edgecolors = edgecolors[mask_inv].copy() # Adjust alpha value mask_edgecolors[:, -1] *= masked_opacity ax_scatter.scatter(x[mask_inv], y[mask_inv], edgecolors=mask_edgecolors, alpha=masked_opacity, **sc_kwargs) # Plot (on top) those which are not masked-out if mask.size: x_plot, y_plot, edgecolors_plot = x[mask], y[mask], edgecolors[mask] else: # older numpys blow here x_plot, y_plot, edgecolors_plot = (np.array([]), ) * 3 else: # Just plot all of them at once x_plot, y_plot, edgecolors_plot = x, y, edgecolors if len(x_plot): ax_scatter.scatter(x_plot, y_plot, edgecolors=edgecolors_plot, **sc_kwargs) # for orientation we need to plot 1 slice...
# assume that the last dimension is z -- figure out a slice with max # of non-zeros zdim_entries = ndindices_nz[:, -1] if np.size(zdim_entries): zdim_counts, _ = np.histogram(zdim_entries, bins=np.arange( 0, np.max(zdim_entries) + 1)) zdim_max = np.argmax(zdim_counts) if hint_opacity: # now we need to plot that zdim_max slice taking into account our colormap # create new axes axslice = pl_axes( [left, bottom + height * 0.72, width / 4., height / 5.], facecolor='y') axslice.axis('off') sslice = np.zeros(dataXd.shape[1:3]) # XXX hardcoded assumption on dimcolor=1 sslice[:, :] = np.arange(dimcolor_len)[None, :] # if there is time dimension -- choose minimal value across all values dataXd_mint = np.min(dataXd, axis=-1) if dataXd.ndim == 5 else dataXd sslice[dataXd_mint[0, ..., zdim_max] == 0] = -1 # reset those not in the picture to be "under" range axslice.imshow(sslice, alpha=hint_opacity, cmap=cm) else: # the scatter plot without colors to distinguish location ax_scatter.scatter(x, y, **sc_kwargs) if labels: ax_scatter.set_xlabel(labels[0]) ax_scatter.set_ylabel(labels[1]) # "unique" points on each of the axes if uniq: if np.sum(xnoty): ax_scatter.scatter(fill_nonfinites(data[0, np.where(xnoty)[0]]), fill_nonfinites(data[1, np.where(xnoty)[0]]), edgecolor='b', **sc_kwargs) if np.sum(ynotx): ax_scatter.scatter(fill_nonfinites(data[0, np.where(ynotx)[0]]), fill_nonfinites(data[1, np.where(ynotx)[0]]), edgecolor='g', **sc_kwargs) # Axes if np.size(x): ax_scatter.plot((np.min(x), np.max(x)), (0, 0), 'r', alpha=0.5) else: warning("There is nothing to plot, returning early") return pl.gcf() ax_scatter.plot((0, 0), (np.min(y), np.max(y)), 'r', alpha=0.5) if (mask is not None and not masked_opacity and np.sum(mask)): # if there is a non-degenerate mask which was not intended to be plotted, # take those values away while estimating min/max range _ = x[mask] minx, maxx = np.min(_), np.max(_) _ = y[mask] miny, maxy = np.min(_), np.max(_) del _ # no need to consume RAM # print "Here y range", miny, maxy else: minx, maxx = np.min(x), np.max(x) miny, maxy = np.min(y), np.max(y) # Process 'limits' option if isinstance(limits, str): limits = limits.lower() if limits == 'auto': overlap = min(maxx, maxy) - max(minx, miny) range_ = max(maxx, maxy) - min(minx, miny) limits = { True: 'same', False: 'per-axis' }[not range_ or overlap / float(range_) > 0.5] if limits == 'per-axis': same_range = False if xlim is None: # add some white border dx = (maxx - minx) / 20. xlim = (minx - dx, maxx + dx) if ylim is None: dy = (maxy - miny) / 20. ylim = (miny - dy, maxy + dy) elif limits == 'same': same_range = True # assign limits the numerical range limits = (np.min([minx, miny]), np.max([maxx, maxy])) else: raise ValueError("Do not know how to handle limits=%r" % (limits, )) else: same_range = True # Let's now plot threshold lines if provided if thresholds is not None: stylekwargs = dict(colors='k', linestyles='dotted') if len(thresholds): ax_scatter.vlines(thresholds[0], ax_scatter.get_xlim()[0] * 0.9, ax_scatter.get_xlim()[1] * 0.9, **stylekwargs) if len(thresholds) > 1: ax_scatter.hlines(thresholds[1], ax_scatter.get_ylim()[0] * 0.9, ax_scatter.get_ylim()[1] * 0.9, **stylekwargs) if same_range: # now determine nice limits by hand: binwidthx = binwidthy = binwidth = np.max(datainter) / 51.
# 0.25 minxy, maxxy = limits sgn = np.sign(minxy) xyrange = maxxy - minxy xyamax = np.max([np.max(np.fabs(x)), np.max(np.fabs(y))]) limn = sgn * (int(sgn * minxy / binwidth) - sgn) * binwidth limp = (int(maxxy / binwidth) + 1) * binwidth ax_scatter.plot((limn * 0.9, limp * 0.9), (limn * 0.9, limp * 0.9), 'y--') if xlim is None: xlim = (limn, limp) if ylim is None: ylim = (limn, limp) binsx = binsy = bins = np.arange(limn, limp + binwidth, binwidth) else: binwidthx = (maxx - minx) / 51. binwidthy = (maxy - miny) / 51. try: binsx = np.arange(minx, maxx + binwidthx, binwidthx) binsy = np.arange(miny, maxy + binwidthy, binwidthy) except Exception as exc: warning( "Received following exception while trying to get bins for " "minx=%(minx)f maxx=%(maxx)f binwidthx=%(binwidthx)s " "miny=%(miny)f maxy=%(maxy)f binwidthy=%(binwidthy)s: %(exc)s. " "Returning early" % locals()) return pl.gcf() if xlim is not None: ax_scatter.set_xlim(xlim) if ylim is not None: ax_scatter.set_ylim(ylim) # get values to plot for histogram and boxplot x_hist, y_hist = (x, y) if (mask is None or not np.sum(mask)) else (x_masked, y_masked) if np.any(binsx) and ax_hist_x is not None: ax_hist_x.xaxis.set_major_formatter(nullfmt) histx = ax_hist_x.hist(x_hist, bins=binsx, facecolor='b') ax_hist_x.set_xlim(ax_scatter.get_xlim()) ax_hist_x.vlines(0, 0, 0.9 * np.max(histx[0]), 'r') if np.any(binsy) and ax_hist_y is not None: ax_hist_y.yaxis.set_major_formatter(nullfmt) histy = ax_hist_y.hist(y_hist, bins=binsy, orientation='horizontal', facecolor='g') ax_hist_y.set_ylim(ax_scatter.get_ylim()) ax_hist_y.hlines(0, 0, 0.9 * np.max(histy[0]), 'r') rect_scatter = [left, bottom, width, height] # Box plots if ax_bp_x is not None: ax_bp_x.axis('off') bpx = ax_bp_x.boxplot(x_hist, vert=0) #'r', 0) ax_bp_x.set_xlim(ax_scatter.get_xlim()) if ax_bp_y is not None: ax_bp_y.axis('off') bpy = ax_bp_y.boxplot(y_hist, sym='g+') ax_bp_y.set_ylim(ax_scatter.get_ylim()) if statsline: # draw the text based on gca y1, y2 = ax_scatter.get_ylim() x1, x2 = ax_scatter.get_xlim() ax_scatter.text( 0.5 * (x1 + x2), # center y2 - 0.02 * (y2 - y1), statsline, verticalalignment="top", horizontalalignment="center") if title: pl.title(title) return pl.gcf()
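A hypothetical minimal call of plot_scatter (array shapes invented; two aligned 3D maps are stacked along a new first axis, as the function expects):

import numpy as np

# two correlated 3D maps of noise, purely for illustration
vol_a = np.random.randn(16, 16, 8)
vol_b = vol_a + 0.5 * np.random.randn(16, 16, 8)

fig = plot_scatter(np.array([vol_a, vol_b]),
                   labels=['map A', 'map B'],
                   limits='same',       # identical ranges on both axes
                   uniq=True,           # mark points present in only one map
                   include_stats=True)  # annotate r, p and ||x-y||
fig.savefig('scatter.png')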
def run(args): if os.path.isfile(args.payload) and args.payload.endswith('.py'): measure = script2obj(args.payload) elif args.payload == 'cv': if args.cv_learner is None or args.cv_partitioner is None: raise ValueError( 'cross-validation payload requires --learner and --partitioner' ) # get CV instance measure = get_crossvalidation_instance( args.cv_learner, args.cv_partitioner, args.cv_errorfx, args.cv_sampling_repetitions, args.cv_learner_space, args.cv_balance_training, args.cv_permutations, args.cv_avg_datafold_results, args.cv_prob_tail) else: raise RuntimeError("this should not happen") ds = arg2ds(args.data) if not args.ds_preproc_fx is None: ds = args.ds_preproc_fx(ds) # setup neighborhood # XXX add big switch to allow for setting up surface-based neighborhoods from mvpa2.misc.neighborhood import IndexQueryEngine qe = IndexQueryEngine(**dict(args.neighbors)) # determine ROIs rids = None # all by default aggregate_fx = args.aggregate_fx if args.roi_attr is not None: # first figure out which roi features should be processed if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys(): # name of an attribute -> pull non-zeroes rids = ds.fa[args.roi_attr[0]].value.nonzero()[0] else: # an expression? from .cmd_select import _eval_attr_expr rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0] seed_ids = None if args.scatter_rois is not None: # scatter_neighborhoods among available ids if was requested from mvpa2.misc.neighborhood import scatter_neighborhoods attr, nb = args.scatter_rois coords = ds.fa[attr].value if rids is not None: # select only those which were chosen by ROI coords = coords[rids] _, seed_ids = scatter_neighborhoods(nb, coords) if aggregate_fx is None: # no custom one given -> use default "fill in" function aggregate_fx = _fill_in_scattered_results if args.enable_ca is None: args.enable_ca = ['roi_feature_ids'] elif 'roi_feature_ids' not in args.enable_ca: args.enable_ca += ['roi_feature_ids'] if seed_ids is None: roi_ids = rids else: if rids is not None: # we had to sub-select by scatterring among available rids # so we would need to get original ids roi_ids = rids[seed_ids] else: # scattering happened on entire feature-set roi_ids = seed_ids verbose( 3, 'Attempting %i ROI analyses' % ((roi_ids is None) and ds.nfeatures or len(roi_ids))) from mvpa2.measures.searchlight import Searchlight sl = Searchlight(measure, queryengine=qe, roi_ids=roi_ids, nproc=args.nproc, results_backend=args.multiproc_backend, results_fx=aggregate_fx, enable_ca=args.enable_ca, disable_ca=args.disable_ca) # XXX support me too! # add_center_fa # tmp_prefix # nblocks # null_dist # run res = sl(ds) if (seed_ids is not None) and ('mapper' in res.a): # strip the last mapper link in the chain, which would be the seed ID selection res.a['mapper'] = res.a.mapper[:-1] # XXX create more output # and store ds2hdf5(res, args.output, compression=args.hdf5_compression) return res
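The IndexQueryEngine(**dict(args.neighbors)) line above builds the searchlight neighborhood from feature attributes. A toy sketch of the same mechanism with an invented 3x3x3 grid (Sphere and IndexQueryEngine are the actual PyMVPA classes):

import numpy as np
from mvpa2.datasets import Dataset
from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere

# toy dataset whose 27 features sit on an invented 3x3x3 grid
ds = Dataset(np.random.randn(5, 27),
             fa={'voxel_indices': np.array(list(np.ndindex(3, 3, 3)))})
qe = IndexQueryEngine(voxel_indices=Sphere(1))  # radius-1 neighborhood
qe.train(ds)
neighbors = qe.query_byid(13)  # feature ids around the central grid point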