def run(args):
    """Cross-validate a learner on the input datasets and store the result.

    All datasets loaded from ``args.data`` are stacked into a single dataset,
    a cross-validation instance is built from the settings in ``args``, and
    that instance is applied to the stacked dataset.  A summary is printed
    and the result is written with ``ds2hdf5``.

    Parameters
    ----------
    args
      Namespace-like object. The attributes read here are ``data``,
      ``learner``, ``partitioner``, ``errorfx``, ``sampling_repetitions``,
      ``learner_space``, ``balance_training``, ``permutations``,
      ``avg_datafold_results``, ``prob_tail``, ``output`` and
      ``hdf5_compression``.

    Returns
    -------
    The object returned by calling the cross-validation instance on the
    stacked dataset.

    Notes
    -----
    When ``args.permutations > 0`` the null probabilities are stored as well,
    under ``'<output>_nullprob'``.  In that case a trailing ``'.hdf5'`` is
    removed from ``args.output`` *in place*, so the caller's ``args`` object
    is modified.
    """
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)

    # get CV instance
    cv = get_crossvalidation_instance(
        args.learner, args.partitioner, args.errorfx,
        args.sampling_repetitions, args.learner_space,
        args.balance_training, args.permutations,
        args.avg_datafold_results, args.prob_tail)
    res = cv(ds)

    # the permutation setting does not change below -> evaluate it only once
    with_pvalues = args.permutations > 0

    # some meaningful output
    # XXX make condition on classification analysis only?
    # NOTE: single-argument print(...) is valid on both Python 2 and 3
    print(cv.ca.stats)
    print('Results\n-------')
    if with_pvalues:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure: one value per fold
        header_suffix = ''
        if with_pvalues:
            header_suffix = ', p-value (%s tail)' % args.prob_tail
        print('Fold, Result%s' % header_suffix)
        for i in range(len(res)):
            pvalue = (', %f' % nprob[i, 0]) if with_pvalues else ''
            print('%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], pvalue))

    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if with_pvalues:
        # drop the extension so that the suffix lands on the base name
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob, '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
def run(args):
    """Cross-validate a learner on the input datasets and store the result.

    All datasets loaded from ``args.data`` are stacked into a single dataset,
    a cross-validation instance is built from the settings in ``args``, and
    that instance is applied to the stacked dataset.  A summary is printed
    and the result is written with ``ds2hdf5``.

    Parameters
    ----------
    args
      Namespace-like object. The attributes read here are ``data``,
      ``learner``, ``partitioner``, ``errorfx``, ``sampling_repetitions``,
      ``learner_space``, ``balance_training``, ``permutations``,
      ``avg_datafold_results``, ``prob_tail``, ``output`` and
      ``hdf5_compression``.

    Returns
    -------
    The object returned by calling the cross-validation instance on the
    stacked dataset.

    Notes
    -----
    When ``args.permutations > 0`` the null probabilities are stored as well,
    under ``'<output>_nullprob'``.  In that case a trailing ``'.hdf5'`` is
    removed from ``args.output`` *in place*, so the caller's ``args`` object
    is modified.
    """
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)

    # get CV instance
    cv = get_crossvalidation_instance(
        args.learner, args.partitioner, args.errorfx,
        args.sampling_repetitions, args.learner_space,
        args.balance_training, args.permutations,
        args.avg_datafold_results, args.prob_tail)
    res = cv(ds)

    # the permutation setting does not change below -> evaluate it only once
    with_pvalues = args.permutations > 0

    # some meaningful output
    # XXX make condition on classification analysis only?
    # NOTE: single-argument print(...) is valid on both Python 2 and 3
    print(cv.ca.stats)
    print('Results\n-------')
    if with_pvalues:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure: one value per fold
        header_suffix = ''
        if with_pvalues:
            header_suffix = ', p-value (%s tail)' % args.prob_tail
        print('Fold, Result%s' % header_suffix)
        for i in range(len(res)):
            pvalue = (', %f' % nprob[i, 0]) if with_pvalues else ''
            print('%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], pvalue))

    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if with_pvalues:
        # drop the extension so that the suffix lands on the base name
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob, '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
def run(args):
    """Run a searchlight analysis as configured by ``args`` and store it.

    The measure applied in each searchlight is either loaded from a Python
    script (``args.payload`` is an existing ``.py`` file) or is a
    cross-validation built from the ``args.cv_*`` settings (``args.payload``
    is ``'cv'``).  The set of searchlight centers can be restricted through
    ``args.roi_attr`` and thinned out through ``args.scatter_rois``.

    Parameters
    ----------
    args
      Namespace-like object. The attributes read here are ``payload``, the
      ``cv_*`` settings, ``data``, ``ds_preproc_fx``, ``neighbors``,
      ``aggregate_fx``, ``roi_attr``, ``scatter_rois``, ``enable_ca``,
      ``disable_ca``, ``nproc``, ``multiproc_backend``, ``output`` and
      ``hdf5_compression``.  ``args.enable_ca`` may be modified in place
      (see the scatter handling below).

    Returns
    -------
    The searchlight result, which is also written with ``ds2hdf5``.

    Raises
    ------
    ValueError
      If the ``'cv'`` payload is requested without a learner or partitioner.
    RuntimeError
      If ``args.payload`` is neither a ``.py`` file nor ``'cv'``.
    """
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError('cross-validation payload requires --learner and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")

    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)

    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))

    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if was requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            # NOTE(review): the default aggregator presumably needs the
            # 'roi_feature_ids' conditional attribute -- confirm
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    elif rids is not None:
        # we had to sub-select by scattering among available rids
        # so we would need to get original ids
        roi_ids = rids[seed_ids]
    else:
        # scattering happened on entire feature-set
        roi_ids = seed_ids

    # A conditional expression is required here: the `cond and a or b` idiom
    # falls through to len(None) whenever ds.nfeatures is 0.
    n_rois = ds.nfeatures if roi_ids is None else len(roi_ids)
    verbose(3, 'Attempting %i ROI analyses' % n_rois)

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    #                 add_center_fa
    #                 tmp_prefix
    #                 nblocks
    #                 null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
def run(args):
    """Run a searchlight analysis as configured by ``args`` and store it.

    The measure applied in each searchlight is either loaded from a Python
    script (``args.payload`` is an existing ``.py`` file) or is a
    cross-validation built from the ``args.cv_*`` settings (``args.payload``
    is ``'cv'``).  The set of searchlight centers can be restricted through
    ``args.roi_attr`` and thinned out through ``args.scatter_rois``.

    Parameters
    ----------
    args
      Namespace-like object. The attributes read here are ``payload``, the
      ``cv_*`` settings, ``data``, ``ds_preproc_fx``, ``neighbors``,
      ``aggregate_fx``, ``roi_attr``, ``scatter_rois``, ``enable_ca``,
      ``disable_ca``, ``nproc``, ``multiproc_backend``, ``output`` and
      ``hdf5_compression``.  ``args.enable_ca`` may be modified in place
      (see the scatter handling below).

    Returns
    -------
    The searchlight result, which is also written with ``ds2hdf5``.

    Raises
    ------
    ValueError
      If the ``'cv'`` payload is requested without a learner or partitioner.
    RuntimeError
      If ``args.payload`` is neither a ``.py`` file nor ``'cv'``.
    """
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError(
                'cross-validation payload requires --learner and --partitioner'
            )
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")

    ds = arg2ds(args.data)
    # `is not None` instead of the harder to read `not ... is None`
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)

    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))

    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if was requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            # NOTE(review): the default aggregator presumably needs the
            # 'roi_feature_ids' conditional attribute -- confirm
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    elif rids is not None:
        # we had to sub-select by scattering among available rids
        # so we would need to get original ids
        roi_ids = rids[seed_ids]
    else:
        # scattering happened on entire feature-set
        roi_ids = seed_ids

    # A conditional expression is required here: the `cond and a or b` idiom
    # falls through to len(None) whenever ds.nfeatures is 0.
    n_rois = ds.nfeatures if roi_ids is None else len(roi_ids)
    verbose(3, 'Attempting %i ROI analyses' % n_rois)

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    #                 add_center_fa
    #                 tmp_prefix
    #                 nblocks
    #                 null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res