# The helper functions used below are part of PyMVPA's command-line
# machinery; hdf2ds, ds2hdf5, and get_crossvalidation_instance presumably
# live in mvpa2.cmdline.helpers.
from mvpa2.base import verbose
from mvpa2.base.dataset import vstack
from mvpa2.cmdline.helpers import (hdf2ds, ds2hdf5,
                                   get_crossvalidation_instance)


def run(args):
    # load one or more datasets from the given HDF5 file(s)
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # get CV instance
    cv = get_crossvalidation_instance(args.learner, args.partitioner,
                                      args.errorfx, args.sampling_repetitions,
                                      args.learner_space,
                                      args.balance_training, args.permutations,
                                      args.avg_datafold_results,
                                      args.prob_tail)
    res = cv(ds)
    # some meaningful output
    # XXX make condition on classification analysis only?
    print cv.ca.stats
    print 'Results\n-------'
    if args.permutations > 0:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure
        if args.permutations > 0:
            p = ', p-value (%s tail)' % args.prob_tail
        else:
            p = ''
        print 'Fold, Result%s' % p
        for i in xrange(len(res)):
            if args.permutations > 0:
                p = ', %f' % nprob[i, 0]
            else:
                p = ''
            print '%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], p)
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if args.permutations > 0:
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob,
                '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
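The p-values reported above come from the permutation test requested via the permutations argument; `prob_tail` selects which tail of the null distribution is evaluated. As a minimal sketch of the underlying idea (not PyMVPA's actual implementation; the function name is illustrative):

import numpy as np

def permutation_pvalue(observed, null_scores, tail='left'):
    # Fraction of the permutation null distribution at least as extreme
    # as the observed score. For an error measure the left tail is the
    # interesting one: an error lower than expected by chance.
    null_scores = np.asarray(null_scores)
    if tail == 'left':
        return np.mean(null_scores <= observed)
    return np.mean(null_scores >= observed)

With n permutations the resolution of such a p-value is 1/n, so the number of permutations has to be reasonably large for small p-values to be meaningful.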
Example #2
# As in Example #1, the helpers (script2obj, arg2ds, ds2hdf5,
# get_crossvalidation_instance) are assumed to come from
# mvpa2.cmdline.helpers; _fill_in_scattered_results is a module-local
# default aggregation function defined alongside this command.
import os

from mvpa2.base import verbose
from mvpa2.cmdline.helpers import (arg2ds, ds2hdf5, script2obj,
                                   get_crossvalidation_instance)


def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        # the payload is a Python script that evaluates to a measure object
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError('cross-validation payload requires --learner and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
                    args.cv_learner, args.cv_partitioner, args.cv_errorfx,
                    args.cv_sampling_repetitions, args.cv_learner_space,
                    args.cv_balance_training, args.cv_permutations,
                    args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)
    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
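    # args.neighbors presumably pairs a feature attribute (e.g. voxel
    # indices) with a neighborhood shape such as a Sphere; the query engine
    # then maps each searchlight center to the ids of its neighboring
    # features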
    qe = IndexQueryEngine(**dict(args.neighbors))
    # determine ROIs
    rids = None     # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter neighborhoods among the available ids, if requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scattering among the available rids,
            # so we need to map back to the original ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on entire feature-set
            roi_ids = seed_ids

    verbose(3, 'Attempting %i ROI analyses'
               % (ds.nfeatures if roi_ids is None else len(roi_ids)))

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    #                 add_center_fa
    #                 tmp_prefix
    #                 nblocks
    #                 null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
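This searchlight variant can sparsify its centers: the scatter-rois option places seeds so that every feature is covered by at least one neighborhood, instead of centering an analysis on every single feature. A minimal greedy sketch of that idea (illustrative only; `scatter_seeds` and `radius` are not mvpa2's API):

import numpy as np

def scatter_seeds(coords, radius):
    # Greedily pick seeds until every coordinate lies within `radius` of
    # some seed; returns indices into `coords`.
    coords = np.asarray(coords, dtype=float)
    uncovered = np.ones(len(coords), dtype=bool)
    seeds = []
    while uncovered.any():
        i = np.flatnonzero(uncovered)[0]   # first still-uncovered point
        seeds.append(i)
        dist = np.linalg.norm(coords - coords[i], axis=1)
        uncovered &= dist > radius         # drop everything this seed covers
    return np.asarray(seeds)

The sparse per-seed results then have to be distributed back over all features, which is what the default aggregation function `_fill_in_scattered_results` is there for.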