def run(args):
    if args.store is not None and args.output is None:
        raise ValueError("--output is required for result storage")
    if args.data is not None:
        dss = [arg2ds(d) for d in args.data]
        if len(dss):
            # convenience short-cut
            ds = dss[0]
    try:
        # make nose's assertion helpers available to evaluated expressions
        import nose.tools as nt
    except ImportError:
        pass
    # evaluate expressions: '-' reads code from stdin, an existing file is
    # executed as a script, anything else is treated as Python source
    for expr in args.eval:
        if expr == '-':
            exec sys.stdin
        elif os.path.isfile(expr):
            execfile(expr, globals(), locals())
        else:
            exec expr
    if args.store is not None:
        # collect the requested local variables and store them
        out = {}
        for var in args.store:
            try:
                out[var] = locals()[var]
            except KeyError:
                warning("'%s' not found in local name space -- skipped." % var)
        if len(out):
            ds2hdf5(out, args.output, compression=args.hdf5_compression)
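
# A minimal usage sketch for the eval command above, assuming run() is fed an
# argparse.Namespace carrying the attributes it reads; file names and the
# evaluated expression are hypothetical placeholders.
from argparse import Namespace

eval_args = Namespace(data=['input.hdf5'],
                      eval=['ds_mean = ds.samples.mean(axis=0)'],
                      store=['ds_mean'],        # local variable(s) to save
                      output='result.hdf5',
                      hdf5_compression='gzip')
run(eval_args)
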
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # slicing
    sliceme = {'samples': slice(None), 'features': slice(None)}
    # indices
    for opt, col, which in ((args.samples_by_index, ds.sa, 'samples'),
                            (args.features_by_index, ds.fa, 'features')):
        if opt is None:
            continue
        if len(opt) == 1 and opt[0].count(':'):
            # slice spec of the form [start]:[stop][:[step]]
            arg = opt[0].split(':')
            spec = []
            for a in arg:
                if not len(a):
                    spec.append(None)
                else:
                    spec.append(int(a))
            sliceme[which] = slice(*spec)
        else:
            # actual indices
            sliceme[which] = [int(o) for o in opt]
    # attribute evaluation
    for opt, col, which in ((args.samples_by_attr, ds.sa, 'samples'),
                            (args.features_by_attr, ds.fa, 'features')):
        if opt is None:
            continue
        sliceme[which] = _eval_attr_expr(opt, col)
    # apply selection
    ds = ds[sliceme['samples'], sliceme['features']]
    verbose(1, 'Selected %i samples with %i features' % ds.shape)
    # strip attributes
    for attrarg, col, descr in ((args.strip_sa, ds.sa, 'sample '),
                                (args.strip_fa, ds.fa, 'feature '),
                                (args.strip_da, ds.a, '')):
        if attrarg is not None:
            for attr in attrarg:
                try:
                    del col[attr]
                except KeyError:
                    warning("dataset has no %sattribute '%s' to remove"
                            % (descr, attr))
    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
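
# Hedged example of a programmatic selection call: the slice string follows
# the same [start]:[stop] convention the code above parses, mirroring Python
# slicing; file names and values are hypothetical.
from argparse import Namespace

select_args = Namespace(data=['input.hdf5'],
                        samples_by_index=['0:100'],   # first 100 samples
                        features_by_index=None,
                        samples_by_attr=None,
                        features_by_attr=None,
                        strip_sa=None, strip_fa=None, strip_da=None,
                        output='selected.hdf5',
                        hdf5_compression='gzip')
run(select_args)
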
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # get CV instance
    cv = get_crossvalidation_instance(
        args.learner, args.partitioner, args.errorfx,
        args.sampling_repetitions, args.learner_space,
        args.balance_training, args.permutations,
        args.avg_datafold_results, args.prob_tail)
    res = cv(ds)
    # some meaningful output
    # XXX make condition on classification analysis only?
    print cv.ca.stats
    print 'Results\n-------'
    if args.permutations > 0:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure
        if args.permutations > 0:
            p = ', p-value (%s tail)' % args.prob_tail
        else:
            p = ''
        print 'Fold, Result%s' % p
        for i in xrange(len(res)):
            if args.permutations > 0:
                p = ', %f' % nprob[i, 0]
            else:
                p = ''
            print '%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], p)
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if args.permutations > 0:
        # store the null probabilities next to the main result, without a
        # doubled '.hdf5' suffix
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob, '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
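
# Sketch of driving the cross-validation command directly. The learner,
# partitioner, and error-function specs below are hypothetical stand-ins for
# whatever the command-line parser delivers to run(); the real CLI may
# convert such specs into objects before this point.
from argparse import Namespace

cv_args = Namespace(data=['input.hdf5'],
                    learner='SMLR',            # hypothetical learner spec
                    partitioner='oddeven',     # hypothetical partitioner spec
                    errorfx='mean_mismatch_error',
                    sampling_repetitions=1,
                    learner_space='targets',
                    balance_training=None,
                    permutations=0,            # >0 adds a null distribution
                    avg_datafold_results=False,
                    prob_tail='left',
                    output='cv_result.hdf5',
                    hdf5_compression='gzip')
res = run(cv_args)
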
def run(args):
    ds = arg2ds(args.data)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # build list of events
    events = []
    timebased_events = False
    if args.event_attrs is not None:
        def_attrs = dict([(k, ds.sa[k].value) for k in args.event_attrs])
        events = find_events(**def_attrs)
    elif args.csv_events is not None:
        if args.csv_events == '-':
            csv = sys.stdin.read()
            import cStringIO
            csv = cStringIO.StringIO(csv)
        else:
            csv = open(args.csv_events, 'rU')
        csvt = _load_csv_table(csv)
        if not len(csvt):
            raise ValueError("no CSV columns found")
        if args.onset_column:
            csvt['onset'] = csvt[args.onset_column]
        nevents = len(csvt[csvt.keys()[0]])
        events = []
        for ev in xrange(nevents):
            events.append(dict([(k, v[ev]) for k, v in csvt.iteritems()]))
    elif args.onsets is not None:
        if not len(args.onsets):
            args.onsets = [i for i in sys.stdin]
        # time or sample-based?
        if args.time_attr is None:
            oconv = int
        else:
            oconv = float
        events = [{'onset': oconv(o)} for o in args.onsets]
    elif args.fsl_ev3 is not None:
        timebased_events = True
        from mvpa2.misc.fsl import FslEV3
        events = []
        for evsrc in args.fsl_ev3:
            events.extend(FslEV3(evsrc).to_events())
    if not len(events):
        raise ValueError("no events defined")
    verbose(2, 'Extracting %i events' % len(events))
    # optional compression of all samples within an event into one
    if args.event_compression is None:
        evmap = None
    elif args.event_compression == 'mean':
        evmap = FxMapper('features', np.mean, attrfx=merge2first)
    elif args.event_compression == 'median':
        evmap = FxMapper('features', np.median, attrfx=merge2first)
    elif args.event_compression == 'min':
        evmap = FxMapper('features', np.min, attrfx=merge2first)
    elif args.event_compression == 'max':
        evmap = FxMapper('features', np.max, attrfx=merge2first)
    # convert to event-related ds
    evds = eventrelated_dataset(ds, events,
                                time_attr=args.time_attr,
                                match=args.match_strategy,
                                event_offset=args.offset,
                                event_duration=args.duration,
                                event_mapper=evmap)
    # act on all attribute options
    evds = process_common_dsattr_opts(evds, args)
    # and store
    ds2hdf5(evds, args.output, compression=args.hdf5_compression)
    return evds
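
# Hypothetical invocation building an event-related dataset from explicit
# onsets. Attribute names mirror what run() reads above; note that
# process_common_dsattr_opts() consumes additional attribute options that are
# not spelled out in this sketch.
from argparse import Namespace

evds_args = Namespace(data=['bold.hdf5'],
                      event_attrs=None,
                      csv_events=None,
                      onsets=['2.0', '12.0', '24.0'],  # converted with float()
                      fsl_ev3=None,
                      onset_column=None,
                      time_attr='time_coords',         # makes onsets time-based
                      match_strategy='closest',
                      offset=None,
                      duration=6.0,
                      event_compression='mean',
                      output='evds.hdf5',
                      hdf5_compression='gzip')
evds = run(evds_args)
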
def run(args):
    if args.chunks is not None:
        # apply global "chunks" setting
        for cattr in ('detrend_chunks', 'zscore_chunks'):
            if getattr(args, cattr) is None:
                # only overwrite if individual option is not given
                setattr(args, cattr, args.chunks)
    ds = arg2ds(args.data)
    if args.poly_detrend is not None:
        if args.detrend_chunks is not None \
                and args.detrend_chunks not in ds.sa:
            raise ValueError(
                "--detrend-chunks attribute '%s' not found in dataset"
                % args.detrend_chunks)
        from mvpa2.mappers.detrend import poly_detrend
        verbose(1, "Detrend")
        poly_detrend(ds, polyord=args.poly_detrend,
                     chunks_attr=args.detrend_chunks,
                     opt_regs=args.detrend_regrs,
                     space=args.detrend_coords)
    if args.filter_passband is not None:
        from mvpa2.mappers.filters import iir_filter
        from scipy.signal import butter, buttord
        if args.sampling_rate is None or args.filter_stopband is None:
            raise ValueError("spectral filtering requires specification of "
                             "--filter-stopband and --sampling-rate")
        # determine filter type; all band edges are normalized by the
        # Nyquist frequency, as required by scipy's filter design functions
        nyquist = args.sampling_rate / 2.0
        if len(args.filter_passband) > 1:
            btype = 'bandpass'
            if not len(args.filter_passband) == len(args.filter_stopband):
                raise ValueError("passband and stopband specifications have "
                                 "to match in size")
            wp = [v / nyquist for v in args.filter_passband]
            ws = [v / nyquist for v in args.filter_stopband]
        elif args.filter_passband[0] < args.filter_stopband[0]:
            btype = 'lowpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        elif args.filter_passband[0] > args.filter_stopband[0]:
            btype = 'highpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        else:
            raise ValueError("invalid specification of Butterworth filter")
        # create filter
        verbose(1, "Spectral filtering (%s)" % (btype,))
        try:
            ord, wn = buttord(wp, ws, args.filter_passloss,
                              args.filter_stopattenuation, analog=False)
            b, a = butter(ord, wn, btype=btype)
        except OverflowError:
            raise ValueError("cannot construct Butterworth filter for the "
                             "given specification")
        ds = iir_filter(ds, b, a)
    if args.zscore:
        from mvpa2.mappers.zscore import zscore
        verbose(1, "Z-score")
        zscore(ds, chunks_attr=args.zscore_chunks, params=args.zscore_params)
    verbose(3, "Dataset summary %s" % (ds.summary()))
    # invariants?
    if args.strip_invariant_features is not None:
        from mvpa2.datasets.miscfx import remove_invariant_features
        ds = remove_invariant_features(ds)
    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
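
# Band-pass filtering sketch: with a sampling rate of 0.5 Hz (TR = 2 s) the
# Nyquist frequency is 0.25 Hz, so the 0.01-0.1 Hz passband below maps to the
# normalized edges wp = [0.04, 0.4] computed above. All values are
# hypothetical placeholders.
from argparse import Namespace

preproc_args = Namespace(data=['bold.hdf5'],
                         chunks=None,
                         poly_detrend=1,               # linear detrending
                         detrend_chunks='chunks',
                         detrend_regrs=None,
                         detrend_coords=None,
                         filter_passband=[0.01, 0.1],  # Hz
                         filter_stopband=[0.005, 0.2], # Hz
                         sampling_rate=0.5,            # Hz
                         filter_passloss=1.0,          # dB
                         filter_stopattenuation=30.0,  # dB
                         zscore=True,
                         zscore_chunks='chunks',
                         zscore_params=None,
                         strip_invariant_features=None,
                         output='preproc.hdf5',
                         hdf5_compression='gzip')
ds = run(preproc_args)
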
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError("cross-validation payload requires --learner "
                             "and --partitioner")
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)
    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))
    # determine ROIs
    rids = None     # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which ROI features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]
    seed_ids = None
    if args.scatter_rois is not None:
        # scatter neighborhoods among the available ids if requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']
    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scattering among the available rids,
            # so we need to map back to the original ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on the entire feature set
            roi_ids = seed_ids
    verbose(3, 'Attempting %i ROI analyses'
               % ((roi_ids is None) and ds.nfeatures or len(roi_ids)))
    from mvpa2.measures.searchlight import Searchlight
    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    # add_center_fa
    # tmp_prefix
    # nblocks
    # null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain,
        # which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
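
# Minimal searchlight sketch: an IndexQueryEngine keyed on 'voxel_indices'
# with a Sphere neighborhood is the canonical PyMVPA setup; the cv_* specs
# are hypothetical stand-ins for whatever the CLI parser delivers.
from argparse import Namespace
from mvpa2.misc.neighborhood import Sphere

sl_args = Namespace(payload='cv',
                    cv_learner='SMLR',          # hypothetical spec
                    cv_partitioner='oddeven',   # hypothetical spec
                    cv_errorfx='mean_mismatch_error',
                    cv_sampling_repetitions=1,
                    cv_learner_space='targets',
                    cv_balance_training=None,
                    cv_permutations=0,
                    cv_avg_datafold_results=True,
                    cv_prob_tail='left',
                    data=['bold.hdf5'],
                    ds_preproc_fx=None,
                    neighbors=[('voxel_indices', Sphere(3))],  # 3-voxel radius
                    roi_attr=None,
                    scatter_rois=None,
                    aggregate_fx=None,
                    enable_ca=None, disable_ca=None,
                    nproc=1,
                    multiproc_backend='native',
                    output='sl_result.hdf5',
                    hdf5_compression='gzip')
res = run(sl_args)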