def create_betas_per_trial_with_pymvpa_roni(study_path, subj, conf, mask_name, flavor, TR):
    dhandle = OpenFMRIDataset(study_path)
    model = 1
    task = 1  # TODO: do this for other tasks as well, not only the first
    mask_fname = _opj(study_path, "sub{:0>3d}".format(subj), "masks",
                      conf.mvpa_tasks[0], "{}.nii.gz".format(mask_name))
    print(mask_fname)
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        if isinstance(run_id, str):
            continue
        # all_events = dhandle.get_bold_run_model(model, subj, run_id)
        all_events = get_bold_run_model(dhandle, 2, subj, run_id)
        run_events = []
        i = 0
        for event in all_events:
            if event["task"] == task:
                event["condition"] = "{}-{}".format(event["condition"], event["id"])
                run_events.append(event)
                i += 1
        # load BOLD data for this run (with masking); add 0-based chunk ID
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id, flavor=flavor,
                                              chunks=run_id - 1, mask=mask_fname)
        # convert event info into a sample attribute and assign as 'targets'
        run_ds.sa.time_coords = run_ds.sa.time_indices * TR
        run_ds.sa["targets"] = events2sample_attr(run_events, run_ds.sa.time_coords,
                                                  noinfolabel="rest")
        # additional time series preprocessing can go here
        poly_detrend(run_ds, polyord=1, chunks_attr="chunks")
        zscore(run_ds, chunks_attr="chunks", param_est=("targets", ["rest"]),
               dtype="float32")
        glm_dataset = fit_event_hrf_model(run_ds, run_events,
                                          time_attr="time_coords",
                                          condition_attr="condition")
        glm_dataset.sa["targets"] = [x[: x.find("-")] for x in glm_dataset.sa.condition]
        glm_dataset.sa["id"] = [x[x.find("-") + 1:] for x in glm_dataset.sa.condition]
        glm_dataset.sa.condition = glm_dataset.sa.targets
        glm_dataset.sa["chunks"] = [run_id - 1] * len(glm_dataset.samples)
        # If a trial was dropped (the subject pressed a button), then the
        # counterpart trial from the other condition should also be dropped.
        for pair in conf.conditions_to_compare:
            cond_bool = np.array([c in pair for c in glm_dataset.sa.condition])
            sub_dataset = glm_dataset[cond_bool]
            c = Counter(sub_dataset.sa.id)
            for value in c:
                if c[value] < 2:
                    id_bool = np.array([value in cond_id
                                        for cond_id in glm_dataset.sa.id])
                    glm_dataset = glm_dataset[
                        np.logical_not(np.logical_and(id_bool, cond_bool))]
        run_datasets.append(glm_dataset)
    return vstack(run_datasets, 0)
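A minimal usage sketch for the function above. All values are illustrative assumptions: `conf` stands for an existing study-configuration object exposing `.mvpa_tasks` and `.conditions_to_compare`, and the path, mask name, flavor, and TR are placeholders, not taken from the original code.

# Hypothetical usage sketch -- study path, subject number, mask name,
# flavor, and TR are assumed values; `conf` is an existing config object.
betas = create_betas_per_trial_with_pymvpa_roni(
    "/data/openfmri_study", subj=1, conf=conf,
    mask_name="ventral_temporal", flavor="mcf", TR=2.0)
print(betas.shape)  # one sample per trial-wise beta, one feature per voxel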
def testsg(ds, w, p, voxIdx, c='chunks'):
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.signal import savgol_filter
    from mvpa2.mappers.detrend import poly_detrend
    import SavGolFilter as sg

    poly0 = ds.copy(deep=False)
    poly1 = ds.copy(deep=False)
    poly2 = ds.copy(deep=False)
    t = np.arange(ds.shape[0])
    poly_detrend(poly0, polyord=0, chunks_attr=c)
    sgFilt = ds.copy(deep=False)
    manualSet = ds.copy(deep=False)
    manual = getSGDrift(manualSet, w, p)
    # filterMe = filterMat[:, voxIdx]
    # manual = savgol_filter(filterMe, w, p, axis=0)  # AXIS SHOULD BE 0
    # manualMat = savgol_filter(filterMat, w, p, axis=0)
    poly_detrend(poly1, polyord=1, chunks_attr=c)
    poly_detrend(poly2, polyord=2, chunks_attr=c)
    # sgFilt = savgol_filter(sgFilt, w, p, axis=0)
    sg.sg_filter(sgFilt, window_length=w, polyorder=p, chunks_attr=c, axis=0)
    plt.plot(t, poly0.samples[:, voxIdx], 'k', label='demean')
    plt.plot(t, poly1.samples[:, voxIdx], 'b+', label='linear')
    plt.plot(t, poly2.samples[:, voxIdx], 'b*', label='quadratic')
    plt.plot(t, sgFilt.samples[:, voxIdx], 'go', label='sg')
    plt.plot(t, manual[:, voxIdx], 'r+', label='sg estimate')
    plt.legend(loc='upper left')
    plt.show()
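For reference, a self-contained sketch of the Savitzky-Golay drift-removal idea that testsg compares against polynomial detrending. The synthetic data, window length, and polynomial order are assumptions chosen for illustration only.

# Standalone sketch of SG-based detrending on synthetic data: savgol_filter
# along axis=0 estimates the slow drift per "voxel", which is then
# subtracted. window_length must be odd and greater than polyorder.
import numpy as np
from scipy.signal import savgol_filter

rng = np.random.RandomState(0)
t = np.arange(200)
drift = np.sin(t / 40.0)[:, None]        # slow drift, shape (200, 1)
data = drift + 0.1 * rng.randn(200, 3)   # three noisy "voxel" time series
estimate = savgol_filter(data, window_length=101, polyorder=2, axis=0)
detrended = data - estimate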
def detrend(ds):
    # print(ds.summary())
    ds.samples = ds.samples.astype('float')
    pl.figure()
    pl.subplot(221)
    plot_samples_distance(ds, sortbyattr='chunks')
    # plot_samples_distance(ds)
    pl.title('Sample distances (sorted by chunks)')
    poly_detrend(ds, polyord=2, chunks_attr='chunks')
    pl.subplot(222)
    plot_samples_distance(ds, sortbyattr='chunks')
    pl.show()
    zscore(ds, chunks_attr='chunks', dtype='float32')
    pl.subplot(223)
    plot_samples_distance(ds, sortbyattr='chunks')
    pl.subplot(224)
    # plot_samples_distance(ds, sortbyattr='targets')
    pl.title('Sample distances (sorted by condition)')
    pl.show()
    # poly_detrend(ds, polyord=1, chunks_attr='chunks')
    # zscore(ds, chunks_attr='chunks', dtype='float32')
    return ds
def create_betas_per_trial_with_pymvpa(study_path, subj, conf, mask_name, flavor, TR):
    dhandle = OpenFMRIDataset(study_path)
    model = 1
    task = 1  # TODO: do this for other tasks as well, not only the first
    mask_fname = _opj(study_path, "sub{:0>3d}".format(subj), "masks",
                      conf.mvpa_tasks[0], "{}.nii.gz".format(mask_name))
    print(mask_fname)
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        if isinstance(run_id, str):
            continue
        all_events = dhandle.get_bold_run_model(model, subj, run_id)
        run_events = []
        i = 0
        for event in all_events:
            if event["task"] == task:
                event["condition"] = "{}-{}".format(event["condition"], i)
                run_events.append(event)
                i += 1
        # load BOLD data for this run (with masking); add 0-based chunk ID
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id, flavor=flavor,
                                              chunks=run_id - 1, mask=mask_fname)
        # convert event info into a sample attribute and assign as 'targets'
        run_ds.sa.time_coords = run_ds.sa.time_indices * TR
        print(run_id)
        run_ds.sa["targets"] = events2sample_attr(run_events, run_ds.sa.time_coords,
                                                  noinfolabel="rest")
        # additional time series preprocessing can go here
        poly_detrend(run_ds, polyord=1, chunks_attr="chunks")
        zscore(run_ds, chunks_attr="chunks", param_est=("targets", ["rest"]),
               dtype="float32")
        glm_dataset = fit_event_hrf_model(run_ds, run_events,
                                          time_attr="time_coords",
                                          condition_attr="condition")
        glm_dataset.sa["targets"] = [x[: x.find("-")] for x in glm_dataset.sa.condition]
        glm_dataset.sa.condition = glm_dataset.sa.targets
        glm_dataset.sa["chunks"] = [run_id - 1] * len(glm_dataset.samples)
        run_datasets.append(glm_dataset)
    return vstack(run_datasets, 0)
""" # load dataset -- ventral and occipital ROIs from mvpa2.datasets.sources.native import load_tutorial_data datapath = pjoin(cfg.get('location', 'tutorial data'), 'haxby2001') ds = load_tutorial_data(roi=(15, 16, 23, 24, 36, 38, 39, 40, 48)) """ We only do minimal pre-processing: linear trend removal and Z-scoring all voxel time-series with respect to the mean and standard deviation of the "rest" condition. """ # only minial detrending from mvpa2.mappers.detrend import poly_detrend poly_detrend(ds, polyord=1, chunks_attr='chunks') # z-scoring with respect to the 'rest' condition from mvpa2.mappers.zscore import zscore zscore(ds, chunks_attr='chunks', param_est=('targets', 'rest')) # now remove 'rest' samples ds = ds[ds.sa.targets != 'rest'] """ RSA is all about so-called dissimilarity matrices: square, symetric matrices with a zero diagonal that encode the (dis)similarity between all pairs of data samples or conditions in a dataset. We compose a little helper function to plot such matrices, including a color-scale and proper labeling of matrix rows and columns. """ # little helper function to plot dissimilarity matrices
def detrend(ds):
    poly_detrend(ds, polyord=1, chunks_attr='chunks')
    zscore(ds, chunks_attr='chunks', dtype='float32')
    return ds
def run(args):
    if args.chunks is not None:
        # apply global "chunks" setting
        for cattr in ('detrend_chunks', 'zscore_chunks'):
            if getattr(args, cattr) is None:
                # only overwrite if individual option is not given
                args.__setattr__(cattr, args.chunks)
    ds = arg2ds(args.data)
    if args.poly_detrend is not None:
        if args.detrend_chunks is not None \
                and args.detrend_chunks not in ds.sa:
            raise ValueError(
                "--detrend-chunks attribute '%s' not found in dataset"
                % args.detrend_chunks)
        from mvpa2.mappers.detrend import poly_detrend
        verbose(1, "Detrend")
        poly_detrend(ds, polyord=args.poly_detrend,
                     chunks_attr=args.detrend_chunks,
                     opt_regs=args.detrend_regrs,
                     space=args.detrend_coords)
    if args.filter_passband is not None:
        from mvpa2.mappers.filters import iir_filter
        from scipy.signal import butter, buttord
        if args.sampling_rate is None or args.filter_stopband is None:
            raise ValueError("spectral filtering requires specification of "
                             "--filter-stopband and --sampling-rate")
        # determine filter type
        nyquist = args.sampling_rate / 2.0
        if len(args.filter_passband) > 1:
            btype = 'bandpass'
            if not len(args.filter_passband) == len(args.filter_stopband):
                raise ValueError("passband and stopband specifications have "
                                 "to match in size")
            wp = [v / nyquist for v in args.filter_passband]
            ws = [v / nyquist for v in args.filter_stopband]
        elif args.filter_passband[0] < args.filter_stopband[0]:
            btype = 'lowpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        elif args.filter_passband[0] > args.filter_stopband[0]:
            btype = 'highpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        else:
            raise ValueError("invalid specification of Butterworth filter")
        # create filter
        verbose(1, "Spectral filtering (%s)" % (btype,))
        try:
            ord, wn = buttord(wp, ws, args.filter_passloss,
                              args.filter_stopattenuation, analog=False)
            b, a = butter(ord, wn, btype=btype)
        except OverflowError:
            raise ValueError("cannot construct Butterworth filter for the "
                             "given specification")
        ds = iir_filter(ds, b, a)
    if args.zscore:
        from mvpa2.mappers.zscore import zscore
        verbose(1, "Z-score")
        zscore(ds, chunks_attr=args.zscore_chunks, params=args.zscore_params)
        verbose(3, "Dataset summary %s" % (ds.summary()))
    # strip invariant features?
    if args.strip_invariant_features is not None:
        from mvpa2.datasets.miscfx import remove_invariant_features
        ds = remove_invariant_features(ds)
    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
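To make the filter-design branch above concrete, here is a minimal sketch of the buttord/butter sequence for the band-pass case. The sampling rate and band edges are assumed values picked to resemble an fMRI time series (TR = 2 s), not taken from the original code.

# Minimal band-pass design sketch with assumed values: 0.5 Hz sampling rate,
# pass band 0.01-0.1 Hz, stop band 0.005-0.15 Hz, 3 dB pass-band loss,
# 30 dB stop-band attenuation.
from scipy.signal import butter, buttord

nyquist = 0.5 / 2.0
wp = [0.01 / nyquist, 0.1 / nyquist]    # normalized pass-band edges
ws = [0.005 / nyquist, 0.15 / nyquist]  # normalized stop-band edges
order, wn = buttord(wp, ws, gpass=3, gstop=30, analog=False)
b, a = butter(order, wn, btype='bandpass')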
def test_polydetrend():
    samples_forwhole = np.array([[1.0, 2, 3, 4, 5, 6],
                                 [-2.0, -4, -6, -8, -10, -12]], ndmin=2).T
    samples_forchunks = np.array([[1.0, 2, 3, 3, 2, 1],
                                  [-2.0, -4, -6, -6, -4, -2]], ndmin=2).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [0, 0, 1, 1, 1, 0]
    target_whole = np.array([[-3.0, -2, -1, 1, 2, 3],
                             [-6, -4, -2, 2, 4, 6]], ndmin=2).T
    target_chunked = np.array([[-1.0, 0, 1, 1, 0, -1],
                               [2, 0, -2, -2, 0, 2]], ndmin=2).T

    ds = Dataset(samples_forwhole)
    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, space='police')
    mds = dm.forward(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))
    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:, 0]
    ds.sa['opt_reg_lin'] = dm._regs[:, 1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg.forward(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))

    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0).forward(ds)
    assert_array_almost_equal(
        mds.samples, samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean
    # removal even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1).forward(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, space='police')
    mds = dm.forward(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in
    # both chunks
    assert_array_equal(mds.sa.police, list(range(3)) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm.forward, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm.forward, ds[1:-1])
    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1).forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array([[-1.0, 0, 1, 0, 0, 0],
                             [2.0, 0, -2, 0, 0, 0]], ndmin=2).T
    ds = dataset_wizard(samples_forchunks.copy(), targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0, 1]).forward(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array([[1.0, 4, 6, 8, 2, 9],
                                 [-2.0, -8, -12, -16, -4, -18]], ndmin=2).T
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:, 0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, space='time')
    mds = dm.forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array([[1.0, 3, 3, 2, 2, 1],
                                  [-2.0, -6, -6, -4, -4, -2]], ndmin=2).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1,
                            space='time').forward(ds)
    # the whole thing must not affect the source data
    assert_array_equal(ds, samples_forchunks)
    # but if done inplace that is no longer true
    poly_detrend(ds, chunks_attr='chunks', polyord=1, space='time')
    assert_array_equal(ds, mds)
def test_voxel_selection(self): """Compare surface and volume based searchlight""" """ Tests to see whether results are identical for surface-based searchlight (just one plane; Euclidean distnace) and volume-based searchlight. Note that the current value is a float; if it were int, it would specify the number of voxels in each searchlight""" radius = 10.0 """Define input filenames""" epi_fn = pathjoin(pymvpa_dataroot, "bold.nii.gz") maskfn = pathjoin(pymvpa_dataroot, "mask.nii.gz") """ Use the EPI datafile to define a surface. The surface has as many nodes as there are voxels and is parallel to the volume 'slice' """ vg = volgeom.from_any(maskfn, mask_volume=True) aff = vg.affine nx, ny, nz = vg.shape[:3] """Plane goes in x and y direction, so we take these vectors from the affine transformation matrix of the volume""" plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny) """ Simulate pial and white matter as just above and below the central plane """ normal_vec = aff[:3, 2] outer = plane + normal_vec inner = plane + -normal_vec """ Combine volume and surface information """ vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner) """ Run voxel selection with specified radius (in mm), using Euclidean distance measure """ surf_voxsel = surf_voxel_selection.voxel_selection(vsm, radius, distance_metric="e") """Define the measure""" # run_slow=True would give an actual cross-validation with meaningful # accuracies. Because this is a unit-test only the number of voxels # in each searchlight is tested. run_slow = False if run_slow: meas = CrossValidation(GNB(), OddEvenPartitioner(), errorfx=lambda p, t: np.mean(p == t)) postproc = mean_sample else: meas = _Voxel_Count_Measure() postproc = lambda x: x """ Surface analysis: define the query engine, cross validation, and searchlight """ surf_qe = SurfaceVerticesQueryEngine(surf_voxsel) surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc) """ new (Sep 2012): also test 'simple' queryengine wrapper function """ surf_qe2 = disc_surface_queryengine( radius, maskfn, inner, outer, plane, volume_mask=True, distance_metric="euclidean" ) surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc) """ Same for the volume analysis """ element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2]))) sph = Sphere(radius, element_sizes=element_sizes) kwa = {"voxel_indices": sph} vol_qe = IndexQueryEngine(**kwa) vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc) """The following steps are similar to start_easy.py""" attr = SampleAttributes(pathjoin(pymvpa_dataroot, "attributes_literal.txt")) mask = surf_voxsel.get_mask() dataset = fmri_dataset( samples=pathjoin(pymvpa_dataroot, "bold.nii.gz"), targets=attr.targets, chunks=attr.chunks, mask=mask ) if run_slow: # do chunkswise linear detrending on dataset poly_detrend(dataset, polyord=1, chunks_attr="chunks") # zscore dataset relative to baseline ('rest') mean zscore(dataset, chunks_attr="chunks", param_est=("targets", ["rest"])) # select class face and house for this demo analysis # would work with full datasets (just a little slower) dataset = dataset[np.array([l in ["face", "house"] for l in dataset.sa.targets], dtype="bool")] """Apply searchlight to datasets""" surf_dset = surf_sl(dataset) surf_dset2 = surf_sl2(dataset) vol_dset = vol_sl(dataset) surf_data = surf_dset.samples surf_data2 = surf_dset2.samples vol_data = vol_dset.samples assert_array_equal(surf_data, surf_data2) assert_array_equal(surf_data, vol_data)
""" # load dataset -- ventral and occipital ROIs from mvpa2.datasets.sources.native import load_tutorial_data #'/home/lab/Desktop/PyMVPA-master/mvpa2/data/' #datapath = '/usr/lib/python2.7/dist-packages/mvpa2/data/haxby2001' datapath = pjoin(cfg.get('location', 'tutorial data'), 'haxby2001') ds = load_tutorial_data(path = '/usr/lib/python2.7/dist-packages/mvpa2/data',roi=(15, 16, 23, 24, 36, 38, 39, 40, 48)) """ We only do minimal pre-processing: linear trend removal and Z-scoring all voxel time-series with respect to the mean and standard deviation of the “rest” condition. """ # only minimal detrending from mvpa2.mappers.detrend import poly_detrend poly_detrend(ds, polyord=1, chunks_attr='chunks') # z-scoring with respect to the 'rest' condition from mvpa2.mappers.zscore import zscore zscore(ds, chunks_attr='chunks', param_est=('targets', 'rest')) # now remove 'rest' samples ds = ds[ds.sa.targets != 'rest'] """ RSA is all about so-called dissimilarity matrices: square, symmetric matrices with a zero diagonal that encode the (dis)similarity between all pairs of data samples or conditions in a dataset. We compose a little helper function to plot such matrices, including a color-scale and proper labeling of matrix rows and columns. """ # little helper function to plot dissimilarity matrices # since we are using correlation-distance, we use colorbar range of [0,2] def plot_mtx(mtx, labels, title): pl.figure() pl.imshow(mtx, interpolation='nearest')