Example #1
def test_load(tmpdir):
    sim = Simulator()
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    # if MNI_Template["resolution"] == '2mm':
    #     shape_3d = (91, 109, 91)
    #     shape_2d = (6, 238955)
    # elif MNI_Template["resolution"] == '3mm':
    #     shape_3d = (60, 72, 60)
    #     shape_2d = (6, 71020)

    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None)
    # holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None)

    # Test load list of 4D images
    file_list = [str(tmpdir.join('data.nii.gz')), str(tmpdir.join('data.nii.gz'))]
    dat = Brain_Data(file_list)
    dat = Brain_Data([nb.load(x) for x in file_list])

    # Test load string
    dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y)

    # Test Write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))
    def _run_interface(self, runtime):
        from nltools.data import Brain_Data
        import os
        in_file = self.inputs.in_file
        mask = self.inputs.mask
        low_pass = self.inputs.low_pass_cutoff
        high_pass = self.inputs.high_pass_cutoff
        TR = self.inputs.sampling_rate

        if low_pass == 0:
            low_pass = None
        if high_pass == 0:
            high_pass = None

        dat = Brain_Data(in_file, mask=mask)
        # Only filter when at least one cutoff was specified
        if low_pass or high_pass:
            dat = dat.filter(sampling_rate=TR,
                             low_pass=low_pass,
                             high_pass=high_pass)

        # Generate output file name
        out_file = os.path.split(in_file)[-1].split(
            '.nii.gz')[0] + '_filtered.nii.gz'
        dat.write(out_file)

        self._out_file = out_file

        runtime.returncode = 0
        return runtime
def test_load(tmpdir):
    sim = Simulator()
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    # if MNI_Template["resolution"] == '2mm':
    #     shape_3d = (91, 109, 91)
    #     shape_2d = (6, 238955)
    # elif MNI_Template["resolution"] == '3mm':
    #     shape_3d = (60, 72, 60)
    #     shape_2d = (6, 71020)

    y = pd.read_csv(os.path.join(str(tmpdir.join("y.csv"))),
                    header=None,
                    index_col=None)
    # holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None)

    # Test load list of 4D images
    file_list = [
        str(tmpdir.join("data.nii.gz")),
        str(tmpdir.join("data.nii.gz"))
    ]
    dat = Brain_Data(file_list)
    dat = Brain_Data([nb.load(x) for x in file_list])

    # Test load string and path
    dat = Brain_Data(data=str(tmpdir.join("data.nii.gz")), Y=y)
    dat = Brain_Data(data=Path(tmpdir.join("data.nii.gz")), Y=y)

    # Test Write
    dat.write(os.path.join(str(tmpdir.join("test_write.nii"))))
    assert Brain_Data(os.path.join(str(tmpdir.join("test_write.nii"))))

    # Test i/o for hdf5
    dat.write(os.path.join(str(tmpdir.join("test_write.h5"))))
    b = Brain_Data(os.path.join(tmpdir.join("test_write.h5")))
    for k in ["X", "Y", "mask", "nifti_masker", "file_name", "data"]:
        if k == "data":
            assert np.allclose(b.__dict__[k], dat.__dict__[k])
        elif k in ["X", "Y"]:
            assert all(b.__dict__[k].eq(dat.__dict__[k]).values)
        elif k == "mask":
            assert np.allclose(b.__dict__[k].affine, dat.__dict__[k].affine)
            assert np.allclose(b.__dict__[k].get_fdata(),
                               dat.__dict__[k].get_fdata())
            assert b.__dict__[k].get_filename() == dat.__dict__[k].get_filename()
        elif k == "nifti_masker":
            assert np.allclose(b.__dict__[k].affine_, dat.__dict__[k].affine_)
            assert np.allclose(b.__dict__[k].mask_img.get_fdata(),
                               dat.__dict__[k].mask_img.get_fdata())
        else:
            assert b.__dict__[k] == dat.__dict__[k]
Example #4
    def create_data(self, levels, sigma, radius=5, center=None, reps=1, output_dir=None):
        """ Create simulated sphere data from a vector of intensities or class labels.

        Args:
            levels: vector of intensities or class labels
            sigma: amount of noise to add
            radius: radius of the sphere(s); a vector of radii will create multiple spheres
            center: center(s) of sphere(s) of the form [px, py, pz] or [[px1, py1, pz1], ..., [pxn, pyn, pzn]]
            reps: number of repetitions of the data, useful for simulating trials or subjects
            output_dir: string path of directory to write data to. If None, no data will be written

        Returns:
            Brain_Data object containing the simulated images, with Y set to the labels

        """

        # Create reps
        nlevels = len(levels)
        y = levels
        rep_id = [1] * len(levels)
        for i in range(reps - 1):
            y = y + levels
            rep_id.extend([i+2] * nlevels)

        # Initialize Spheres with options for multiple radii and centers of the spheres (or just an int and a 3D list)
        A = self.n_spheres(radius, center)

        # for each intensity
        A_list = []
        for i in y:
            A_list.append(np.multiply(A, i))

        # generate a different gaussian noise profile for each mask
        mu = 0  # values centered around 0
        N_list = []
        for i in range(len(y)):
            N_list.append(self.normal_noise(mu, sigma))

        # add noise and signal together, then convert to nifti files
        NF_list = []
        for i in range(len(y)):
            NF_list.append(self.to_nifti(np.add(N_list[i], A_list[i])))
        NF_list = Brain_Data(NF_list)

        # Assign variables to object
        self.data = NF_list
        self.y = pd.DataFrame(data=y)
        self.rep_id = pd.DataFrame(data=rep_id)

        dat = self.data
        dat.Y = self.y

        # Write Data to files if requested
        if output_dir is not None and isinstance(output_dir, six.string_types):
            NF_list.write(os.path.join(output_dir, 'data.nii.gz'))
            self.y.to_csv(os.path.join(output_dir, 'y.csv'), index=None, header=False)
            self.rep_id.to_csv(os.path.join(output_dir, 'rep_id.csv'), index=None, header=False)
        return dat
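
    # A minimal usage sketch (not part of the original source) of create_data as
    # documented above; it mirrors how the tests in this collection call it and
    # assumes nltools' Simulator class:
    #
    #     from nltools.simulator import Simulator
    #     sim = Simulator()
    #     dat = sim.create_data([0, 1], sigma=1, reps=3, output_dir=None)
    #     dat.shape()   # (len(levels) * reps, n_voxels), e.g. (6, n_voxels)
    #     dat.Y         # DataFrame of the simulated intensity labels
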
    def get_trialtype_pain_regressors(self,nifti_data,onset_file):
        print("importing nifti")
        #import the nifti
        if (os.path.isfile(nifti_data + "nltoolstandard.nii.gz")):
            msmrl1 = Brain_Data(
                nifti_data + "nltoolstandard.nii.gz")
        else:
            msmrl1 = Brain_Data(
                nifti_data + ".nii.gz")
            msmrl1.write(nifti_data + "nltoolstandard.nii.gz")
        #preprocess the nifti?
        print("importing onsets")
        #import the onset
        onsets = onsets_to_dm(
            onset_file,
            TR=2,
            runLength=msmrl1.shape()[0]
        )

        #process the onset files
        #
        onsets.sampling_rate = 2

        onsets_convolved = onsets.convolve()

        # drop regressors that carry no information
        for c in onsets_convolved.columns:
            if sum(onsets_convolved.loc[:, c]) <= 0:
                print('deleting ' + str(c))
                del onsets_convolved[c]

        onsets_convolved['linearterm'] = range(1, 361)
        onsets_convolved['quadraticterm'] = [pow(x, 2) for x in onsets_convolved['linearterm']]
        onsets_convolved['cubicterm'] = [pow(x, 3) for x in onsets_convolved['linearterm']]
        onsets_convolved['ones'] = [1] * 360
        msmrl1.X=onsets_convolved
        print("convolved onsets; regressing...")
        #regress
        regression=msmrl1.regress()
        print("Regressing; calculating similarity...")
        msm_predicted_pain = regression['beta'].similarity(self.stats['weight_map'], 'dot_product')
        onset_colnames = onsets_convolved.columns.tolist()
        msm_predicted_pain_dict={}
        for i, b in enumerate(msm_predicted_pain):
            msm_predicted_pain_dict[onset_colnames[i]] = b
        return msm_predicted_pain_dict
Example #6
def test_data(tmpdir):
    sim = Simulator()
    r = 10
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    shape_3d = (91, 109, 91)
    shape_2d = (6, 238955)
    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))),
                    header=None,
                    index_col=None).T
    flist = glob.glob(str(tmpdir.join('centered*.nii.gz')))

    # Test load list
    dat = Brain_Data(data=flist, Y=y)

    # Test load file
    assert Brain_Data(flist[0])

    # Test to_nifti
    d = dat.to_nifti()
    assert d.shape[0:3] == shape_3d

    # Test load nibabel
    assert Brain_Data(d)

    # Test shape
    assert dat.shape() == shape_2d

    # Test Mean
    assert dat.mean().shape()[0] == shape_2d[1]

    # Test Std
    assert dat.std().shape()[0] == shape_2d[1]

    # Test add
    new = dat + dat
    assert new.shape() == shape_2d

    # Test subtract
    new = dat - dat
    assert new.shape() == shape_2d

    # Test multiply
    new = dat * dat
    assert new.shape() == shape_2d

    # Test Iterator
    x = [x for x in dat]
    assert len(x) == len(dat)
    assert len(x[0].data.shape) == 1

    # # Test T-test
    out = dat.ttest()
    assert out['t'].shape()[0] == shape_2d[1]

    # # # Test T-test - permutation method
    # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
    # assert out['t'].shape()[0]==shape_2d[1]

    # Test Regress
    dat.X = pd.DataFrame(
        {
            'Intercept': np.ones(len(dat.Y)),
            'X1': np.array(dat.Y).flatten()
        },
        index=None)
    out = dat.regress()
    assert out['beta'].shape() == (2, shape_2d[1])

    # Test indexing
    assert out['t'][1].shape()[0] == shape_2d[1]

    # Test threshold
    i = 1
    tt = threshold(out['t'][i], out['p'][i], .05)
    assert isinstance(tt, Brain_Data)

    # Test write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

    # Test append
    assert dat.append(dat).shape()[0] == shape_2d[0] * 2

    # Test distance
    distance = dat.distance(method='euclidean')
    assert distance.shape == (shape_2d[0], shape_2d[0])

    # Test predict
    stats = dat.predict(algorithm='svm',
                        cv_dict={
                            'type': 'kfolds',
                            'n_folds': 2,
                            'n': len(dat.Y)
                        },
                        plot=False,
                        **{'kernel': "linear"})

    # SVM classification with Platt scaling (probability=True), no cross-validation.
    # This will output probabilities for each class.
    stats = dat.predict(algorithm='svm',
                        cv_dict=None,
                        plot=False,
                        **{
                            'kernel': 'linear',
                            'probability': True
                        })

    assert isinstance(stats['weight_map'], Brain_Data)
    # Logistic classification, with 5-fold cross-validation.

    stats = dat.predict(algorithm='logistic',
                        cv_dict={
                            'type': 'kfolds',
                            'n_folds': 5,
                            'n': len(dat.Y)
                        },
                        plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Ridge classification, no cross-validation.
    stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None, plot=False)
    assert isinstance(stats['weight_map'], Brain_Data)

    # Test Similarity
    r = dat.similarity(stats['weight_map'])
    assert len(r) == shape_2d[0]
    r2 = dat.similarity(stats['weight_map'].to_nifti())
    assert len(r2) == shape_2d[0]

    # Test apply_mask - might move part of this to test mask suite
    s1 = create_sphere([41, 64, 55], radius=10)
    assert isinstance(s1, nb.Nifti1Image)
    s2 = Brain_Data(s1)
    masked_dat = dat.apply_mask(s1)
    assert masked_dat.shape()[1] == np.sum(s2.data != 0)

    # Test extract_roi
    mask = create_sphere([41, 64, 55], radius=10)
    assert len(dat.extract_roi(mask)) == shape_2d[0]

    # Test r_to_z
    z = dat.r_to_z()
    assert z.shape() == dat.shape()

    # Test copy
    d_copy = dat.copy()
    assert d_copy.shape() == dat.shape()

    # Test detrend
    detrend = dat.detrend()
    assert detrend.shape() == dat.shape()
    def get_trialtype_pain_regressors(self, nifti_data, onset_file):
        print("importing nifti")
        #import the nifti
        #load the nltools prepped file if it's available.
        if (os.path.isfile(nifti_data + "nltoolstandard.nii.gz")):
            msmrl1 = Brain_Data(nifti_data + "nltoolstandard.nii.gz")
        else:  #but if it's not; no worries; just load the original one.
            msmrl1 = Brain_Data(nifti_data + ".nii.gz")
            msmrl1.write(nifti_data + "nltoolstandard.nii.gz")

        #I want to standardize globally; this will preserve the relative strengths of each time point
        #and preserve the relative activity at each voxel.
        #and let's use the mean standard deviation across all the images.
        #msmrl1.data = msmrl1.data - np.tile(msmrl1.mean().mean(),msmrl1.data.shape)
        #msmrl1.data = msmrl1.data / np.tile(np.std(msmrl1.data,axis=1).mean(),msmrl1.data.shape)
        # OR we could apply the standardization to the OUTPUT.
        #grand_mean=msmrl1.mean().mean()
        #grand_sd=np.std(msmrl1.data,axis=1).mean()

        #preprocess the nifti?
        print("importing onsets")
        #import the onset
        onsets = onsets_to_dm(onset_file, TR=2, runLength=msmrl1.shape()[0])

        #process the onset files
        #
        onsets.sampling_rate = 2

        onsets_convolved = onsets.convolve()

        #delete columns with no information in them.
        for c in onsets_convolved.columns:
            if sum(onsets_convolved.loc[:, c]) <= 0:
                print('deleting ' + str(c))
                del onsets_convolved[c]

        rowcount = len(onsets_convolved)
        if rowcount != 360:
            warnings.warn(
                "Just a friendly FYI: expected number of rows is 360 but this subject had "
                + str(rowcount) +
                ". Probably this subject got cut off the task half-way through."
            )
        onsets_convolved['linearterm'] = range(1, rowcount + 1)

        onsets_convolved['quadraticterm'] = [
            pow(x, 2) for x in onsets_convolved['linearterm']
        ]
        onsets_convolved['cubicterm'] = [
            pow(x, 3) for x in onsets_convolved['linearterm']
        ]
        onsets_convolved['ones'] = [1] * rowcount
        msmrl1.X = onsets_convolved
        print("convolved onsets; regressing...")
        #regress the file on each of the onsets. So then, when we compare similarity to the regression, we'll be getting the
        #regression to the each event, not to each TR.
        regression = msmrl1.regress()
        print("Regressing; calculating similarity to the pain map from " +
              self.decoder_origin + "...")
        msm_predicted_pain = regression['beta'].similarity(
            self.decoder, 'dot_product')
        msm_predicted_pain_scaled = msm_predicted_pain - msmrl1.data.std()
        onset_colnames = onsets_convolved.columns.tolist()
        msm_predicted_pain_dict = {}
        for i, b in enumerate(msm_predicted_pain_scaled):
            msm_predicted_pain_dict[onset_colnames[i]] = b
        return msm_predicted_pain_dict
Example #8
def test_brain_data(tmpdir):

    # Add '3mm' to this list to test that resolution as well
    for resolution in ['2mm']:

        MNI_Template["resolution"] = resolution

        sim = Simulator()
        r = 10
        sigma = 1
        y = [0, 1]
        n_reps = 3
        output_dir = str(tmpdir)
        dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

        if MNI_Template["resolution"] == '2mm':
            shape_3d = (91, 109, 91)
            shape_2d = (6, 238955)
        elif MNI_Template["resolution"] == '3mm':
            shape_3d = (60, 72, 60)
            shape_2d = (6, 71020)

        y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))),header=None, index_col=None)
        holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))),header=None,index_col=None)

        # Test load list of 4D images
        file_list = [str(tmpdir.join('data.nii.gz')), str(tmpdir.join('data.nii.gz'))]
        dat = Brain_Data(file_list)
        dat = Brain_Data([nb.load(x) for x in file_list])

        # Test load string
        dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y)

        # Test concatenate
        out = Brain_Data([x for x in dat])
        assert isinstance(out, Brain_Data)
        assert len(out)==len(dat)

        # Test to_nifti
        d = dat.to_nifti()
        assert d.shape[0:3] == shape_3d

        # Test load nibabel
        assert Brain_Data(d)

        # Test shape
        assert dat.shape() == shape_2d

        # Test Mean
        assert dat.mean().shape()[0] == shape_2d[1]

        # Test Std
        assert dat.std().shape()[0] == shape_2d[1]

        # Test add
        new = dat + dat
        assert new.shape() == shape_2d

        # Test subtract
        new = dat - dat
        assert new.shape() == shape_2d

        # Test multiply
        new = dat * dat
        assert new.shape() == shape_2d

        # Test Indexing
        index = [0, 3, 1]
        assert len(dat[index]) == len(index)
        index = range(4)
        assert len(dat[index]) == len(index)
        index = dat.Y == 1

        assert len(dat[index.values.flatten()]) == index.values.sum()

        assert len(dat[index]) == index.values.sum()
        assert len(dat[:3]) == 3

        # Test Iterator
        x = [x for x in dat]
        assert len(x) == len(dat)
        assert len(x[0].data.shape) == 1

        # # Test T-test
        out = dat.ttest()
        assert out['t'].shape()[0] == shape_2d[1]

        # # # Test T-test - permutation method
        # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
        # assert out['t'].shape()[0]==shape_2d[1]

        # Test Regress
        dat.X = pd.DataFrame({'Intercept':np.ones(len(dat.Y)),
                            'X1':np.array(dat.Y).flatten()}, index=None)

        # Standard OLS
        out = dat.regress()

        assert type(out['beta'].data) == np.ndarray
        assert type(out['t'].data) == np.ndarray
        assert type(out['p'].data) == np.ndarray
        assert type(out['residual'].data) == np.ndarray
        assert type(out['df'].data) == np.ndarray
        assert out['beta'].shape() == (2, shape_2d[1])
        assert out['t'][1].shape()[0] == shape_2d[1]

        # Robust OLS
        out = dat.regress(mode='robust')

        assert type(out['beta'].data) == np.ndarray
        assert type(out['t'].data) == np.ndarray
        assert type(out['p'].data) == np.ndarray
        assert type(out['residual'].data) == np.ndarray
        assert type(out['df'].data) == np.ndarray
        assert out['beta'].shape() == (2, shape_2d[1])
        assert out['t'][1].shape()[0] == shape_2d[1]

        # Test threshold
        i=1
        tt = threshold(out['t'][i], out['p'][i], .05)
        assert isinstance(tt, Brain_Data)

        # Test write
        dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
        assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

        # Test append
        assert dat.append(dat).shape()[0] == shape_2d[0]*2

        # Test distance
        distance = dat.distance(method='euclidean')
        assert isinstance(distance, Adjacency)
        assert distance.square_shape()[0] == shape_2d[0]

        # Test predict
        stats = dat.predict(algorithm='svm',
                            cv_dict={'type': 'kfolds', 'n_folds': 2},
                            plot=False, **{'kernel':"linear"})

        # SVM classification with Platt scaling (probability=True), no cross-validation.
        # This will output probabilities for each class.
        stats = dat.predict(algorithm='svm',
                            cv_dict=None, plot=False,
                            **{'kernel':'linear', 'probability':True})
        assert isinstance(stats['weight_map'], Brain_Data)

        # Logistic classification, with 2-fold cross-validation.
        stats = dat.predict(algorithm='logistic',
                            cv_dict={'type': 'kfolds', 'n_folds': 2},
                            plot=False)
        assert isinstance(stats['weight_map'], Brain_Data)

        # Ridge classification, no cross-validation.
        stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None, plot=False)
        assert isinstance(stats['weight_map'], Brain_Data)

        # Ridge
        stats = dat.predict(algorithm='ridge',
                            cv_dict={'type': 'kfolds', 'n_folds': 2,
                            'subject_id':holdout}, plot=False, **{'alpha':.1})

        # Lasso
        stats = dat.predict(algorithm='lasso',
                            cv_dict={'type': 'kfolds', 'n_folds': 2,
                            'stratified':dat.Y}, plot=False, **{'alpha':.1})

        # PCR
        stats = dat.predict(algorithm='pcr', cv_dict=None, plot=False)

        # Test Similarity
        r = dat.similarity(stats['weight_map'])
        assert len(r) == shape_2d[0]
        r2 = dat.similarity(stats['weight_map'].to_nifti())
        assert len(r2) == shape_2d[0]
        r = dat.similarity(stats['weight_map'], method='dot_product')
        assert len(r) == shape_2d[0]
        r = dat.similarity(stats['weight_map'], method='cosine')
        assert len(r) == shape_2d[0]
        r = dat.similarity(dat, method='correlation')
        assert r.shape == (dat.shape()[0],dat.shape()[0])
        r = dat.similarity(dat, method='dot_product')
        assert r.shape == (dat.shape()[0],dat.shape()[0])
        r = dat.similarity(dat, method='cosine')
        assert r.shape == (dat.shape()[0],dat.shape()[0])

        # Test apply_mask - might move part of this to test mask suite
        s1 = create_sphere([12, 10, -8], radius=10)
        assert isinstance(s1, nb.Nifti1Image)
        masked_dat = dat.apply_mask(s1)
        assert masked_dat.shape()[1] == np.sum(s1.get_fdata() != 0)

        # Test extract_roi
        mask = create_sphere([12, 10, -8], radius=10)
        assert len(dat.extract_roi(mask)) == shape_2d[0]

        # Test r_to_z
        z = dat.r_to_z()
        assert z.shape() == dat.shape()

        # Test copy
        d_copy = dat.copy()
        assert d_copy.shape() == dat.shape()

        # Test detrend
        detrend = dat.detrend()
        assert detrend.shape() == dat.shape()

        # Test standardize
        s = dat.standardize()
        assert s.shape() == dat.shape()
        assert np.isclose(np.sum(s.mean().data), 0, atol=.1)
        s = dat.standardize(method='zscore')
        assert s.shape() == dat.shape()
        assert np.isclose(np.sum(s.mean().data), 0, atol=.1)

        # Test Sum
        s = dat.sum()
        assert s.shape() == dat[1].shape()

        # Test Groupby
        s1 = create_sphere([12, 10, -8], radius=10)
        s2 = create_sphere([22, -2, -22], radius=10)
        mask = Brain_Data([s1, s2])
        d = dat.groupby(mask)
        assert isinstance(d, Groupby)

        # Test Aggregate
        mn = dat.aggregate(mask, 'mean')
        assert isinstance(mn, Brain_Data)
        assert len(mn.shape()) == 1

        # Test Threshold
        s1 = create_sphere([12, 10, -8], radius=10)
        s2 = create_sphere([22, -2, -22], radius=10)
        mask = Brain_Data(s1)*5
        mask = mask + Brain_Data(s2)

        m1 = mask.threshold(upper=.5)
        m2 = mask.threshold(upper=3)
        m3 = mask.threshold(upper='98%')
        m4 = Brain_Data(s1)*5 + Brain_Data(s2)*-.5
        m4 = mask.threshold(upper=.5,lower=-.3)
        assert np.sum(m1.data > 0) > np.sum(m2.data > 0)
        assert np.sum(m1.data > 0) == np.sum(m3.data > 0)
        assert np.sum(m4.data[(m4.data > -.3) & (m4.data <.5)]) == 0
        assert np.sum(m4.data[(m4.data < -.3) | (m4.data >.5)]) > 0

        # Test Regions
        r = mask.regions(min_region_size=10)
        m1 = Brain_Data(s1)
        m2 = r.threshold(1, binarize=True)
        # assert len(r)==2
        assert len(np.unique(r.to_nifti().get_fdata())) == 2
        diff = m2-m1
        assert np.sum(diff.data) == 0

        # Test Bootstrap
        masked = dat.apply_mask(create_sphere(radius=10, coordinates=[0, 0, 0]))
        n_samples = 3
        b = masked.bootstrap('mean', n_samples=n_samples)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('std', n_samples=n_samples)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples, plot=False)
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples,
                        plot=False, cv_dict={'type':'kfolds','n_folds':3})
        assert isinstance(b['Z'], Brain_Data)
        b = masked.bootstrap('predict', n_samples=n_samples,
                        save_weights=True, plot=False)
        assert len(b['samples'])==n_samples

        # Test decompose
        n_components = 3
        stats = dat.decompose(algorithm='pca', axis='voxels',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='ica', axis='voxels',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        dat.data = dat.data + 2
        dat.data[dat.data<0] = 0
        stats = dat.decompose(algorithm='nnmf', axis='voxels',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='fa', axis='voxels',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='pca', axis='images',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='ica', axis='images',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        dat.data = dat.data + 2
        dat.data[dat.data<0] = 0
        stats = dat.decompose(algorithm='nnmf', axis='images',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        stats = dat.decompose(algorithm='fa', axis='images',
                              n_components=n_components)
        assert n_components == len(stats['components'])
        assert stats['weights'].shape == (len(dat), n_components)

        # Test Hyperalignment Method
        sim = Simulator()
        y = [0, 1]
        n_reps = 10
        s1 = create_sphere([0, 0, 0], radius=3)
        d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1)
        d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1)
        d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1)

        # Test procrustes using align
        data = [d1, d2, d3]
        out = align(data, method='procrustes')
        assert len(data) == len(out['transformed'])
        assert len(data) == len(out['transformation_matrix'])
        assert data[0].shape() == out['common_model'].shape()
        transformed = np.dot(d1.data, out['transformation_matrix'][0])
        centered = d1.data - np.mean(d1.data, 0)
        transformed = (np.dot(centered/np.linalg.norm(centered), out['transformation_matrix'][0])*out['scale'][0])
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - transformed), decimal=5)

        # Test deterministic brain_data
        bout = d1.align(out['common_model'], method='deterministic_srm')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed))

        # Test deterministic brain_data
        bout = d1.align(out['common_model'], method='probabilistic_srm')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed))

        # Test procrustes brain_data
        bout = d1.align(out['common_model'], method='procrustes')
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[1] == bout['transformation_matrix'].shape[0]
        centered = d1.data - np.mean(d1.data, 0)
        btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'])*bout['scale'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed), decimal=5)
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - bout['transformed'].data))

        # Test hyperalignment on Brain_Data over time (axis=1)
        sim = Simulator()
        y = [0, 1]
        n_reps = 10
        s1 = create_sphere([0, 0, 0], radius=5)
        d1 = sim.create_data(y, 1, reps=n_reps, output_dir=None).apply_mask(s1)
        d2 = sim.create_data(y, 2, reps=n_reps, output_dir=None).apply_mask(s1)
        d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1)
        data = [d1, d2, d3]

        out = align(data, method='procrustes', axis=1)
        assert len(data) == len(out['transformed'])
        assert len(data) == len(out['transformation_matrix'])
        assert data[0].shape() == out['common_model'].shape()
        centered = data[0].data.T-np.mean(data[0].data.T, 0)
        transformed = (np.dot(centered/np.linalg.norm(centered), out['transformation_matrix'][0])*out['scale'][0])
        np.testing.assert_almost_equal(0,np.sum(out['transformed'][0].data-transformed.T), decimal=5)

        bout = d1.align(out['common_model'], method='deterministic_srm', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data.T, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T))

        bout = d1.align(out['common_model'], method='probabilistic_srm', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        btransformed = np.dot(d1.data.T, bout['transformation_matrix'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T))

        bout = d1.align(out['common_model'], method='procrustes', axis=1)
        assert d1.shape() == bout['transformed'].shape()
        assert d1.shape() == bout['common_model'].shape()
        assert d1.shape()[0] == bout['transformation_matrix'].shape[0]
        centered = d1.data.T-np.mean(d1.data.T, 0)
        btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'])*bout['scale'])
        np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T), decimal=5)
        np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data-bout['transformed'].data))
Example #9
new = new.append(data[4])

# Lists of `Brain_Data` instances can also be concatenated by recasting as a `Brain_Data` object.

# In[60]:

print(type([x for x in data[:4]]))

type(Brain_Data([x for x in data[:4]]))

# Any Brain_Data object can be written out to a nifti file.

# In[203]:

data.write('Tmp_Data.nii.gz')

# Images within a Brain_Data() instance are iterable.  Here we use a list comprehension to calculate the overall mean across all voxels within an image.

# In[61]:

[x.mean() for x in data]

# Though, we could also do this with the `mean` method by setting `axis=1`.

# In[62]:

data.mean(axis=1)

# Let's plot the mean to see how the global signal changes over time.
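
# A sketch added for illustration (not from the original notebook); it assumes
# matplotlib is available and that `data.mean(axis=1)` returns one value per
# image, as in the cell above.

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 3))
plt.plot(data.mean(axis=1))
plt.xlabel('Image (TR)')
plt.ylabel('Mean signal across voxels')
plt.show()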
Example #10
    def import_and_convolve(self,
                            nifti_data,
                            onset_file,
                            data_mask=None,
                            motion_regressors=None):
        print("importing nifti")
        # import the nifti
        # load the nltools prepped file if it's available.
        if (os.path.isfile(nifti_data + self.data_fmri_space + ".nii.gz")):
            msmrl1 = Brain_Data(nifti_data + self.data_fmri_space + ".nii.gz",
                                mask=data_mask)
        else:  # but if it's not; no worries; just load the original one.
            msmrl1 = Brain_Data(nifti_data + ".nii.gz", mask=data_mask)
            msmrl1.write(nifti_data + self.data_fmri_space + ".nii.gz")

        # I want to standardize globally; this will preserve the relative strengths of each time point
        # and preserve the relative activity at each voxel.
        # and let's use the mean standard deviation across all the images.
        # msmrl1.data = msmrl1.data - np.tile(msmrl1.mean().mean(),msmrl1.data.shape)
        # msmrl1.data = msmrl1.data / np.tile(np.std(msmrl1.data,axis=1).mean(),msmrl1.data.shape)
        # OR we could apply the standardization to the OUTPUT.
        # grand_mean=msmrl1.mean().mean()
        # grand_sd=np.std(msmrl1.data,axis=1).mean()

        # preprocess the nifti?
        print("importing onsets")
        # import the onset
        onsets = onsets_to_dm(onset_file, TR=2, runLength=msmrl1.shape()[0])

        # process the onset files
        #
        onsets.sampling_rate = 2

        onsets_convolved = onsets.convolve()

        # delete columns with no information in them.
        for c in onsets_convolved.columns:
            if sum(onsets_convolved.loc[:, c]) <= 0:
                print('deleting ' + str(c))
                del onsets_convolved[c]

        rowcount = len(onsets_convolved)
        if rowcount != 360:
            warnings.warn(
                "Just a friendly FYI: expected number of rows is 360 but this subject had "
                + str(rowcount) +
                ". Probably this subject got cut off the task half-way through."
            )
        onsets_convolved['linearterm'] = range(1, rowcount + 1)

        onsets_convolved['quadraticterm'] = [
            pow(x, 2) for x in onsets_convolved['linearterm']
        ]
        onsets_convolved['cubicterm'] = [
            pow(x, 3) for x in onsets_convolved['linearterm']
        ]
        onsets_convolved['ones'] = [1] * rowcount

        if (motion_regressors is not None):
            onsets_convolved = pandas.concat(
                [onsets_convolved, motion_regressors], axis=1)

        msmrl1.X = onsets_convolved
        return msmrl1
Example #11
def test_brain_data(tmpdir):
    sim = Simulator()
    r = 10
    sigma = 1
    y = [0, 1]
    n_reps = 3
    output_dir = str(tmpdir)
    sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir)

    shape_3d = (91, 109, 91)
    shape_2d = (6, 238955)
    y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None).T
    holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None).T
    flist = glob.glob(str(tmpdir.join('centered*.nii.gz')))

    # Test load list
    dat = Brain_Data(data=flist,Y=y)

    # Test load file
    assert Brain_Data(flist[0])

    # Test to_nifti
    d = dat.to_nifti()
    assert d.shape[0:3] == shape_3d

    # Test load nibabel
    assert Brain_Data(d)

    # Test shape
    assert dat.shape() == shape_2d

    # Test Mean
    assert dat.mean().shape()[0] == shape_2d[1]

    # Test Std
    assert dat.std().shape()[0] == shape_2d[1]

    # Test add
    new = dat + dat
    assert new.shape() == shape_2d

    # Test subtract
    new = dat - dat
    assert new.shape() == shape_2d

    # Test multiply
    new = dat * dat
    assert new.shape() == shape_2d

    # Test Iterator
    x = [x for x in dat]
    assert len(x) == len(dat)
    assert len(x[0].data.shape) == 1

    # # Test T-test
    out = dat.ttest()
    assert out['t'].shape()[0] == shape_2d[1]

    # # # Test T-test - permutation method
    # out = dat.ttest(threshold_dict={'permutation':'tfce','n_permutations':50,'n_jobs':1})
    # assert out['t'].shape()[0]==shape_2d[1]

    # Test Regress
    dat.X = pd.DataFrame({'Intercept':np.ones(len(dat.Y)), 'X1':np.array(dat.Y).flatten()},index=None)
    out = dat.regress()
    assert out['beta'].shape() == (2,shape_2d[1])

    # Test indexing
    assert out['t'][1].shape()[0] == shape_2d[1]

    # Test threshold
    i=1
    tt = threshold(out['t'][i], out['p'][i], .05)
    assert isinstance(tt,Brain_Data)

    # Test write
    dat.write(os.path.join(str(tmpdir.join('test_write.nii'))))
    assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii'))))

    # Test append
    assert dat.append(dat).shape()[0]==shape_2d[0]*2

    # Test distance
    distance = dat.distance(method='euclidean')
    assert isinstance(distance,Adjacency)
    assert distance.square_shape()[0]==shape_2d[0]

    # Test predict
    stats = dat.predict(algorithm='svm', cv_dict={'type': 'kfolds','n_folds': 2}, plot=False,**{'kernel':"linear"})

    # SVM classification with Platt scaling (probability=True), no cross-validation.
    # This will output probabilities for each class.
    stats = dat.predict(algorithm='svm', cv_dict=None, plot=False,**{'kernel':'linear', 'probability':True})
    assert isinstance(stats['weight_map'],Brain_Data)

    # Logistic classification, with 2-fold cross-validation.
    stats = dat.predict(algorithm='logistic', cv_dict={'type': 'kfolds', 'n_folds': 2}, plot=False)
    assert isinstance(stats['weight_map'],Brain_Data)

    # Ridge classification, no cross-validation.
    stats = dat.predict(algorithm='ridgeClassifier', cv_dict=None,plot=False)
    assert isinstance(stats['weight_map'],Brain_Data)

    # Ridge
    stats = dat.predict(algorithm='ridge', cv_dict={'type': 'kfolds', 'n_folds': 2,'subject_id':holdout}, plot=False,**{'alpha':.1})

    # Lasso
    stats = dat.predict(algorithm='lasso', cv_dict={'type': 'kfolds', 'n_folds': 2,'stratified':dat.Y}, plot=False,**{'alpha':.1})

    # PCR
    stats = dat.predict(algorithm='pcr', cv_dict=None, plot=False)

    # Test Similarity
    r = dat.similarity(stats['weight_map'])
    assert len(r) == shape_2d[0]
    r2 = dat.similarity(stats['weight_map'].to_nifti())
    assert len(r2) == shape_2d[0]

    # Test apply_mask - might move part of this to test mask suite
    s1 = create_sphere([12, 10, -8], radius=10)
    assert isinstance(s1, nb.Nifti1Image)
    s2 = Brain_Data(s1)
    masked_dat = dat.apply_mask(s1)
    assert masked_dat.shape()[1] == np.sum(s2.data != 0)

    # Test extract_roi
    mask = create_sphere([12, 10, -8], radius=10)
    assert len(dat.extract_roi(mask)) == shape_2d[0]

    # Test r_to_z
    z = dat.r_to_z()
    assert z.shape() == dat.shape()

    # Test copy
    d_copy = dat.copy()
    assert d_copy.shape() == dat.shape()

    # Test detrend
    detrend = dat.detrend()
    assert detrend.shape() == dat.shape()

    # Test standardize
    s = dat.standardize()
    assert s.shape() == dat.shape()
    assert np.isclose(np.sum(s.mean().data), 0, atol=.1)
    s = dat.standardize(method='zscore')
    assert s.shape() == dat.shape()
    assert np.isclose(np.sum(s.mean().data), 0, atol=.1)

    # Test Sum
    s = dat.sum()
    assert s.shape() == dat[1].shape()

    # Test Groupby
    s1 = create_sphere([12, 10, -8], radius=10)
    s2 = create_sphere([22, -2, -22], radius=10)
    mask = Brain_Data([s1, s2])
    d = dat.groupby(mask)
    assert isinstance(d, Groupby)

    # Test Aggregate
    mn = dat.aggregate(mask, 'mean')
    assert isinstance(mn, Brain_Data)
    assert len(mn.shape()) == 1

    # Test Threshold
    s1 = create_sphere([12, 10, -8], radius=10)
    s2 = create_sphere([22, -2, -22], radius=10)
    mask = Brain_Data(s1)*5
    mask = mask + Brain_Data(s2)

    m1 = mask.threshold(thresh=.5)
    m2 = mask.threshold(thresh=3)
    m3 = mask.threshold(thresh='98%')
    assert np.sum(m1.data > 0) > np.sum(m2.data > 0)
    assert np.sum(m1.data > 0) == np.sum(m3.data > 0)

    # Test Regions
    r = mask.regions(min_region_size=10)
    m1 = Brain_Data(s1)
    m2 = r.threshold(1, binarize=True)
    # assert len(r)==2
    assert len(np.unique(r.to_nifti().get_fdata())) == 2 # JC edit: I think this is what you were trying to do
    diff = m2-m1
    assert np.sum(diff.data) == 0
Example #12
    def load_data(self, sub, return_type="Brain_Data", write="all", force="none", verbose=True, reload=True, **processes) -> Brain_Data:
        """Load data from pipeline.root/derivatives/pipeline.name and/or applies processes from
        pipeline.processes to it.
        By default, first checks wether the processes have been applied and saved before and 
        then loads them. By default, saves all the intermediate steps

        Parameters
        ----------
        sub : str
            Name of the subject to load the process from.
        return_type : str, optional
            Type the return value. Must be one of "path", "Brain_Data". If "path" and write="none" and file does not exist,
            throws an Error, as path does not exist. By default "Brain_Data"
        write : str, optional
            Wether to save the intermediate and the last step when applying processes. Must be one of "none" (no step is saved),
            "main" (only endresult is saved) or "all" (all intermediate steps are saved). By default "all"
        force : str, optional
            Wether to apply processes even though a file of this already exists. Must be one of "none", "main", "all" (see above).
            By default "none"
        verbose : bool, optional
            Wether to be verbose, by default True
        reload : bool, optional
            Wether to reload the pipeline.layout after writing a file. Only recommended if computing multiple independend processes.
            Then, afterwards, should be reloaded by hand (call `pipeline.layout = BIDSLayout(pipeline.root)`
            , by default True

        Returns
        -------
        Brain_Data, str
            (Un)Processed data or path to where the data is stored

        Raises
        ------
        TypeError
            If wrong return_type is supplied
        FileNotFoundError
            If subject is not found
        KeyError
            If an unknown process is supplied
        """
        #  We'll use this function to print only when being verbose
        def v_print(*args, **kwargs):
            if verbose:
                print(*args, **kwargs)

        return_types = ["Brain_Data", "path"]

        if return_type not in return_types:
            raise TypeError(
                f"Returntype {return_type} not recognised. Must be in {return_types}.")

        path = join(self.root, "derivatives", self.name, f"sub-{sub}")

        if not isdir(path):
            raise FileNotFoundError(
                f"Did not find subject {sub} in directory {dir}.")

        for key in processes.keys():
            if key not in self.processes.keys():
                raise KeyError(
                    f"{key} is not known process. Known processes are {self.processes.keys()}")

        if len(processes) == 0:
            v_print(f"...loading the unprocessed file of subject {sub}")
            path = self.original_path(layout=self.layout, sub=sub)
            if return_type == "path":
                return path
            if return_type == "Brain_Data":
                return Brain_Data(path)

        # This is the most important part:
        name = "_".join([f"sub-{sub}", "_".join(
            [f"{self.processes[key].readable(args)}" for key, args in processes.items()]), "bold.nii.gz"])

        if isfile(join(path, name)):
            v_print(f"...found {name}")
            if return_type == "path":
                return join(path, name)
            data = Brain_Data(join(path, name))
        else:
            last_process, last_key = processes.popitem()
            v_print(f"...{name} does not exist yet")
            yet_to_process = self.load_data(
                sub=sub,
                return_type="Brain_Data",
                write=("all" if write == "all" else "none"),
                **processes
            )
            v_print(f"Applying process {last_process}")
            data = self.processes[last_process].process(
                self,
                sub,
                yet_to_process,
                **last_key if isinstance(last_key, dict) else last_key
            )
            if write in ["all", "main"]:
                v_print(f"...writing {name}")
                data.write(join(path, name))
                if reload:
                    self.layout = BIDSLayout(self.root, derivatives=True)

        if return_type == "Brain_Data":
            return data
        if return_type == "path":
            return join(path, name)
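
    # A hypothetical usage sketch (not from the source; the `pipeline` variable
    # and the "smooth" process name are assumptions) illustrating the docstring
    # above:
    #
    #     pipeline = ...  # a configured object exposing root, name, processes, layout
    #     img = pipeline.load_data("01", return_type="Brain_Data",
    #                              write="all", smooth={"fwhm": 6})
    #     # returns a Brain_Data with the "smooth" process applied, writing each
    #     # intermediate step under <root>/derivatives/<name>/sub-01
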
    smoothed.X = dm
    stats = smoothed.regress()
    stats['residual'].data = np.float32(stats['residual'].data) # cast as float32 to reduce storage space
    stats['residual'].write(os.path.join(base_dir, sub, 'func', f'{sub}_denoise_smooth{fwhm}mm_task-sherlockPart1_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz'))


We also saved the cropped denoised viewing data as an hdf5 file to speed up loading times when using nltools.

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'

for scan in ['Part1', 'Part2']:
    file_list = glob.glob(os.path.join(data_dir, '*', 'func', f'*crop*{scan}*nii.gz'))
    for f in file_list:
        data = Brain_Data(f)
        data.write(f"{f.split('.nii.gz')[0]}.hdf5")

Finally, we have also precomputed average activations within a whole brain parcellation (n=50) for some of the tutorials.

data_dir = '/Volumes/Engram/Data/Sherlock/fmriprep'

mask = Brain_Data('http://neurovault.org/media/images/2099/Neurosynth%20Parcellation_0.nii.gz')

for scan in ['Part1', 'Part2']:
    file_list = glob.glob(os.path.join(data_dir, '*', 'func', f'*crop*{scan}*hdf5'))
    for f in file_list:
        sub = os.path.basename(f).split('_')[0]
        print(sub)
        data = Brain_Data(f)
        roi = data.extract_roi(mask)
        pd.DataFrame(roi.T).to_csv(os.path.join(os.path.dirname(f), f"{sub}_{scan}_Average_ROI_n50.csv" ), index=False)