Example #1
def test_bin_prop_ci():
    skip_if_no_external('scipy')
    n = 100
    succ_thresh = np.random.randint(n)
    acc = 1 - (float(succ_thresh) / n)
    bl = np.random.random(n) < acc
    ds = Dataset(bl)
    m95 = BinomialProportionCI()
    m50 = BinomialProportionCI(width=0.5)
    cids = m95(ds)
    assert_equal(cids.shape, (2, 1))
    # accuracy is in the CI
    maxdist = cids.samples[1, 0] - acc
    mindist = acc - cids.samples[0, 0]
    # but allow for numerical uncertainty proportional to the sample size
    assert_true(maxdist > 0 or maxdist <= 1. / n)
    assert_true(mindist > 0 or mindist <= 1. / n)
    # more than one feature
    ds = Dataset(np.transpose([bl, np.logical_not(bl)]))
    ci95 = m95(ds)
    assert_equal(ci95.shape, (2, 2))
    # CIs should be inverse
    assert_array_almost_equal(1 - ci95.samples[0, ::-1], ci95.samples[1])
    ci50 = m50(ds)
    assert_array_almost_equal(1 - ci50.samples[0, ::-1], ci50.samples[1])
    # 50% interval is smaller than 95%
    assert_true(np.all(ci95.samples[0] < ci50.samples[0]))
    assert_true(np.all(ci95.samples[1] > ci50.samples[1]))
    assert_equal(list(ci50.sa.ci_boundary), ['lower', 'upper'])
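For reference, the 95% bounds that m95 produces can be approximated by hand with the usual normal-approximation interval; the following is a hedged standalone sketch in plain numpy/scipy, not necessarily the exact method BinomialProportionCI implements.

import numpy as np
from scipy import stats

bl = np.random.random(100) < 0.7            # boolean accuracy samples, as above
p = bl.mean()                               # observed proportion of successes
z = stats.norm.isf((1 - 0.95) / 2.0)        # two-sided 95% critical value (~1.96)
half = z * np.sqrt(p * (1 - p) / len(bl))   # normal-approximation half-width
lower, upper = p - half, p + half           # rows 'lower' and 'upper' of the result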
Example #2
def test_stack_add_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['ok'] = data0.sa['ok'] = np.arange(5)
    data1.fa['ok'] = data1.sa['ok'] = np.arange(5)
    data0.fa['nok'] = data0.sa['nok'] = [0]
    data1.fa['nok'] = data1.sa['nok'] = np.arange(5)

    # function, collection name, the other collection name
    for xstack, colname, ocolname in ((vstack, 'fa', 'sa'),
                                      (hstack, 'sa', 'fa')):
        for add_param in None, 'update', 'drop_nonunique':

            kw = {colname: add_param} if add_param else {}
            r = xstack((data0, data1), **kw)
            COL = lambda x: getattr(x, colname)
            col = COL(r)
            ocol = getattr(r, ocolname)

            # in any scenario, the other collection should have got
            # both names and be just fine
            assert_array_equal(ocol['nok'].value, [0] * 5 + range(5))
            assert_array_equal(ocol['ok'].value, range(5) * 2)

            if add_param in ('update',):
                # will be of the last dataset
                assert_array_equal(col['nok'].value, COL(data1)['nok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
            elif add_param in (None, 'drop_nonunique'):
                assert('nok' not in col)  # must be dropped since not unique
                # both the same but let's check ;)
                assert_array_equal(col['ok'].value, COL(data0)['ok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
Example #3
    def _frobenius_norm_and_merge(self, dss_connectomes, dss_response, node_indices):
        # figure out which of the two types of data are larger
        if  dss_response[0].shape[0] > dss_connectomes[0].shape[0]:
            larger = dss_response
            smaller = dss_connectomes
        else:
            larger = dss_connectomes
            smaller = dss_response
        node_ids = node_indices
        # find the normalization ratio based on which is larger 
        norm_ratios = []
        for la, sm in zip(larger, smaller):
            laN = np.linalg.norm(la, ord='fro')
            smN = np.linalg.norm(sm, ord='fro')
            v = laN / smN
            norm_ratios.append(v)

        # normalize the smaller one and then merge the datasets
        merged_dss = []
        for la, sm, norm in zip(larger, smaller, norm_ratios):
            d_sm = sm.samples * norm
            merged = np.vstack((d_sm, la.samples))
            merged = Dataset(samples=merged)
            merged.fa['node_indices'] = node_ids.copy()
            merged_dss.append(merged)
        return merged_dss
Example #4
 def __call__(self, dataset):
     #if self.model == 'correlation':
     #    orig_ds = copy.deepcopy(dataset)
     #    zscore(orig_ds, chunks_attr=None)
     #    ref_ts = orig_ds[:,orig_ds.fa.roi_seed].samples
     #    corrs = np.mat(ref_ts).T*np.mat(orig_ds.samples)/orig_ds.nsamples
     #    corrs[np.isnan(corrs)] = 0
     #    corrs[abs(corrs)<self.cthresh] = 0
     #    corrs = corrs/np.sum(corrs)
     #    return Dataset(np.asarray(np.mat(orig_ds.samples)*corrs.T))
     #elif self.model == 'regression':
     X = np.mat(dataset[:, dataset.fa.roi_seed != True].samples)
     y = np.mat(dataset[:, dataset.fa.roi_seed == True].samples)
     try:
         Xi = np.linalg.pinv(X, 1e-5)
         r = y.T * X * Xi * y
         r = r[0, 0]**2
     except LinAlgError:
         r = -1000
     if r >= self.cthresh:
         if self.cthresh >= 0:
             ym = (y + r * (X * Xi * y)) / (1 + r)
         else:
             ym = (0.241275 * y + 0.758725 * (X * Xi * y))
         return Dataset(np.asarray(ym))
     else:
         return Dataset(np.asarray(y))
Example #5
    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        # transpose to have the number of features on the second axis
        # (as usual)
        weights = clf.weights.T

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (len(weights) + 1) +
                  "Result: min=%f max=%f" %\
                  (np.min(weights), np.max(weights)))

        # limit the labels to the number of sensitivity sets, to deal
        # with the case of `fit_all_weights=False`
        ds = Dataset(weights,
                     sa={clf.get_space(): clf._ulabels[:len(weights)]})

        if clf.params.has_bias:
            ds.sa['biases'] = clf.biases
        return ds
Example #6
def dense_connectivity_profile_isc(data):
    """
    Takes the data and creates a vertex-by-vertex full connectivity matrix for each subject, then performs ISC on the
    connectivity profiles.
    
    Parameters
    ----------
    data: an n_subjects-length list of (timeseries, features) datasets from which to compute a connectivity matrix.

    Returns
    -------
    all_results: a numpy array of shape (n_subjects, n_features) of ISC values.

    """
    from mvpa2.datasets.base import Dataset
    from mvpa2.mappers.fxy import FxyMapper

    conn_metric = lambda x, y: np.dot(x.samples, y.samples) / x.nsamples
    connectivity_mapper = FxyMapper(conn_metric)
    connectomes = np.ndarray((data.shape[0], data.shape[2], data.shape[2]),
                             dtype=float)
    for i, ds in enumerate(data):
        d = Dataset(ds)
        conn_targets = Dataset(samples=ds.T)
        connectivity_mapper.train(conn_targets)
        connectomes[i] = connectivity_mapper.forward(d)
        del conn_targets, d
    results = vertex_isc(connectomes)
    return results
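As a rough illustration of the connectivity metric used above (plain numpy, not FxyMapper itself): for one subject's z-scored (timepoints x features) array, the dot product divided by the number of timepoints is the feature-by-feature Pearson correlation matrix.

import numpy as np

ts = np.random.randn(100, 20)                  # hypothetical timepoints x features
ts = (ts - ts.mean(axis=0)) / ts.std(axis=0)   # z-score each column
connectome = np.dot(ts.T, ts) / ts.shape[0]    # features x features correlation matrix
# equivalent (up to numerical noise) to np.corrcoef(ts.T)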
Example #8
    def _call(self, dataset):
        """Computes the average correlation in similarity structure across chunks."""

        chunks_attr = self.params.chunks_attr
        nchunks = len(dataset.sa[chunks_attr].unique)
        if nchunks < 2:
            raise StandardError("This measure calculates similarity consistency across "
                                "chunks and is not meaningful for datasets with only "
                                "one chunk:")
        dsms = []
        chunks = []
        for chunk in dataset.sa[chunks_attr].unique:
            data = np.atleast_2d(
                    dataset.samples[dataset.sa[chunks_attr].value == chunk,:])
            if self.params.center_data:
                data = data - np.mean(data,0)
            dsm = pdist(data, self.params.pairwise_metric)
            dsms.append(dsm)
            chunks.append(chunk)
        dsms = np.vstack(dsms)

        if self.params.consistency_metric=='spearman':
            dsms = np.apply_along_axis(rankdata, 1, dsms)
        corrmat = np.corrcoef(dsms)
        if self.params.square:
            ds = Dataset(corrmat, sa={self.params.chunks_attr: chunks})
        else:
            ds = Dataset(squareform(corrmat,checks=False),
                         sa=dict(pairs=list(combinations(chunks, 2))))
        return ds
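The same consistency computation can be sketched standalone with plain numpy/scipy (one RDM per chunk, a Spearman-style rank transform, then the chunk-by-chunk correlation matrix); the shapes and parameter values here are illustrative assumptions.

import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import rankdata

data = np.random.randn(12, 30)                  # samples x features
chunks = np.repeat([0, 1, 2], 4)                # three chunks of four samples each
dsms = np.vstack([pdist(data[chunks == c], 'correlation')
                  for c in np.unique(chunks)])  # one condensed RDM per chunk
dsms = np.apply_along_axis(rankdata, 1, dsms)   # 'spearman' consistency metric
corrmat = np.corrcoef(dsms)                     # chunk x chunk consistency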
Example #9
def test_stack_add_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['ok'] = data0.sa['ok'] = np.arange(5)
    data1.fa['ok'] = data1.sa['ok'] = np.arange(5)
    data0.fa['nok'] = data0.sa['nok'] = [0]
    data1.fa['nok'] = data1.sa['nok'] = np.arange(5)

    # function, collection name, the other collection name
    for xstack, colname, ocolname in ((vstack, 'fa', 'sa'),
                                      (hstack, 'sa', 'fa')):
        for add_param in None, 'update', 'drop_nonunique':

            kw = {colname: add_param} if add_param else {}
            r = xstack((data0, data1), **kw)
            COL = lambda x: getattr(x, colname)
            col = COL(r)
            ocol = getattr(r, ocolname)

            # in any scenario, the other collection should have got
            # both names and be just fine
            assert_array_equal(ocol['nok'].value, [0] * 5 + list(range(5)))
            assert_array_equal(ocol['ok'].value, list(range(5)) * 2)

            if add_param in ('update',):
                # will be of the last dataset
                assert_array_equal(col['nok'].value, COL(data1)['nok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
            elif add_param in (None, 'drop_nonunique'):
                assert('nok' not in col)  # must be dropped since not unique
                # both the same but let's check ;)
                assert_array_equal(col['ok'].value, COL(data0)['ok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
Example #10
def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3, 8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # SAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))

    # now without grouping
    m = mean_feature()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples), m.forward(ds).samples)

    # And when operating on a dataset with >1D samples, then operate
    # only across "features", i.e. 1st dimension
    ds = Dataset(np.arange(24).reshape(3, 2, 2, 2))
    mapped = ds.get_mapped(m)
    assert_array_equal(m.forward(ds.samples), mapped.samples)
    assert_array_equal(mapped.samples.shape, (3, 2, 2))
    assert_array_equal(mapped.samples, np.mean(ds.samples, axis=1))
    # and still could map back? ;) not ATM, so just to ensure consistency
    assert_raises(NotImplementedError, mapped.a.mapper.reverse, mapped.samples)
    # but it should also work with standard 2d sample arrays
    ds = Dataset(np.arange(24).reshape(3, 8))
    mapped = ds.get_mapped(m)
    assert_array_equal(mapped.samples.shape, (3, 1))
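Where the expected csamples come from: with fa.roi = [0, 1] * 4, columns 0, 2, 4, 6 belong to roi 0 and columns 1, 3, 5, 7 to roi 1, so the per-sample group means can be reproduced directly.

import numpy as np

samples = np.arange(24).reshape(3, 8)
roi = np.array([0, 1] * 4)
means = np.column_stack([samples[:, roi == r].mean(axis=1) for r in (0, 1)])
# -> [[ 3.,  4.], [11., 12.], [19., 20.]], matching csamples above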
Example #11
def test_unique_stack():
    data = Dataset(np.reshape(np.arange(24), (4, 6)),
                        sa=dict(x=[0, 1, 0, 1]),
                        fa=dict(y=[x for x in 'abccba']))

    sa_stack = stack_by_unique_sample_attribute(data, 'x')
    assert_equal(sa_stack.shape, (2, 12))
    assert_array_equal(sa_stack.fa.x, [0] * 6 + [1] * 6)
    assert_array_equal(sa_stack.fa.y, [x for x in 'abccbaabccba'])

    fa_stack = stack_by_unique_feature_attribute(data, 'y')
    assert_equal(fa_stack.shape, (12, 2))
    assert_array_equal(fa_stack.sa.x, [0, 1] * 6)
    assert_array_equal(fa_stack.sa.y, [y for y in 'aaaabbbbcccc'])
    #assert_array_equal(fa_stack.fa.y,[''])

    # check values match the fa or sa
    for i in xrange(4):
        for j in xrange(6):
            d = data[i, j]
            for k, other in enumerate((sa_stack, fa_stack)):
                msk = other.samples == d.samples
                ii, jj = np.nonzero(msk) # find matching indices in other

                o = other[ii, jj]
                coll = [o.fa, o.sa][k]

                assert_equal(coll.x, d.sa.x)
                assert_equal(coll.y, d.fa.y)

    ystacker = lambda y: lambda x: stack_by_unique_feature_attribute(x, y)
    assert_raises(KeyError, ystacker('z'), data)

    data.fa['z'] = [z for z in '123451']
    assert_raises(ValueError, ystacker('z'), data)
Example #12
def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data],
                 fa={
                     's_ind': np.concatenate((ind, ind)),
                     't_ind': np.repeat([0, 1], 27)
                 })
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]), qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))

    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature attribute
    ds.fa['lit'] = ['roi1', 'ro2', 'r3'] * 18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only /3 ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])
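Why the corner query returns [0, 1, 3, 9, 27, 28, 30, 36]: Sphere(1) around (0, 0, 0) covers that voxel plus its three in-grid neighbours, and the dataset holds two stacked copies of the 3x3x3 grid (t_ind 0 and 1), so each flat index appears again offset by 27.

import numpy as np

neighbours = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0)]
flat = [np.ravel_multi_index(v, (3, 3, 3)) for v in neighbours]  # [0, 1, 3, 9]
both_copies = sorted(flat + [f + 27 for f in flat])              # [0, 1, 3, 9, 27, 28, 30, 36]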
Example #13
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_(merged.nfeatures == 5)
    l12 = [1] * 5 + [2] * 3
    l1 = [1] * 8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1] * 5 + [2] * 3 + [3] * 2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1] * 5 + [0] * 5)
Example #14
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_( merged.nfeatures == 5 )
    l12 = [1]*5 + [2]*3
    l1 = [1]*8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1]*5 + [2]*3 + [3]*2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1]*5 + [0]*5)
Example #15
def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind)),
                                   't_ind': np.repeat([0,1], 27)})
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)),
                       [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]),
                       qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))

    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature attribute
    ds.fa['lit'] =  ['roi1', 'ro2', 'r3']*18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0,0,0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only /3 ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])
Example #16
    def _call(self, dataset):
        # XXX Hm... it might make sense to unify access functions
        # naming across our swig libsvm wrapper and sg access
        # functions for svm
        clf = self.clf
        sgsvm = clf.svm
        sens_labels = None
        if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
            sens, biases = [], []
            nsvms = sgsvm.get_num_svms()
            clabels = sorted(clf._attrmap.values())
            nclabels = len(clabels)
            sens_labels = []
            isvm = 0  # index for svm among known

            for i in xrange(nclabels):
                for j in xrange(i + 1, nclabels):
                    sgsvmi = sgsvm.get_svm(isvm)
                    labels_tuple = (clabels[i], clabels[j])
                    # Since we gave the labels in incremental order,
                    # we always should be right - but it does not
                    # hurt to check if set of labels is the same
                    if __debug__ and _shogun_exposes_slavesvm_labels:
                        if not sgsvmi.get_labels():
                            # We need to call classify() so labels get assigned
                            # to the multiclass SVM
                            sgsvm.classify()
                        assert (set([
                            sgsvmi.get_label(int(x))
                            for x in sgsvmi.get_support_vectors()
                        ]) == set(labels_tuple))
                    sens1, bias = self.__sg_helper(sgsvmi)
                    sens.append(sens1)
                    biases.append(bias)
                    sens_labels += [labels_tuple[::-1]]  # ??? positive first
                    isvm += 1
            assert (len(sens) == nsvms)  # we should have  covered all
        else:
            sens1, bias = self.__sg_helper(sgsvm)
            biases = np.atleast_1d(bias)
            sens = np.atleast_2d(sens1)
            if not clf.__is_regression__:
                assert (set(clf._attrmap.values()) == set([-1.0, 1.0]))
                assert (sens.shape[0] == 1)
                sens_labels = [(-1.0, 1.0)]

        ds = Dataset(np.atleast_2d(sens))
        if sens_labels is not None:
            if isinstance(sens_labels[0], tuple):
                # Need to have them in array of dtype object
                sens_labels = asobjarray(sens_labels)

            if len(clf._attrmap):
                sens_labels = clf._attrmap.to_literal(sens_labels,
                                                      recurse=True)
            ds.sa[clf.get_space()] = sens_labels
        ds.sa['biases'] = biases

        return ds
Example #17
def test_samples_shape():
    ds = Dataset.from_wizard(np.ones((10, 2, 3, 4)), targets=1, chunks=1)
    ok_(ds.samples.shape == (10, 24))

    # what happens to 1D samples
    ds = Dataset(np.arange(5))
    assert_equal(ds.shape, (5, 1))
    assert_equal(ds.nfeatures, 1)
Example #18
    def _call(self, dataset):
        # This code is based on SciPy's stats.f_oneway()
        # Copyright (c) Gary Strangman.  All rights reserved
        # License: BSD
        #
        # However, it got tweaked and optimized to better fit into PyMVPA.

        # number of groups
        targets_sa = dataset.sa[self.get_space()]
        labels = targets_sa.value
        ul = targets_sa.unique

        na = len(ul)
        bign = float(dataset.nsamples)
        alldata = dataset.samples

        # total squares of sums
        sostot = np.sum(alldata, axis=0)
        sostot *= sostot
        sostot /= bign

        # total sum of squares
        sstot = np.sum(alldata * alldata, axis=0) - sostot

        # between group sum of squares
        ssbn = 0
        for l in ul:
            # all samples for the respective label
            d = alldata[labels == l]
            sos = np.sum(d, axis=0)
            sos *= sos
            ssbn += sos / float(len(d))

        ssbn -= sostot
        # within
        sswn = sstot - ssbn

        # degrees of freedom
        dfbn = na - 1
        dfwn = bign - na

        # mean sums of squares
        msb = ssbn / float(dfbn)
        msw = sswn / float(dfwn)
        f = msb / msw
        # assure no NaNs -- otherwise it leads instead of
        # sane unittest failure (check of NaNs) to crazy
        #   File "mtrand.pyx", line 1661, in mtrand.shuffle
        #  TypeError: object of type 'numpy.int64' has no len()
        # without any sane backtrace
        f[np.isnan(f)] = 0

        if externals.exists('scipy'):
            from scipy.stats import fprob
            return Dataset(f[np.newaxis], fa={'fprob': fprob(dfbn, dfwn, f)})
        else:
            return Dataset(f[np.newaxis])
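For a single feature, the F statistic computed above can be cross-checked against scipy's f_oneway; note that scipy.stats.fprob, used in the last branch, is no longer available in recent scipy releases, but the probability it gave corresponds to the F distribution's survival function. The values below are illustrative assumptions.

import numpy as np
from scipy.stats import f_oneway, f as f_dist

x = np.random.randn(30)                       # one feature, 30 samples
labels = np.repeat([0, 1, 2], 10)             # three groups
F, p = f_oneway(*(x[labels == l] for l in np.unique(labels)))
p_manual = f_dist.sf(F, 2, len(x) - 3)        # same p-value via the F survival function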
Example #19
    def _call(self, dataset):
        # XXX Hm... it might make sense to unify access functions
        # naming across our swig libsvm wrapper and sg access
        # functions for svm
        clf = self.clf
        sgsvm = clf.svm
        sens_labels = None
        if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
            sens, biases = [], []
            nsvms = sgsvm.get_num_svms()
            clabels = sorted(clf._attrmap.values())
            nclabels = len(clabels)
            sens_labels = []
            isvm = 0                    # index for svm among known

            for i in xrange(nclabels):
                for j in xrange(i+1, nclabels):
                    sgsvmi = sgsvm.get_svm(isvm)
                    labels_tuple = (clabels[i], clabels[j])
                    # Since we gave the labels in incremental order,
                    # we always should be right - but it does not
                    # hurt to check if set of labels is the same
                    if __debug__ and _shogun_exposes_slavesvm_labels:
                        if not sgsvmi.get_labels():
                            # We need to call classify() so labels get assigned
                            # to the multiclass SVM
                            sgsvm.classify()
                        assert(set([sgsvmi.get_label(int(x))
                                    for x in sgsvmi.get_support_vectors()])
                               == set(labels_tuple))
                    sens1, bias = self.__sg_helper(sgsvmi)
                    sens.append(sens1)
                    biases.append(bias)
                    sens_labels += [labels_tuple[::-1]] # ??? positive first
                    isvm += 1
            assert(len(sens) == nsvms)  # we should have  covered all
        else:
            sens1, bias = self.__sg_helper(sgsvm)
            biases = np.atleast_1d(bias)
            sens = np.atleast_2d(sens1)
            if not clf.__is_regression__:
                assert(set(clf._attrmap.values()) == set([-1.0, 1.0]))
                assert(sens.shape[0] == 1)
                sens_labels = [(-1.0, 1.0)]

        ds = Dataset(np.atleast_2d(sens))
        if sens_labels is not None:
            if isinstance(sens_labels[0], tuple):
                # Need to have them in array of dtype object
                sens_labels = asobjarray(sens_labels)

            if len(clf._attrmap):
                sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)
            ds.sa[clf.get_space()] = sens_labels
        ds.sa['biases'] = biases

        return ds
Example #20
def test_ex_from_masked():
    ds = Dataset.from_wizard(samples=np.atleast_2d(np.arange(5)).view(myarray),
                             targets=1,
                             chunks=1)
    # simple sequence has to be a single pattern
    assert_equal(ds.nsamples, 1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    # check correct pattern layout (1x5)
    assert_array_equal(ds.samples, [[0, 1, 2, 3, 4]])

    # check for single label and origin
    assert_array_equal(ds.targets, [1])
    assert_array_equal(ds.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(
        ValueError, vstack,
        (ds, Dataset.from_wizard(np.ones((2, 3)), targets=1, chunks=1)))

    # now add two real patterns
    ds = vstack((ds,
                 Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                     targets=2,
                                     chunks=2)))
    assert_equal(ds.nsamples, 3)
    assert_array_equal(ds.targets, [1, 2, 2])
    assert_array_equal(ds.chunks, [1, 2, 2])

    # test unique class labels
    ds = vstack((ds,
                 Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                     targets=3,
                                     chunks=5)))
    assert_array_equal(ds.sa['targets'].unique, [1, 2, 3])

    # test wrong attributes length
    assert_raises(ValueError,
                  Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)),
                  targets=[1, 2, 3],
                  chunks=2)
    assert_raises(ValueError,
                  Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)),
                  targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

    # now test one that is using from_masked
    ds = datasets['3dlarge']
    for a in ds.sa:
        assert_equal(len(ds.sa[a].value), len(ds))
    for a in ds.fa:
        assert_equal(len(ds.fa[a].value), ds.nfeatures)
Example #21
def create_mvpa_dataset(aXData1, aXData2, chunks, labels):
    feat_list = []
    for x1, x2, chunk in zip(aXData1, aXData2, chunks):
        feat_list.append([x1, x2])

    data = Dataset(samples=feat_list)
    data.sa['id'] = range(0,len(labels))
    data.sa['chunks'] = chunks
    data.sa['targets'] = labels

    return data
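A hypothetical call to the helper above (it assumes the surrounding module imports Dataset from mvpa2 and numpy as np; all values below are made up for illustration).

import numpy as np

aXData1 = np.random.randn(20)                 # first per-sample feature stream
aXData2 = np.random.randn(20)                 # second per-sample feature stream
chunks = np.repeat(range(4), 5)               # 4 chunks of 5 samples
labels = np.tile([0, 1], 10)                  # alternating targets
ds = create_mvpa_dataset(aXData1, aXData2, chunks, labels)
# ds.shape == (20, 2) with sa 'id', 'chunks' and 'targets' attached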
Example #22
def create_mvpa_dataset(aXData1, aXData2, aXData3, aXData4, chunks, labels):
    feat_list = []
    for x1, x2, x3, x4, chunk in zip(aXData1, aXData2, aXData3, aXData4, chunks):
        feat_list.append([x1, x2, x3, x4])

    data = Dataset(samples=feat_list)
    data.sa['id'] = range(0, len(labels))
    data.sa['chunks'] = chunks
    data.sa['targets'] = labels

    return data
Example #23
def _get_test_dataset(include_nodes=True):
    # returns test dataset matching the contents of _get_test_sample_node_data
    samples, nodes, _ = _get_test_sample_node_data()
    ds = Dataset(np.asarray(samples))

    if include_nodes:
        ds.fa['node_indices'] = np.asarray(nodes)

    nsamples = ds.nsamples
    ds.sa['intents'] = ['NIFTI_INTENT_NONE'] * nsamples

    return ds
Example #24
 def _call(self,dataset):
     data = dataset.samples
     if self.params.center_data:
         data = data - np.mean(data,0)
     dsm = pdist(data,self.params.pairwise_metric)
     if self.params.comparison_metric=='spearman':
         dsm = rankdata(dsm)
     rho, p = pearsonr(dsm,self.target_dsm)
     if self.params.corrcoef_only:
         return Dataset([rho], fa={'metrics': ['rho']})
     else:
         return Dataset([[rho,p]], fa={'metrics': ['rho', 'p']})
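A standalone sketch of what this measure computes, using plain scipy only (names and shapes here are illustrative): correlate the condensed dissimilarity structure of the data with a target DSM given in the same pdist form.

import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import pearsonr

data = np.random.randn(8, 50)                  # conditions x features
target_dsm = pdist(np.random.randn(8, 5))      # hypothesized structure, condensed form
dsm = pdist(data, 'correlation')
rho, p = pearsonr(dsm, target_dsm)             # the values returned as a Dataset above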
Example #26
 def _prep_h2a_data(self, response_data, node_indices):
     for d in response_data:
         if isinstance(d, np.ndarray):
             d = Dataset(d)
         d.fa['node_indices']= node_indices.copy()
     
     connectivity_data = self._get_connectomes(response_data)
     h2a_input_data = self._frobenius_norm_and_merge(connectivity_data, response_data, node_indices)
     for d in h2a_input_data:
         d.fa['node_indices'] = node_indices.copy()
         zscore(d, chunks_attr=None)
     return h2a_input_data
Example #27
def test_stack_add_dataset_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.a['one'] = np.ones(2)
    data0.a['two'] = 2
    data0.a['three'] = 'three'
    data0.a['common'] = range(10)
    data0.a['array'] = np.arange(10)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1.a['one'] = np.ones(3)
    data1.a['two'] = 3
    data1.a['four'] = 'four'
    data1.a['common'] = range(10)
    data1.a['array'] = np.arange(10)


    vstacker = lambda x: vstack((data0, data1), a=x)
    hstacker = lambda x: hstack((data0, data1), a=x)

    add_params = (1, None, 'unique', 'uniques', 'all', 'drop_nonunique')

    for stacker in (vstacker, hstacker):
        for add_param in add_params:
            if add_param == 'unique':
                assert_raises(DatasetError, stacker, add_param)
                continue

            r = stacker(add_param)

            if add_param == 1:
                assert_array_equal(data1.a.one, r.a.one)
                assert_equal(r.a.two, 3)
                assert_equal(r.a.four, 'four')
                assert_true('three' not in r.a.keys())
                assert_true('array' in r.a.keys())
            elif add_param == 'uniques':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.four, ('four',))
            elif add_param == 'all':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.three, ('three', None))
            elif add_param == 'drop_nonunique':
                assert_equal(set(r.a.keys()),
                             set(['common', 'three', 'four', 'array']))
                assert_equal(r.a.three, 'three')
                assert_equal(r.a.four, 'four')
                assert_equal(r.a.common, range(10))
                assert_array_equal(r.a.array, np.arange(10))
Example #28
def test_cosmo_do_not_store_unsupported_datatype():
    ds = Dataset(np.zeros((0, 0)))

    class ArbitraryClass(object):
        pass

    ds.a['unused'] = ArbitraryClass()
    c = cosmo.map2cosmo(ds)
    assert_false('a' in c.keys())

    ds.a['foo'] = np.zeros((1,))
    c = cosmo.map2cosmo(ds)
    assert_true('a' in c.keys())
Example #29
 def _call(self,dataset):
     data = dataset.samples
     if self.center_data:
         data = data - np.mean(data,0)
     dsm = pdist(data,self.pairwise_metric)
     if self.comparison_metric=='spearman':
         dsm = rankdata(dsm)
     if self.partial_dsm == None:
         rho, p = pearsonr(dsm,self.target_dsm)
         return Dataset(np.array([rho]))
     elif self.partial_dsm != None:
         rp = pcf3(dsm,self.target_dsm,self.partial_dsm)
         return Dataset(np.array([rp['rxy_z']]))
Example #30
 def _call(self, dataset):
     data = dataset.samples
     if self.center_data:
         data = data - np.mean(data, 0)
     dsm = pdist(data, self.pairwise_metric)
     if self.comparison_metric == 'spearman':
         dsm = rankdata(dsm)
     rho, p = pearsonr(dsm, self.target_dsm)
     if self.corrcoef_only:
         return Dataset(np.array([
             rho,
         ]))
     else:
         return Dataset(np.array([rho, p]))
Example #31
def test_assign_sa():
    # https://github.com/PyMVPA/PyMVPA/issues/149
    ds = Dataset(np.arange(6).reshape((2,-1)), sa=dict(targets=range(2)))
    ds.sa['task'] = ds.sa['targets']
    # so it should be a new collectable now
    assert_equal(ds.sa['task'].name, 'task')
    assert_equal(ds.sa['targets'].name, 'targets') # this lead to issue reported in 149
    assert('task' in ds.sa.keys())
    assert('targets' in ds.sa.keys())
    ds1 = ds[:, 1]
    assert('task' in ds1.sa.keys())
    assert('targets' in ds1.sa.keys()) # issue reported in 149
    assert_equal(ds1.sa['task'].name, 'task')
    assert_equal(ds1.sa['targets'].name,'targets')
Example #33
def test_labelpermutation_randomsampling():
    ds = Dataset.from_wizard(np.ones((5, 10)),     targets=range(5), chunks=1)
    for i in xrange(1, 5):
        ds.append(Dataset.from_wizard(np.ones((5, 10)) + i,
                                      targets=range(5), chunks=i+1))
    # assign some feature attributes
    ds.fa['roi'] = np.repeat(np.arange(5), 2)
    ds.fa['lucky'] = np.arange(10)%2
    # use subclass for testing if it would survive
    ds.samples = ds.samples.view(myarray)

    ok_(ds.get_nsamples_per_attr('targets') == {0:5, 1:5, 2:5, 3:5, 4:5})
    sample = ds.random_samples(2)
    ok_(sample.get_nsamples_per_attr('targets').values() == [ 2, 2, 2, 2, 2 ])
    ok_((ds.sa['chunks'].unique == range(1, 6)).all())
Example #35
    def _call(self, dataset):
        # just for the beauty of it
        X = self._design

        # precompute transformation is not yet done
        if self._inv_design is None:
            self._inv_ip = (X.T * X).I
            self._inv_design = self._inv_ip * X.T

        # get parameter estimations for all features at once
        # (betas x features)
        betas = self._inv_design * dataset.samples

        # charge state
        self.ca.pe = pe = betas.T.A

        # if betas and no z-stats are desired return them right away
        if not self._voi == 'pe' or self.ca.is_enabled('zstat'):
            # compute residuals
            residuals = X * betas
            residuals -= dataset.samples

            # estimates of the parameter variance and compute zstats
            # assumption of mean(E) == 0 and equal variance
            # XXX next lines ignore off-diagonal elements and hence covariance
            # between regressors. The humble being writing these lines asks the
            # god of statistics for forgiveness, because it knows not what it does
            diag_ip = np.diag(self._inv_ip)
            # (features x betas)
            beta_vars = np.array([ r.var() * diag_ip for r in residuals.T ])
            # (parameter x feature)
            zstat = pe / np.sqrt(beta_vars)

            # charge state
            self.ca.zstat = zstat

        if self._voi == 'pe':
            # return as (beta x feature)
            result = Dataset(pe.T)
        elif self._voi == 'zstat':
            # return as (zstat x feature)
            result = Dataset(zstat.T)
        else:
            # we shall never get to this point
            raise ValueError, \
                  "Unknown variable of interest '%s'" % str(self._voi)
        result.sa['regressor'] = np.arange(len(result))
        return result
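The parameter estimates ('pe') above are ordinary least squares; a minimal plain-numpy equivalent via the pseudo-inverse, with made-up shapes, looks like this.

import numpy as np

X = np.random.randn(50, 3)                     # design: samples x regressors
Y = np.random.randn(50, 10)                    # data: samples x features
betas = np.linalg.pinv(X).dot(Y)               # regressors x features, like _inv_design * samples
residuals = Y - X.dot(betas)                   # used above for the zstat variance estimate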
Example #36
def test_h5py_io(dsfile):
    skip_if_no_external('h5py')

    # store random dataset to file
    ds = datasets['3dlarge']
    ds.save(dsfile)

    # reload and check for identity
    ds2 = Dataset.from_hdf5(dsfile)
    assert_array_equal(ds.samples, ds2.samples)
    for attr in ds.sa:
        assert_array_equal(ds.sa[attr].value, ds2.sa[attr].value)
    for attr in ds.fa:
        assert_array_equal(ds.fa[attr].value, ds2.fa[attr].value)
    assert_true(len(ds.a.mapper), 2)

    # since we have no __equal__ do at least some comparison
    assert_equal(repr(ds.a.mapper), repr(ds2.a.mapper))

    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        #
        # INCORRECT slicing (:-1) since without any hash it results in
        # empty list -- moreover we seem to no longer report ids with #
        # any longer
        #
        #assert_equal('#'.join(repr(ds.a.mapper).split('#')[:-1]),
        #             '#'.join(repr(ds2.a.mapper).split('#')[:-1]))
        pass
Example #37
def test_multidim_attrs():
    samples = np.arange(24).reshape(2, 3, 4)
    # have a dataset with two samples -- mapped from 2d into 1d
    # but have 2d labels and 3d chunks -- whatever that is
    ds = Dataset.from_wizard(samples.copy(),
                             targets=samples.copy(),
                             chunks=np.random.normal(size=(2,10,4,2)))
    assert_equal(ds.nsamples, 2)
    assert_equal(ds.nfeatures, 12)
    assert_equal(ds.sa.targets.shape, (2, 3, 4))
    assert_equal(ds.sa.chunks.shape, (2, 10, 4, 2))

    # try slicing
    subds = ds[0]
    assert_equal(subds.nsamples, 1)
    assert_equal(subds.nfeatures, 12)
    assert_equal(subds.sa.targets.shape, (1, 3, 4))
    assert_equal(subds.sa.chunks.shape, (1, 10, 4, 2))

    # add multidim feature attr
    fattr = ds.mapper.forward(samples)
    assert_equal(fattr.shape, (2, 12))
    # should puke -- first axis is #samples
    assert_raises(ValueError, ds.fa.__setitem__, 'moresamples', fattr)
    # but that should be fine
    ds.fa['moresamples'] = fattr.T
    assert_equal(ds.fa.moresamples.shape, (12, 2))
Example #39
def cosmo_dataset(cosmo):
    '''
    Construct Dataset from CoSMoMVPA format

    Parameters
    ----------
    cosmo: str or Dataset-like or dict
        If a str it is treated as a filename of a .mat file with a matlab
        struct used in CoSMoMVPA, i.e. a struct with fields .samples, .sa,
        .fa, and .a.
        If a dict is is treated like the result from scipy's loadmat of
        a matlab struct used in CoSMoMVPA.

    Returns
    -------
    ds : Dataset
        PyMVPA Dataset object with values in .samples, .fa., .sa and .a
        based on the input
    '''

    if isinstance(cosmo, basestring):
        # load file
        cosmo = _loadmat_internal(cosmo)

    # do some sanity checks
    _check_cosmo_dataset(cosmo)

    # store samples
    args = dict(samples=cosmo['samples'])

    # set dataset, feature and sample attributes
    args.update(_attributes_cosmo2dict(cosmo))

    # build dataset using samples, fa, sa and a arguments
    return Dataset(**args)
Example #40
def test_labelschunks_access():
    samples = np.arange(12).reshape((4, 3)).view(myarray)
    labels = range(4)
    chunks = [1, 1, 2, 2]
    ds = Dataset.from_wizard(samples, labels, chunks)

    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    assert_array_equal(ds.targets, labels)
    assert_array_equal(ds.chunks, chunks)

    # moreover they should point to the same thing
    ok_(ds.targets is ds.sa.targets)
    ok_(ds.targets is ds.sa['targets'].value)
    ok_(ds.chunks is ds.sa.chunks)
    ok_(ds.chunks is ds.sa['chunks'].value)

    # assignment should work at all levels including 1st
    ds.targets = chunks
    assert_array_equal(ds.targets, chunks)
    ok_(ds.targets is ds.sa.targets)
    ok_(ds.targets is ds.sa['targets'].value)

    # test broadcasting
    # but not for plain scalars
    assert_raises(ValueError, ds.set_attr, 'sa.bc', 5)
    # and not for plain plain str
    assert_raises(TypeError, ds.set_attr, 'sa.bc', "mike")
    # but for any iterable of len == 1
    ds.set_attr('sa.bc', (5,))
    ds.set_attr('sa.dc', ["mike"])
    assert_array_equal(ds.sa.bc, [5] * len(ds))
    assert_array_equal(ds.sa.dc, ["mike"] * len(ds))
Example #41
 def get_data(self):
     data = np.random.standard_normal(( 100, 2, 2, 2 ))
     labels = np.concatenate( ( np.repeat( 0, 50 ),
                               np.repeat( 1, 50 ) ) )
     chunks = np.repeat( range(5), 10 )
     chunks = np.concatenate( (chunks, chunks) )
     return Dataset.from_wizard(samples=data, targets=labels, chunks=chunks)
Example #42
    def _call(self, dataset):
        """Computes featurewise f-scores using compound comparisons."""

        targets_sa = dataset.sa[self.get_space()]
        orig_labels = targets_sa.value
        labels = orig_labels.copy()

        # Lets create a very shallow copy of a dataset with just
        # samples and targets_attr
        dataset_mod = Dataset(dataset.samples, sa={self.get_space(): labels})
        results = []
        for ul in targets_sa.unique:
            labels[orig_labels == ul] = 1
            labels[orig_labels != ul] = 2
            f_ds = OneWayAnova._call(self, dataset_mod)
            if 'fprob' in f_ds.fa:
                # rename the fprob attribute to something label specific
                # to survive final aggregation stage
                f_ds.fa['fprob_' + str(ul)] = f_ds.fa.fprob
                del f_ds.fa['fprob']
            results.append(f_ds)

        results = vstack(results)
        results.sa[self.get_space()] = targets_sa.unique
        return results
Example #43
def test_addaxis():
    from mvpa2.mappers.shape import AddAxisMapper
    ds = Dataset(np.arange(24).reshape(2, 3, 4),
                 sa={'testsa': np.arange(2)},
                 fa={'testfa': np.arange(3)})
    ds0 = AddAxisMapper(pos=0)(ds)
    assert_array_equal(ds0.shape, (1,) + ds.shape)
    # sas have extra dimension
    assert_array_equal(ds0.sa.testsa[0], ds.sa.testsa)
    # fas are duplicated
    assert_array_equal(ds0.fa.testfa[0], ds0.fa.testfa[1])
    ds1 = AddAxisMapper(pos=1)(ds)
    assert_array_equal(ds1.shape, (2, 1, 3, 4))
    # same sample attribute
    assert_equal(ds1.sa, ds.sa)
    # fas have extra dimension
    assert_array_equal(ds1.fa.testfa[0], ds.fa.testfa)
    ds2 = AddAxisMapper(pos=2)(ds)
    assert_array_equal(ds2.shape, (2, 3, 1, 4))
    # no change to attribute collections
    assert_equal(ds2.sa, ds.sa)
    assert_equal(ds2.fa, ds.fa)
    # append an axis
    ds3 = AddAxisMapper(pos=3)(ds)
    assert_array_equal(ds3.shape, ds.shape + (1,))
    # reverse indexing
    ds_1 = AddAxisMapper(pos=-1)(ds)
    assert_array_equal(ds3.samples, ds_1.samples)
    assert_equal(ds3.sa, ds_1.sa)
    assert_equal(ds3.fa, ds_1.fa)
    # add multiple axes
    ds4 = AddAxisMapper(pos=4)(ds)
    assert_array_equal(ds4.shape, ds.shape + (1, 1))
Example #44
def test_h5py_io():
    skip_if_no_external('h5py')

    tempdir = tempfile.mkdtemp()

    # store random dataset to file
    ds = datasets['3dlarge']
    ds.save(os.path.join(tempdir, 'plain.hdf5'))

    # reload and check for identity
    ds2 = Dataset.from_hdf5(os.path.join(tempdir, 'plain.hdf5'))
    assert_array_equal(ds.samples, ds2.samples)
    for attr in ds.sa:
        assert_array_equal(ds.sa[attr].value, ds2.sa[attr].value)
    for attr in ds.fa:
        assert_array_equal(ds.fa[attr].value, ds2.fa[attr].value)
    assert_true(len(ds.a.mapper), 2)
    # since we have no __equal__ do at least some comparison
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        assert_equal('#'.join(repr(ds.a.mapper).split('#')[:-1]),
                     '#'.join(repr(ds2.a.mapper).split('#')[:-1]))
    else:
        assert_equal(repr(ds.a.mapper), repr(ds2.a.mapper))


    #cleanup temp dir
    shutil.rmtree(tempdir, ignore_errors=True)
Example #45
def test_origmask_extraction():
    origdata = np.random.standard_normal((10, 2, 4, 3))
    data = Dataset.from_wizard(origdata, targets=2, chunks=2)

    # check with custom mask
    sel = data[:, 5]
    ok_(sel.samples.shape[1] == 1)
Example #46
def test_feature_masking():
    mask = np.zeros((5, 3), dtype='bool')
    mask[2, 1] = True
    mask[4, 0] = True
    data = Dataset.from_wizard(np.arange(60).reshape((4, 5, 3)),
                               targets=1, chunks=1, mask=mask)

    # check simple masking
    ok_(data.nfeatures == 2)

    # selection should be idempotent
    ok_(data[:, mask].nfeatures == data.nfeatures)
    # check that correct feature get selected
    assert_array_equal(data[:, 1].samples[:, 0], [12, 27, 42, 57])
    # XXX put back when coord -> fattr is implemented
    #ok_(tuple(data[:, 1].a.mapper.getInId(0)) == (4, 0))
    ok_(data[:, 1].a.mapper.forward1(mask).shape == (1,))

    # check sugarings
    # XXX put me back
    #self.failUnless(np.all(data.I == data.origids))
    assert_array_equal(data.C, data.chunks)
    assert_array_equal(data.UC, np.unique(data.chunks))
    assert_array_equal(data.T, data.targets)
    assert_array_equal(data.UT, np.unique(data.targets))
    assert_array_equal(data.S, data.samples)
    assert_array_equal(data.O, data.mapper.reverse(data.samples))
Example #48
def test_mean_removal():
    test_array = np.array([[0, 0.5, 1, 1.5],
                           [2, 2.5, 3, 3.5],
                           [3, 3.5, 4, 4.5],
                           [5, 5.5, 6, 6.5],
                           [7, 7.5, 8, 8.5]])
    test_dataset = Dataset(test_array)
    desired_result = np.array([[-0.75, -0.25,  0.25,  0.75],
                               [-0.75, -0.25,  0.25,  0.75],
                               [-0.75, -0.25,  0.25,  0.75],
                               [-0.75, -0.25,  0.25,  0.75],
                               [-0.75, -0.25,  0.25,  0.75]])

    mr = MeanRemoval(in_place=False)
    mr_inplace = MeanRemoval(in_place=True)
    mr_fx = subtract_mean_feature()

    functions = (mr, mr_inplace, mr_fx)
    for function in functions:
        assert_true(np.array_equal(function(test_array.copy()),
                                   desired_result), function)

    for function in functions:
        assert_true(np.array_equal(function(test_dataset.copy()).samples,
                                   desired_result))

    random_array = np.random.rand(50, 1000)
    assert_true(np.array_equal(mr_fx(random_array.copy()),
                               mr(random_array.copy())))
    assert_true(np.array_equal(mr_fx(random_array.copy()),
                               mr_inplace(random_array.copy())))

    # corner cases
    int_arr = np.array([1, 2, 3, 4, 5])
    desired = int_arr.astype(float) - int_arr.mean()
    assert_array_equal(mr.forward1(int_arr), desired)
    # or list
    assert_array_equal(mr.forward1(list(int_arr)), desired)
    # missing value -> NaN just like mean() would do
    nan_arr = np.array([1, 2, np.nan, 4, 5])
    assert_array_equal(mr.forward1(nan_arr), [np.nan] * len(int_arr))
    # but with a masked array it works as intended, i.e. just like mean()
    nan_arr = np.ma.array(nan_arr, mask=np.isnan(nan_arr))
    nan_arr_dm = desired.copy()
    nan_arr_dm[2] = np.nan
    assert_array_equal(mr.forward1(nan_arr), nan_arr_dm)
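The desired_result above is simply each row minus its own mean; in plain numpy:

import numpy as np

test_array = np.array([[0, 0.5, 1, 1.5],
                       [2, 2.5, 3, 3.5]])
demeaned = test_array - test_array.mean(axis=1, keepdims=True)
# -> [[-0.75, -0.25, 0.25, 0.75], [-0.75, -0.25, 0.25, 0.75]]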
Example #49
def test_ex_from_masked():
    ds = Dataset.from_wizard(samples=np.atleast_2d(np.arange(5)).view(myarray),
                             targets=1, chunks=1)
    # simple sequence has to be a single pattern
    assert_equal(ds.nsamples, 1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    # check correct pattern layout (1x5)
    assert_array_equal(ds.samples, [[0, 1, 2, 3, 4]])

    # check for single label and origin
    assert_array_equal(ds.targets, [1])
    assert_array_equal(ds.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(DatasetError, ds.append,
                  Dataset.from_wizard(np.ones((2,3)), targets=1, chunks=1))

    # now add two real patterns
    ds.append(Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                  targets=2, chunks=2))
    assert_equal(ds.nsamples, 3)
    assert_array_equal(ds.targets, [1, 2, 2])
    assert_array_equal(ds.chunks, [1, 2, 2])

    # test unique class labels
    ds.append(Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                  targets=3, chunks=5))
    assert_array_equal(ds.sa['targets'].unique, [1, 2, 3])

    # test wrong attributes length
    assert_raises(ValueError, Dataset.from_wizard,
                  np.random.standard_normal((4,2,3,4)), targets=[1, 2, 3],
                  chunks=2)
    assert_raises(ValueError, Dataset.from_wizard,
                  np.random.standard_normal((4,2,3,4)), targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

    # now test one that is using from_masked
    ds = datasets['3dlarge']
    for a in ds.sa:
        assert_equal(len(ds.sa[a].value), len(ds))
    for a in ds.fa:
        assert_equal(len(ds.fa[a].value), ds.nfeatures)
Example #50
def test_shape_conversion():
    ds = Dataset.from_wizard(np.arange(24).reshape((2, 3, 4)).view(myarray),
                             targets=1, chunks=1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    assert_equal(ds.nsamples, 2)
    assert_equal(ds.samples.shape, (2, 12))
    assert_array_equal(ds.samples, [range(12), range(12, 24)])
Example #51
    def _forward_dataset(self, ds):
        chunks_attr = self.__chunks_attr
        mds = Dataset([])
        mds.a = ds.a
        # mds.sa = ds.sa
        # mds.fa = ds.fa
        if chunks_attr is None:
            # global kmeans
            mds.samples = self._kmeans(ds.samples).labels_
            print(max(mds.samples))
        else:
            # per-chunk kmeans
            for c in ds.sa[chunks_attr].unique:
                slicer = np.where(ds.sa[chunks_attr].value == c)[0]
                mds.samples = ds.samples[0, :]
                mds.samples[slicer] = self._kmeans(ds.samples[slicer]).labels_

        return mds
Example #52
 def setUp(self):
     data = np.random.standard_normal((100, 3, 4, 2))
     labels = np.concatenate((np.repeat(0, 50), np.repeat(1, 50)))
     chunks = np.repeat(range(5), 10)
     chunks = np.concatenate((chunks, chunks))
     mask = np.ones((3, 4, 2), dtype="bool")
     mask[0, 0, 0] = 0
     mask[1, 3, 1] = 0
     self.dataset = Dataset.from_wizard(samples=data, targets=labels, chunks=chunks, mask=mask)
Example #53
File: rsa.py Project: PyMVPA/PyMVPA
    def _call(self, ds):
        test_ds = self._prepare_ds(ds)
        if test_ds.nsamples != self._train_ds.nsamples:
            raise ValueError('Datasets should have same sample size for dissimilarity, '\
                             'nsamples for train: %d, test: %d'%(self._train_ds.nsamples,
                                                                 test_ds.nsamples))
        # Call actual distance metric
        distds = cdist(self._train_ds.samples, test_ds.samples,
                       metric=self.params.pairwise_metric,
                       **self.params.pairwise_metric_kwargs)
        # Make target pairs
        sa_dict = dict()
        for k in self._train_ds.sa:
            if k in test_ds.sa:
                sa_dict[k] = list(product(self._train_ds.sa.get(k).value,
                                                   test_ds.sa.get(k).value))

        distds = Dataset(samples=distds.ravel()[:, None], sa=sa_dict)
        return distds
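Standalone sketch of the cross-set dissimilarity above (plain scipy, illustrative shapes): cdist gives a (train x test) distance matrix, which the measure flattens into a single-column Dataset with one row per (train, test) pair.

import numpy as np
from scipy.spatial.distance import cdist

train = np.random.randn(4, 20)                 # train samples x features
test = np.random.randn(4, 20)                  # test samples x features
dist = cdist(train, test, metric='correlation')
flat = dist.ravel()[:, None]                   # one row per (train, test) pair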
Example #54
def test_icamapper():
    # data: 40 sample feature line in 2d space (40x2; samples x features)
    samples = np.vstack([np.arange(40.) for i in range(2)]).T
    samples -= samples.mean()
    samples +=  np.random.normal(size=samples.shape, scale=0.1)
    ndlin = Dataset(samples)

    pm = ICAMapper()
    try:
        pm.train(ndlin.copy())
        assert_equal(pm.proj.shape, (2, 2))
        p = pm.forward(ndlin.copy())
        assert_equal(p.shape, (40, 2))
        # check that the mapped data can be fully recovered by 'reverse()'
        assert_array_almost_equal(pm.reverse(p), ndlin)
    except mdp.NodeException:
        # do not puke if the ICA did not converge at all -- that is not our
        # fault but MDP's
        pass
Example #55
def test_npz_io(dsfile):

    # store random dataset to file
    ds = datasets['3dlarge'].copy()

    ds.a.pop('mapper')  # can't be saved
    ds.to_npz(dsfile)

    # reload and check for identity
    ds2 = Dataset.from_npz(dsfile)
    assert_datasets_equal(ds, ds2)

    assert_array_equal(ds.samples, ds2.samples)

    # But if we try to save with mapper -- it just gets ignored (warning is
    # issued)
    datasets['3dlarge'].to_npz(dsfile)
    ds2_ = Dataset.from_npz(dsfile)
    assert_datasets_equal(ds2, ds2_)
Example #56
def test_pcamapper():
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    ndlin = Dataset(np.concatenate([np.arange(40)
                               for i in range(20)]).reshape(20,-1).T)

    pm = PCAMapper()
    # train PCA
    assert_raises(mdp.NodeException, pm.train, ndlin)
    ndlin.samples = ndlin.samples.astype('float')
    ndlin_noise = ndlin.copy()
    ndlin_noise.samples += np.random.random(size=ndlin.samples.shape)
    # we have no variance for more than one PCA component, hence just one
    # actual non-zero eigenvalue
    assert_raises(mdp.NodeException, pm.train, ndlin)
    pm.train(ndlin_noise)
    assert_equal(pm.proj.shape, (20, 20))
    # now project data into PCA space
    p = pm.forward(ndlin.samples)
    assert_equal(p.shape, (40, 20))
    # check that the mapped data can be fully recovered by 'reverse()'
    assert_array_almost_equal(pm.reverse(p), ndlin)