Example #1
0
def test_array_collectable_unique(a):
    c = ArrayCollectable(a)
    a_flat = np.asanyarray(a).ravel()
    # Since nan != nan, we better compare based on string
    # representation here
    # And sort since order of those is not guaranteed (failed test
    # on squeeze)
    def repr_(x):
        return repr(np.sort(set(x)))

    assert_equal(repr_(a_flat), repr_(c.unique))
    # even if we request it 2nd time ;)
    assert_equal(repr_(a_flat), repr_(c.unique))
    assert_equal(len(a_flat), len(c.unique))

    c2 = ArrayCollectable(list(a_flat) + [float('nan')])
    # and since nan != nan, we should get new element
    assert_equal(len(c2.unique), len(c.unique) + 1)
Example #2
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3,10))
    c = ArrayCollectable(data.copy(), 'myname',
                         "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Example #3
0
def from_niml(dset, fa_labels=[], sa_labels=[], a_labels=[]):
    '''Convert a NIML dataset to a Dataset

    Parameters
    ----------
    dset: dict
        Dictionary with NIML key-value pairs, such as obtained from
        mvpa2.support.nibabel.afni_niml_dset.read()
    fa_labels: list
        Keys in dset that are enforced to be feature attributes
    sa_labels: list
        Keys in dset that are enforced to be sample attributes
    a_labels: list
        Keys in dset that are enforced to be dataset attributes

    Returns
    -------
    dataset: mvpa2.base.Dataset
        a PyMVPA Dataset
    '''

    # check for singleton element
    if type(dset) is list and len(dset) == 1:
        # recursive call
        return from_niml(dset[0])

    if not type(dset) is dict:
        raise ValueError("Expected a dict")

    if not 'data' in dset:
        raise ValueError("dset with no data?")

    data = dset['data']
    if len(data.shape) == 1:
        nfeatures = data.shape[0]
        nsamples = 1
    else:
        nfeatures, nsamples = data.shape

    # some labels have predefined destinations
    sa_labels_ = ['labels', 'stats', 'chunks', 'targets'] + sa_labels
    fa_labels_ = ['node_indices', 'center_ids'] + fa_labels
    a_labels_ = ['history'] + a_labels
    ignore_labels = ('data', 'dset_type')

    sa = SampleAttributesCollection(length=nsamples)
    fa = FeatureAttributesCollection(length=nfeatures)
    a = DatasetAttributesCollection()

    labels_collections = [(sa_labels_, sa),
                          (fa_labels_, fa),
                          (a_labels_, a)]

    infix2collection = {'sa': sa,
                        'fa': fa,
                        'a': a}

    infix2length = {'sa': nsamples, 'fa': nfeatures}

    for k, v in dset.iteritems():
        if k in ignore_labels:
            continue

        if k.startswith(_PYMVPA_PREFIX + _PYMVPA_SEP):
            # special PYVMPA field - do the proper conversion
            k_split = k.split(_PYMVPA_SEP)
            if len(k_split) > 2:
                infix = k_split[1].lower()
                collection = infix2collection.get(infix, None)
                if not collection is None:
                    short_k = _PYMVPA_SEP.join(k_split[2:])
                    expected_length = infix2length.get(infix, None)
                    if expected_length:
                        if isinstance(v, np.ndarray) and np.dtype == np.str_:
                            v = str(v)

                        while isinstance(v, basestring):
                            # strings are seperated by ';'
                            # XXX what if this is part of the value
                            # intended by the user?
                            v = v.split(';')

                        if expected_length != len(v):
                            raise ValueError("Unexpected length: %d != %d" %
                                             (expected_length, len(v)))

                        v = ArrayCollectable(v, length=expected_length)

                    collection[short_k] = v
                    continue

        found_label = False

        for label, collection in labels_collections:
            if k in label:
                collection[k] = v
                found_label = True
                break

        if found_label:
            continue

        # try to be smart and deduce this from dimensions.
        # this only works if nfeatures!=nsamples otherwise it would be
        # ambiguous
        # XXX is this ugly?
        if nfeatures != nsamples:
            try:
                n = len(v)
                if n == nfeatures:
                    fa[k] = v
                    continue
                elif n == nsamples:
                    sa[k] = v
                    continue
            except:
                pass

        # don't know what this is - make it a general attribute
        a[k] = v

    ds = Dataset(np.transpose(data), sa=sa, fa=fa, a=a)

    return ds