def test_array_collectable_unique(a):
    collectable = ArrayCollectable(a)
    flat = np.asanyarray(a).ravel()

    # nan != nan, so the unique values are compared via their string
    # representation; sorting first removes ordering differences between
    # the two sides (ordering is not guaranteed -- this broke the
    # squeeze test before)
    def as_repr(values):
        return repr(np.sort(set(values)))

    assert_equal(as_repr(flat), as_repr(collectable.unique))
    # a second access to .unique must yield the very same result
    assert_equal(as_repr(flat), as_repr(collectable.unique))
    assert_equal(len(flat), len(collectable.unique))

    # appending a nan must grow .unique by exactly one element,
    # again because nan != nan
    with_nan = ArrayCollectable(list(flat) + [float('nan')])
    assert_equal(len(with_nan.unique), len(collectable.unique) + 1)
def test_array_collectable():
    col = ArrayCollectable()

    # a freshly constructed instance carries neither name nor value
    assert_equal(col.name, None)
    assert_equal(col.value, None)

    # both can be assigned after construction; a plain int is rejected
    col.name = 'somename'
    assert_raises(ValueError, col._set, 12345)
    assert_equal(col.value, None)
    col.value = np.arange(5)
    assert_equal(col.name, 'somename')
    assert_array_equal(col.value, np.arange(5))

    # everything can also be passed to the constructor directly
    data = np.random.random(size=(3, 10))
    col = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(col.name, 'myname')
    assert_array_equal(col.value, data)
    assert_equal(col.__doc__, "This is a test")
    assert_equal(str(col), 'myname')

    # repr() must round-trip through eval()
    from numpy import array
    restored = eval(repr(col))
    assert_equal(restored.name, 'myname')
    assert_array_almost_equal(restored.value, data)
    assert_equal(restored.__doc__, "This is a test")

    # an array of mismatching length cannot be assigned
    assert_raises(ValueError, col._set, np.arange(5))
    assert_equal(len(col), 3)

    # a shallow copy DOES create a view of the value array
    col.value = np.arange(3)
    shallow = copy.copy(col)
    assert_true(shallow.value.base is col.value)

    # names starting with an underscore are reserved
    assert_raises(ValueError, col._set_name, "_underscore")
def from_niml(dset, fa_labels=None, sa_labels=None, a_labels=None):
    '''Convert a NIML dataset to a Dataset

    Parameters
    ----------
    dset: dict
        Dictionary with NIML key-value pairs, such as obtained from
        mvpa2.support.nibabel.afni_niml_dset.read()
    fa_labels: list or None
        Keys in dset that are enforced to be feature attributes
    sa_labels: list or None
        Keys in dset that are enforced to be sample attributes
    a_labels: list or None
        Keys in dset that are enforced to be dataset attributes

    Returns
    -------
    dataset: mvpa2.base.Dataset
        a PyMVPA Dataset

    Raises
    ------
    ValueError
        If dset is not a dict, has no 'data' entry, or an attribute has
        an unexpected length.
    '''
    # mutable default arguments are shared across calls -- use None
    # sentinels instead (behavior-identical, the lists are never mutated)
    if fa_labels is None:
        fa_labels = []
    if sa_labels is None:
        sa_labels = []
    if a_labels is None:
        a_labels = []

    # check for singleton element
    if type(dset) is list and len(dset) == 1:
        # recursive call
        return from_niml(dset[0])

    if not type(dset) is dict:
        raise ValueError("Expected a dict")

    if not 'data' in dset:
        raise ValueError("dset with no data?")

    data = dset['data']
    if len(data.shape) == 1:
        # a 1-d array is a single sample
        nfeatures = data.shape[0]
        nsamples = 1
    else:
        # NIML stores data as features x samples
        nfeatures, nsamples = data.shape

    # some labels have predefined destinations
    sa_labels_ = ['labels', 'stats', 'chunks', 'targets'] + sa_labels
    fa_labels_ = ['node_indices', 'center_ids'] + fa_labels
    a_labels_ = ['history'] + a_labels
    ignore_labels = ('data', 'dset_type')

    sa = SampleAttributesCollection(length=nsamples)
    fa = FeatureAttributesCollection(length=nfeatures)
    a = DatasetAttributesCollection()

    labels_collections = [(sa_labels_, sa),
                          (fa_labels_, fa),
                          (a_labels_, a)]

    infix2collection = {'sa': sa,
                        'fa': fa,
                        'a': a}

    infix2length = {'sa': nsamples, 'fa': nfeatures}

    for k, v in dset.iteritems():
        if k in ignore_labels:
            continue

        if k.startswith(_PYMVPA_PREFIX + _PYMVPA_SEP):
            # special PYMVPA field - do the proper conversion
            k_split = k.split(_PYMVPA_SEP)
            if len(k_split) > 2:
                infix = k_split[1].lower()
                collection = infix2collection.get(infix, None)

                if not collection is None:
                    short_k = _PYMVPA_SEP.join(k_split[2:])
                    expected_length = infix2length.get(infix, None)
                    if expected_length:
                        # BUG FIX: the original tested
                        # ``np.dtype == np.str_`` which compares the
                        # dtype *class* itself and is always False;
                        # compare the array's dtype so that string
                        # arrays actually get unwrapped to a str
                        if isinstance(v, np.ndarray) and \
                                v.dtype == np.str_:
                            v = str(v)

                        while isinstance(v, basestring):
                            # strings are separated by ';'
                            # XXX what if this is part of the value
                            # intended by the user?
                            v = v.split(';')

                        if expected_length != len(v):
                            raise ValueError("Unexpected length: %d != %d" %
                                             (expected_length, len(v)))

                        v = ArrayCollectable(v, length=expected_length)

                    collection[short_k] = v
                    continue

        found_label = False

        for label, collection in labels_collections:
            if k in label:
                collection[k] = v
                found_label = True
                break

        if found_label:
            continue

        # try to be smart and deduce this from dimensions.
        # this only works if nfeatures!=nsamples otherwise it would be
        # ambiguous
        # XXX is this ugly?
        if nfeatures != nsamples:
            try:
                n = len(v)
                if n == nfeatures:
                    fa[k] = v
                    continue
                elif n == nsamples:
                    sa[k] = v
                    continue
            except Exception:
                # best effort: v may have no len(), or the assignment
                # may be rejected -- fall through to a generic attribute
                # (was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit)
                pass

        # don't know what this is - make it a general attribute
        a[k] = v

    # NIML data is features x samples; Dataset wants samples x features
    ds = Dataset(np.transpose(data), sa=sa, fa=fa, a=a)

    return ds