def __init__(self, samples, sa=None, fa=None, a=None):
    """
    A Dataset might have an arbitrary number of attributes for samples,
    features, or the dataset as a whole. However, only the data samples
    themselves are required.

    Parameters
    ----------
    samples : ndarray
      Data samples. This has to be a two-dimensional (samples x features)
      array. If the samples are not in that format, please consider one of
      the `AttrDataset.from_*` classmethods.
    sa : SampleAttributesCollection
      Samples attributes collection.
    fa : FeatureAttributesCollection
      Features attributes collection.
    a : DatasetAttributesCollection
      Dataset attributes collection.
    """
    # conversions
    if isinstance(samples, list):
        samples = np.array(samples)
    # Check all conditions we need to have for `samples` dtypes
    if not hasattr(samples, 'dtype'):
        raise ValueError(
            "AttrDataset only supports samples that have a `dtype` "
            "attribute that behaves similarly to that of an array-like.")
    if not hasattr(samples, 'shape'):
        raise ValueError(
            "AttrDataset only supports samples that have a `shape` "
            "attribute that behaves similarly to that of an array-like.")
    if not len(samples.shape):
        raise ValueError("Only `samples` with at least one axis are "
                         "supported (got: %i)" % len(samples.shape))

    # handling of 1D-samples
    # i.e. 1D is treated as multiple samples with a single feature
    if len(samples.shape) == 1:
        samples = np.atleast_2d(samples).T

    # that's all -- accepted
    self.samples = samples

    # Everything in a dataset (except for samples) is organized in
    # collections
    # Number of samples is .shape[0] for sparse matrix support
    self.sa = SampleAttributesCollection(length=len(self))
    if sa is not None:
        self.sa.update(sa)
    self.fa = FeatureAttributesCollection(length=self.nfeatures)
    if fa is not None:
        self.fa.update(fa)
    self.a = DatasetAttributesCollection()
    if a is not None:
        self.a.update(a)
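# A minimal usage sketch for the constructor above; the array values and
# attribute names are illustrative, not taken from the original source:
import numpy as np
from mvpa2.datasets.base import Dataset

samples = np.arange(12).reshape(4, 3)               # 4 samples x 3 features
ds = Dataset(samples,
             sa={'targets': ['a', 'a', 'b', 'b']},  # one value per sample
             fa={'voxel_id': np.arange(3)})         # one value per feature
print(ds.shape)                                     # -> (4, 3)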
def gifti_dataset(samples, targets=None, chunks=None):
    """
    Parameters
    ----------
    samples : str or GiftiImage
      GIFTI surface-based data, specified either as a filename or an image.
    targets : scalar or sequence
      Label attribute for each volume in the timeseries.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries.
    """
    node_indices = None
    data_vectors = []
    intents = []

    image = _get_gifti_image(samples)

    for darray in image.darrays:
        intent_string = _gifti_intent_niistring(darray.intent)

        if _gifti_intent_is_data(intent_string):
            data_vectors.append(darray.data)
            intents.append(intent_string)
        elif _gifti_intent_is_node_indices(intent_string):
            node_indices = darray.data

    samples = np.asarray(data_vectors)
    nsamples, nfeatures = samples.shape

    # set sample attributes
    sa = SampleAttributesCollection(length=nsamples)
    sa['intents'] = intents

    if targets is not None:
        sa['targets'] = targets
    if chunks is not None:
        sa['chunks'] = chunks

    # set feature attributes
    fa = FeatureAttributesCollection(length=nfeatures)
    if node_indices is not None:
        fa['node_indices'] = node_indices

    return Dataset(samples=samples, sa=sa, fa=fa)
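# Hedged usage sketch for gifti_dataset above; the filename is hypothetical
# and targets/chunks are illustrative per-sample attributes:
from mvpa2.datasets.gifti import gifti_dataset

ds = gifti_dataset('run1.func.gii',           # hypothetical GIFTI file
                   targets=['rest', 'task'],  # one label per data array
                   chunks=[1, 1])             # one chunk id per data array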
def _edit_attr(self, ds, shape):
    # Repeat every sample attribute of `ds` so that each original sample
    # covers shape[1] rows, then label each row with an ROI identifier.
    attr = dict()
    for key in ds.sa.keys():
        attr[key] = []
        for v in ds.sa[key].value:
            attr[key] += [v for _ in range(shape[1])]

    attr['roi_labels'] = []
    # floor division keeps this an int under Python 3 as well
    for _ in range(shape[0] // shape[1]):
        for i in range(shape[1]):
            attr['roi_labels'] += ["roi_%02d" % (i + 1)]

    logger.debug(shape)

    return SampleAttributesCollection(attr)
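# A standalone sketch of the roi_labels construction above (the shape values
# are illustrative):
shape = (6, 3)                       # 6 expanded samples, 3 ROIs
roi_labels = ["roi_%02d" % (i + 1)
              for _ in range(shape[0] // shape[1])
              for i in range(shape[1])]
# -> ['roi_01', 'roi_02', 'roi_03', 'roi_01', 'roi_02', 'roi_03']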
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)

    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
def edit_attr(attr, shape):
    # expansion factor: how many expanded rows each original sample covers
    # (floor division keeps this an int under Python 3)
    factor = shape[0] // len(attr.targets)
    attr_ = dict()
    for key in attr.keys():
        attr_[key] = []
        for label in attr[key]:
            attr_[key] += [label for i in range(factor)]
    """
    attr_['roi_labels'] = []
    for j in range(len(attr.targets)):
        for i in range(shape[1]):
            attr_['roi_labels'] += ["roi_%02d" % (i+1)]
    """
    return SampleAttributesCollection(attr_), None  # attr_['roi_labels'][:shape[1]]
def from_niml(dset, fa_labels=[], sa_labels=[], a_labels=[]):
    '''Convert a NIML dataset to a Dataset

    Parameters
    ----------
    dset: dict
        Dictionary with NIML key-value pairs, such as obtained from
        mvpa2.support.nibabel.afni_niml_dset.read()
    fa_labels: list
        Keys in dset that are enforced to be feature attributes
    sa_labels: list
        Keys in dset that are enforced to be sample attributes
    a_labels: list
        Keys in dset that are enforced to be dataset attributes

    Returns
    -------
    dataset: mvpa2.base.Dataset
        a PyMVPA Dataset
    '''

    # check for singleton element
    if type(dset) is list and len(dset) == 1:
        # recursive call
        return from_niml(dset[0])

    if type(dset) is not dict:
        raise ValueError("Expected a dict")

    if 'data' not in dset:
        raise ValueError("dset with no data?")

    data = dset['data']
    if len(data.shape) == 1:
        nfeatures = data.shape[0]
        nsamples = 1
    else:
        # NIML stores data as features x samples
        nfeatures, nsamples = data.shape

    # some labels have predefined destinations
    sa_labels_ = ['labels', 'stats', 'chunks', 'targets'] + sa_labels
    fa_labels_ = ['node_indices', 'center_ids'] + fa_labels
    a_labels_ = ['history'] + a_labels
    ignore_labels = ('data', 'dset_type')

    sa = SampleAttributesCollection(length=nsamples)
    fa = FeatureAttributesCollection(length=nfeatures)
    a = DatasetAttributesCollection()

    labels_collections = [(sa_labels_, sa),
                          (fa_labels_, fa),
                          (a_labels_, a)]

    infix2collection = {'sa': sa,
                        'fa': fa,
                        'a': a}

    infix2length = {'sa': nsamples, 'fa': nfeatures}

    for k, v in dset.iteritems():
        if k in ignore_labels:
            continue

        if k.startswith(_PYMVPA_PREFIX + _PYMVPA_SEP):
            # special PYMVPA field - do the proper conversion
            k_split = k.split(_PYMVPA_SEP)
            if len(k_split) > 2:
                infix = k_split[1].lower()
                collection = infix2collection.get(infix, None)

                if collection is not None:
                    short_k = _PYMVPA_SEP.join(k_split[2:])
                    expected_length = infix2length.get(infix, None)
                    if expected_length:
                        # a numpy string scalar/array - convert to a plain
                        # string so it can be split below (the original
                        # compared the np.dtype class itself, which is
                        # never equal to np.str_)
                        if isinstance(v, np.ndarray) and \
                                v.dtype.type is np.str_:
                            v = str(v)

                        while isinstance(v, basestring):
                            # strings are separated by ';'
                            # XXX what if this is part of the value
                            # intended by the user?
                            v = v.split(';')

                        if expected_length != len(v):
                            raise ValueError("Unexpected length: %d != %d" %
                                             (expected_length, len(v)))

                        v = ArrayCollectable(v, length=expected_length)

                    collection[short_k] = v
                    continue

        found_label = False
        for label, collection in labels_collections:
            if k in label:
                collection[k] = v
                found_label = True
                break

        if found_label:
            continue

        # try to be smart and deduce this from dimensions.
        # this only works if nfeatures != nsamples, otherwise it would be
        # ambiguous
        # XXX is this ugly?
        if nfeatures != nsamples:
            try:
                n = len(v)
                if n == nfeatures:
                    fa[k] = v
                    continue
                elif n == nsamples:
                    sa[k] = v
                    continue
            except:
                pass

        # don't know what this is - make it a general attribute
        a[k] = v

    # NIML is features x samples; transpose to PyMVPA's samples x features
    ds = Dataset(np.transpose(data), sa=sa, fa=fa, a=a)

    return ds
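# Hedged usage sketch for from_niml above; the dict is a minimal illustrative
# stand-in for the output of afni_niml_dset.read():
import numpy as np
from mvpa2.datasets.niml import from_niml

dset = {'data': np.zeros((10, 2)),   # NIML layout: 10 features x 2 samples
        'targets': ['cat', 'dog']}   # recognized as a sample attribute
ds = from_niml(dset)
print(ds.shape)                      # -> (2, 10) after the final transpose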
# iPLV connectivity data: one dereferenceable entry per run
iplv = mat['iPLV']

ds_list = []
runs = []
for i in range(iplv.shape[-1]):
    # dereference the i-th entry and load its data array
    ref = iplv[0, i]
    data = mat[ref][()]
    ds_list.append(data)
    # one run label per sample in this run
    run = [i + 1 for _ in range(data.shape[0])]
    runs.append(run)

ds_ = np.vstack(ds_list)

sa = SampleAttributesCollection({
    'targets': np.hstack(runs),
    'chunks': np.hstack(runs),
    'runs': np.hstack(runs),
    'subject': np.ones(ds_.shape[0]),
    'file': ["Subj=1_connectivity_individualalpha.mat"
             for _ in range(ds_.shape[0])]
})
fa = FeatureAttributesCollection({'matrix_values': np.ones(ds_.shape[1])})
a = DatasetAttributesCollection({'data_path': '/media/robbis/DATA/meg/hcp/',
                                 'experiment': 'hcp',
                                 })

ds = Dataset(ds_, sa=sa, a=a, fa=fa)

mat.close()

# mark samples that contain at least one non-NaN value
nan_mask = np.logical_not(np.isnan(ds.samples))
keep_idx = np.bool_(np.sum(nan_mask, axis=1))
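# Hedged continuation (an assumption, since the original snippet stops at
# keep_idx): a boolean mask selects samples, the usual PyMVPA indexing idiom
ds = ds[keep_idx]
print(ds.shape)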
def lean_errorfx(ds):
    assert_collections_equal(ds.sa, target_sa)
    # since equal, we could just replace with a blank one
    ds.sa = SampleAttributesCollection()
    return ds