Example 1
    def __init__(self, samples, sa=None, fa=None, a=None):
        """
        A Dataset might have an arbitrary number of attributes for samples,
        features, or the dataset as a whole. However, only the data samples
        themselves are required.

        Parameters
        ----------
        samples : ndarray
          Data samples.  This has to be a two-dimensional (samples x features)
          array. If the samples are not in that format, please consider one of
          the `AttrDataset.from_*` classmethods.
        sa : SampleAttributesCollection
          Samples attributes collection.
        fa : FeatureAttributesCollection
          Features attributes collection.
        a : DatasetAttributesCollection
          Dataset attributes collection.

        """
        # conversions
        if isinstance(samples, list):
            samples = np.array(samples)
        # Check all conditions we need to have for `samples` dtypes
        if not hasattr(samples, 'dtype'):
            raise ValueError(
                "AttrDataset only supports dtypes as samples that have a "
                "`dtype` attribute that behaves similar to the one of an "
                "array-like.")
        if not hasattr(samples, 'shape'):
            raise ValueError(
                "AttrDataset only supports dtypes as samples that have a "
                "`shape` attribute that behaves similar to the one of an "
                "array-like.")
        if not len(samples.shape):
            raise ValueError("Only `samples` with at least one axis are "
                             "supported (got: %i)" % len(samples.shape))

        # handling of 1D-samples
        # i.e. 1D is treated as multiple samples with a single feature
        if len(samples.shape) == 1:
            samples = np.atleast_2d(samples).T

        # that's all -- accepted
        self.samples = samples

        # Everything in a dataset (except for samples) is organized in
        # collections
        # Number of samples is .shape[0] for sparse matrix support
        self.sa = SampleAttributesCollection(length=len(self))
        if sa is not None:
            self.sa.update(sa)
        self.fa = FeatureAttributesCollection(length=self.nfeatures)
        if fa is not None:
            self.fa.update(fa)
        self.a = DatasetAttributesCollection()
        if a is not None:
            self.a.update(a)
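
A minimal usage sketch (the values and attribute names are hypothetical; assumes AttrDataset is importable from mvpa2.base.dataset):

import numpy as np
from mvpa2.base.dataset import AttrDataset

samples = np.random.rand(4, 3)  # 4 samples x 3 features
ds = AttrDataset(samples,
                 sa={'targets': ['a', 'a', 'b', 'b']},  # one value per sample
                 fa={'roi': [1, 1, 2]},                 # one value per feature
                 a={'source': 'synthetic'})             # dataset-wide metadata
assert ds.samples.shape == (4, 3)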
Example 2
def gifti_dataset(samples, targets=None, chunks=None):
    """
    Parameters
    ----------
    samples : str or GiftiImage
      GIFTI surface-based data, specified either as a filename or an image.
    targets : scalar or sequence
      Label attribute for each sample (data array) in the timeseries.
    chunks : scalar or sequence
      Chunk attribute for each sample (data array) in the timeseries.
    """
    node_indices = None
    data_vectors = []
    intents = []

    image = _get_gifti_image(samples)

    for darray in image.darrays:
        intent_string = _gifti_intent_niistring(darray.intent)

        if _gifti_intent_is_data(intent_string):
            data_vectors.append(darray.data)
            intents.append(intent_string)

        elif _gifti_intent_is_node_indices(intent_string):
            node_indices = darray.data

    samples = np.asarray(data_vectors)
    nsamples, nfeatures = samples.shape

    # set sample attributes
    sa = SampleAttributesCollection(length=nsamples)

    sa['intents'] = intents

    if targets is not None:
        sa['targets'] = targets

    if chunks is not None:
        sa['chunks'] = chunks

    # set feature attributes
    fa = FeatureAttributesCollection(length=nfeatures)

    if node_indices is not None:
        fa['node_indices'] = node_indices

    return Dataset(samples=samples, sa=sa, fa=fa)
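
A hedged usage sketch (the filename and the number of data arrays are hypothetical; targets and chunks need one entry per data array):

ds = gifti_dataset('sub01.time_series.gii',
                   targets=['rest'] * 5,  # one label per data array
                   chunks=[1] * 5)        # one chunk id per data array
print(ds.shape, ds.sa.keys(), ds.fa.keys())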
Example 3
    def _edit_attr(self, ds, shape):
        # repeat each sample attribute value once per column (shape[1])
        attr = dict()
        for key in ds.sa.keys():
            attr[key] = []
            for v in ds.sa[key].value:
                attr[key] += [v for _ in range(shape[1])]

        # cycle ROI labels roi_01..roi_NN across the expanded samples;
        # integer division is required under Python 3
        attr['roi_labels'] = []
        for _ in range(shape[0] // shape[1]):
            for i in range(shape[1]):
                attr['roi_labels'] += ["roi_%02d" % (i + 1)]

        logger.debug(shape)

        return SampleAttributesCollection(attr)
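
For illustration (hypothetical numbers): with shape == (6, 3) and ds.sa['targets'].value == ['a', 'b'], the helper yields

# targets    -> ['a', 'a', 'a', 'b', 'b', 'b']       (each value repeated shape[1] times)
# roi_labels -> ['roi_01', 'roi_02', 'roi_03'] * 2   (cycled shape[0] // shape[1] times)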
Example 4
def test_collections():
    # assert_* helpers come from mvpa2.testing; `copy` is the stdlib module
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    lst = list(range(5))
    sa['test'] = lst
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Example 5
def edit_attr(attr, shape):
    # each attribute value is repeated `factor` times to match the new number
    # of samples; integer division is required under Python 3
    factor = shape[0] // len(attr.targets)

    attr_ = dict()
    for key in attr.keys():
        attr_[key] = []
        for label in attr[key]:
            attr_[key] += [label for _ in range(factor)]

    # ROI labelling is currently disabled:
    # attr_['roi_labels'] = []
    # for j in range(len(attr.targets)):
    #     for i in range(shape[1]):
    #         attr_['roi_labels'] += ["roi_%02d" % (i + 1)]

    return SampleAttributesCollection(attr_), None  # or attr_['roi_labels'][:shape[1]]
Example 6
def from_niml(dset, fa_labels=[], sa_labels=[], a_labels=[]):
    '''Convert a NIML dataset to a Dataset

    Parameters
    ----------
    dset: dict
        Dictionary with NIML key-value pairs, such as obtained from
        mvpa2.support.nibabel.afni_niml_dset.read()
    fa_labels: list
        Keys in dset that are enforced to be feature attributes
    sa_labels: list
        Keys in dset that are enforced to be sample attributes
    a_labels: list
        Keys in dset that are enforced to be dataset attributes

    Returns
    -------
    dataset: mvpa2.base.Dataset
        a PyMVPA Dataset
    '''

    # check for singleton element
    if type(dset) is list and len(dset) == 1:
        # recursive call; pass the label arguments through
        return from_niml(dset[0], fa_labels, sa_labels, a_labels)

    if not isinstance(dset, dict):
        raise ValueError("Expected a dict")

    if 'data' not in dset:
        raise ValueError("dset has no 'data' key")

    data = dset['data']
    if len(data.shape) == 1:
        nfeatures = data.shape[0]
        nsamples = 1
    else:
        nfeatures, nsamples = data.shape

    # some labels have predefined destinations
    sa_labels_ = ['labels', 'stats', 'chunks', 'targets'] + sa_labels
    fa_labels_ = ['node_indices', 'center_ids'] + fa_labels
    a_labels_ = ['history'] + a_labels
    ignore_labels = ('data', 'dset_type')

    sa = SampleAttributesCollection(length=nsamples)
    fa = FeatureAttributesCollection(length=nfeatures)
    a = DatasetAttributesCollection()

    labels_collections = [(sa_labels_, sa),
                          (fa_labels_, fa),
                          (a_labels_, a)]

    infix2collection = {'sa': sa,
                        'fa': fa,
                        'a': a}

    infix2length = {'sa': nsamples, 'fa': nfeatures}

    for k, v in dset.items():
        if k in ignore_labels:
            continue

        if k.startswith(_PYMVPA_PREFIX + _PYMVPA_SEP):
            # special PYMVPA field - do the proper conversion
            k_split = k.split(_PYMVPA_SEP)
            if len(k_split) > 2:
                infix = k_split[1].lower()
                collection = infix2collection.get(infix, None)
                if collection is not None:
                    short_k = _PYMVPA_SEP.join(k_split[2:])
                    expected_length = infix2length.get(infix, None)
                    if expected_length:
                        if isinstance(v, np.ndarray) and v.dtype.type is np.str_:
                            v = str(v)

                        while isinstance(v, str):
                            # strings are separated by ';'
                            # XXX what if this is part of the value
                            # intended by the user?
                            v = v.split(';')

                        if expected_length != len(v):
                            raise ValueError("Unexpected length: %d != %d" %
                                             (expected_length, len(v)))

                        v = ArrayCollectable(v, length=expected_length)

                    collection[short_k] = v
                    continue

        found_label = False

        for label, collection in labels_collections:
            if k in label:
                collection[k] = v
                found_label = True
                break

        if found_label:
            continue

        # try to be smart and deduce this from dimensions.
        # this only works if nfeatures!=nsamples otherwise it would be
        # ambiguous
        # XXX is this ugly?
        if nfeatures != nsamples:
            try:
                n = len(v)
                if n == nfeatures:
                    fa[k] = v
                    continue
                elif n == nsamples:
                    sa[k] = v
                    continue
            except TypeError:
                # len(v) is undefined for non-sequences; cannot deduce placement
                pass

        # don't know what this is - make it a general attribute
        a[k] = v

    ds = Dataset(np.transpose(data), sa=sa, fa=fa, a=a)

    return ds
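
A minimal input sketch (hypothetical values; note that NIML stores data as features x samples, hence the transpose before building the Dataset):

import numpy as np

dset = {
    'data': np.random.rand(10, 2),  # 10 features (nodes) x 2 samples
    'node_indices': np.arange(10),  # routed to fa (predefined destination)
    'targets': ['a', 'b'],          # routed to sa (predefined destination)
    'history': 'made up for illustration',  # routed to a
}
ds = from_niml(dset)
assert ds.shape == (2, 10)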
Example 7
import numpy as np

# `mat` is assumed to be an open h5py.File handle on a MATLAB v7.3 .mat file
iplv = mat['iPLV']
ds_list = []
runs = []
for i in range(iplv.shape[-1]):
    # dereference the HDF5 object reference stored in the cell array
    ref = iplv[0, i]
    data = mat[ref][()]
    ds_list.append(data)
    # one run/chunk label per sample of this run
    run = [i + 1 for _ in range(data.shape[0])]
    runs.append(run)

ds_ = np.vstack(ds_list)

sa = SampleAttributesCollection({
    'targets': np.hstack(runs),
    'chunks': np.hstack(runs),
    'runs': np.hstack(runs),
    'subject': np.ones(ds_.shape[0]),
    'file': ["Subj=1_connectivity_individualalpha.mat" for _ in range(ds_.shape[0])]
})

fa = FeatureAttributesCollection({'matrix_values': np.ones(ds_.shape[1])})
a = DatasetAttributesCollection({'data_path': '/media/robbis/DATA/meg/hcp/',
                                 'experiment': 'hcp'})

ds = Dataset(ds_, sa=sa, a=a, fa=fa)

mat.close()

# keep samples that contain at least one non-NaN value
nan_mask = np.logical_not(np.isnan(ds.samples))
keep_idx = np.sum(nan_mask, axis=1).astype(bool)
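
The snippet stops before the mask is used; a hedged completion would index the dataset's samples with it:

ds = ds[keep_idx]  # hypothetical next step: drop samples that are all NaN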
Example 8
def lean_errorfx(ds):
    # originally the __call__ of a Node subclass, flattened into a function;
    # `target_sa` comes from the enclosing test scope
    assert_collections_equal(ds.sa, target_sa)
    # since equal, we could just replace with a blank one
    ds.sa = SampleAttributesCollection()
    return ds