Example #1
class PyMVPAAtlas(XMLBasedAtlas):
    """Base class for PyMVPA atlases, such as LabelsAtlas and ReferenceAtlas
    """

    source = 'PyMVPA'

    def __init__(self, *args, **kwargs):
        XMLBasedAtlas.__init__(self, *args, **kwargs)

        # sanity checks
        header = self.header
        headerChildrenTags = XMLBasedAtlas._children_tags(header)
        if 'space' not in headerChildrenTags or \
           'space-flavor' not in headerChildrenTags:
            raise XMLAtlasException("PyMVPA Atlas requires specification of "
                                    "the space in which the atlas resides")

        self.__space = header.space.text
        self.__spaceFlavor = header['space-flavor'].text


    __doc__ = enhanced_doc_string('PyMVPAAtlas', locals(), XMLBasedAtlas)


    ##REF: Name was automagically refactored
    def _load_images(self):
        # shortcut
        imagefile = self.header.images.imagefile
        #self.nlevels = len(self._levels_by_id)

        # Set offset if defined in XML file
        # XXX: should just take one from the qoffset... now that one is
        #       defined... this origin might be misleading actually
        self._origin = np.array((0, 0, 0))
        if 'offset' in imagefile.attrib:
            self._origin = np.array(
                [int(x) for x in imagefile.get('offset').split(',')])

        # Load the image file which has labels
        if self._force_image_file is not None:
            imagefilename = self._force_image_file
        else:
            imagefilename = imagefile.text
        imagefilename = reuse_absolute_path(self._filename, imagefilename)

        try:
            self._image = None
            last_exc = None
            for ext in ['', '.nii.gz']:
                try:
                    self._image = nb.load(imagefilename + ext)
                    break
                except Exception as e:
                    last_exc = e
            if self._image is None:
                raise last_exc
        except RuntimeError as e:
            raise RuntimeError("Cannot open file %s due to %s"
                               % (imagefilename, e))
Example #2
class LabelsAtlas(PyMVPAAtlas):
    """
    Atlas which provides labels for the given coordinate
    """

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):
        """
        Return labels for the given voxel at the levels specified by index
        """
        levels = self._get_selected_levels(levels=levels)

        result = {'voxel_queried': c}

        # check range
        c = self._check_range(c)

        resultLevels = []
        for level in levels:
            if level in self._levels:
                level_ = self._levels[level]
            else:
                raise IndexError("Unknown index or description for level %d" %
                                 level)

            resultIndex = int(self._data[level_.index, c[0], c[1], c[2]])

            resultLevels += [{
                'index': level_.index,
                'id': level_.description,
                'label': level_[resultIndex]
            }]

        result['labels'] = resultLevels
        return result

    __doc__ = enhanced_doc_string('LabelsAtlas', locals(), PyMVPAAtlas)
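
Judging from label_voxel above, a query returns a dict with the queried
voxel and one entry per selected level. A hypothetical use (the atlas file
name is an assumption, not a real PyMVPA fixture):

atlas = LabelsAtlas(filename='talairach.xml')    # hypothetical atlas file
res = atlas.label_voxel((42, 38, 25), levels=[0])
print(res['voxel_queried'])        # (42, 38, 25)
print(res['labels'][0]['label'])   # label of the 0th selected level
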
Example #3
    def __init__(cls, name, bases, dict):
        """
        Parameters
        ----------
        name : str
          Name of the class
        bases : iterable
          Base classes
        dict : dict
          Attributes.
        """
        if __debug__:
            debug(
                "COLR",
                "AttributesCollector call for %s.%s, where bases=%s, dict=%s ",
                (cls, name, bases, dict))

        super(AttributesCollector, cls).__init__(name, bases, dict)

        collections = {}
        for name, value in dict.iteritems():
            if isinstance(value, IndexedCollectable):
                baseclassname = value.__class__.__name__
                col = _known_collections[baseclassname][0]
                # XXX should we allow to throw exceptions here?
                if col not in collections:
                    collections[col] = {}
                collections[col][name] = value
                # and assign the name if it was not yet set
                if value.name is None:
                    value.name = name
                # !!! We do not keep a static copy of this attribute in the class.
                #     Due to the traversal of base classes below, we should be
                #     able to construct proper collections even in derived classes
                delattr(cls, name)

        # XXX can we first collect parent's ca and then populate with ours?
        # TODO

        for base in bases:
            if hasattr(base, "__class__") and \
                   base.__class__ == AttributesCollector:
                # TODO take care about overriding one from super class
                # for state in base.ca:
                #    if state[0] =
                newcollections = base._collections_template
                if len(newcollections) == 0:
                    continue
                if __debug__: # XXX RF:  and "COLR" in debug.active:
                    debug("COLR",
                          "Collect collections %s for %s from %s",
                          (newcollections, cls, base))
                for col, collection in newcollections.iteritems():
                    if col in collections:
                        collections[col].update(collection)
                    else:
                        collections[col] = collection


        if __debug__:
            debug("COLR",
                  "Creating ConditionalAttributesCollection template %s "
                  "with collections %s", (cls, collections.keys()))

        # if there is an explicit list of attribute collections
        if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
            for col in cls._ATTRIBUTE_COLLECTIONS:
                if col not in _col2class:
                    raise ValueError(
                        "Requested collection %s is unknown to collector"
                        % col)
                if col not in collections:
                    collections[col] = None

        # TODO: check on conflict in names of Collections' items!  since
        # otherwise even order is not definite since we use dict for
        # collections.
        # XXX should we switch to tuple?

        for col, colitems in collections.iteritems():
            # So far we collected the collection items in a dict, but the new
            # API requires passing a _list_ of collectables instead of a dict.
            # So, whenever there are items, we pass just the values of the dict.
            # There is no information lost, since the keys of the dict are the
            # name attributes of each collectable in the list.
            if colitems is not None:
                collections[col] = _col2class[col](items=colitems.values())
            else:
                collections[col] = _col2class[col]()

        setattr(cls, "_collections_template", collections)

        #
        # Expand documentation for the class based on the listed
        # parameters and if it is stateful
        #
        # TODO -- figure nice way on how to alter __init__ doc directly...
        textwrapper = TextWrapper(subsequent_indent="    ",
                                  initial_indent="    ",
                                  width=70)

        # Parameters
        paramsdoc = []
        paramscols = []
        for col in ('params', 'kernel_params'):
            if col in collections:
                paramscols.append(col)
                # let's at least sort the parameters for consistent output
                col_items = collections[col]
                iparams = [(v._instance_index, k)
                           for k,v in col_items.iteritems()]
                iparams.sort()
                paramsdoc += [(col_items[iparam[1]].name,
                               col_items[iparam[1]]._paramdoc())
                              for iparam in iparams]

        # A hash of the parameters collection could be taken to decide if
        # any were changed? XXX maybe not needed at all?
        setattr(cls, "_paramscols", paramscols)

        # States doc
        cadoc = ""
        if 'ca' in collections:
            paramsdoc += [
                ('enable_ca',
                 "enable_ca : None or list of str\n  "
                 "Names of the conditional attributes which should "
                 "be enabled in addition\n  to the default ones"),
                ('disable_ca',
                 "disable_ca : None or list of str\n  "
                 "Names of the conditional attributes which should "
                 "be disabled""")]
            if len(collections['ca']):
                cadoc += '\n'.join(['* ' + x
                                    for x in collections['ca'].listing])
                cadoc += "\n\n(Conditional attributes enabled by default suffixed with `+`)"
            if __debug__:
                debug("COLR", "Assigning __cadoc to be %s", (cadoc,))
            setattr(cls, "_cadoc", cadoc)

        if paramsdoc != "":
            if __debug__ and 'COLR' in debug.active:
                debug("COLR", "Assigning __paramsdoc to be %s", (paramsdoc,))
            setattr(cls, "_paramsdoc", paramsdoc)

        if len(paramsdoc) or cadoc != "":
            cls.__doc__ = enhanced_doc_string(cls, *bases)
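
The collect-and-strip pattern used above can be shown in miniature: scan the
class dict for marker instances, remove them from the class with delattr,
and keep them in a template merged with the bases'. A self-contained sketch
(names are invented, not PyMVPA's):

class Collectable(object):
    def __init__(self, name=None):
        self.name = name

class Collector(type):
    def __init__(cls, name, bases, dct):
        super(Collector, cls).__init__(name, bases, dct)
        collected = {}
        for attr, value in list(dct.items()):
            if isinstance(value, Collectable):
                if value.name is None:
                    value.name = attr      # assign the name if not yet set
                collected[attr] = value
                delattr(cls, attr)         # keep no static copy on the class
        # merge templates from bases without clobbering our own entries
        for base in bases:
            for attr, value in getattr(base, '_template', {}).items():
                collected.setdefault(attr, value)
        cls._template = collected

class Base(object):
    __metaclass__ = Collector    # Python 2 spelling, as in the code above
    x = Collectable()

print(Base._template['x'].name)  # 'x'; Base.x itself has been removed
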
Example #4
    def __init__(cls, name, bases, dict):
        """
        Parameters
        ----------
        name : str
          Name of the class
        bases : iterable
          Base classes
        dict : dict
          Attributes.
        """
        if __debug__:
            debug(
                "COLR",
                "AttributesCollector call for %s.%s, where bases=%s, dict=%s ",
                (cls, name, bases, dict))

        super(AttributesCollector, cls).__init__(name, bases, dict)

        collections = {}
        for name, value in dict.iteritems():
            if isinstance(value, IndexedCollectable):
                baseclassname = value.__class__.__name__
                col = _known_collections[baseclassname][0]
                # XXX should we allow to throw exceptions here?
                if col not in collections:
                    collections[col] = {}
                collections[col][name] = value
                # and assign the name if it was not yet set
                if value.name is None:
                    value.name = name
                # !!! We do not keep a static copy of this attribute in the class.
                #     Due to the traversal of base classes below, we should be
                #     able to construct proper collections even in derived classes
                delattr(cls, name)

        # XXX can we first collect parent's ca and then populate with ours?
        # TODO

        for base in bases:
            if hasattr(base, "__class__") and \
                   base.__class__ == AttributesCollector:
                # TODO take care about overriding one from super class
                # for state in base.ca:
                #    if state[0] =
                newcollections = base._collections_template
                if len(newcollections) == 0:
                    continue
                if __debug__: # XXX RF:  and "COLR" in debug.active:
                    debug("COLR",
                          "Collect collections %s for %s from %s",
                          (newcollections, cls, base))
                for col, super_collection in newcollections.iteritems():
                    if col in collections:
                        if __debug__:
                            debug("COLR", "Updating existing collection %s with the one from super class" % col)
                        collection = collections[col]
                        # Current class could have overriden a parameter, so
                        # we need to keep it without updating
                        for pname, pval in super_collection.iteritems():
                            if pname not in collection:
                                collection[pname] = pval
                            elif __debug__:
                                debug("COLR", "Not overriding %s.%s of cls %s from base %s"
                                      % (col, pname, cls, base))
                    else:
                        collections[col] = super_collection


        if __debug__:
            debug("COLR",
                  "Creating ConditionalAttributesCollection template %s "
                  "with collections %s", (cls, collections.keys()))

        # if there is an explicit list of attribute collections
        if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
            for col in cls._ATTRIBUTE_COLLECTIONS:
                if col not in _col2class:
                    raise ValueError(
                        "Requested collection %s is unknown to collector"
                        % col)
                if col not in collections:
                    collections[col] = None

        # TODO: check on conflict in names of Collections' items!  since
        # otherwise even order is not definite since we use dict for
        # collections.
        # XXX should we switch to tuple?

        for col, colitems in collections.iteritems():
            # So far we collected the collection items in a dict, but the new
            # API requires passing a _list_ of collectables instead of a dict.
            # So, whenever there are items, we pass just the values of the dict.
            # There is no information lost, since the keys of the dict are the
            # name attributes of each collectable in the list.
            if colitems is not None:
                collections[col] = _col2class[col](items=colitems.values())
            else:
                collections[col] = _col2class[col]()

        setattr(cls, "_collections_template", collections)

        #
        # Expand documentation for the class based on the listed
        # parameters and if it is stateful
        #
        # TODO -- figure nice way on how to alter __init__ doc directly...
        textwrapper = TextWrapper(subsequent_indent="    ",
                                  initial_indent="    ",
                                  width=70)

        # Parameters
        paramsdoc = []
        paramscols = []
        for col in ('params', 'kernel_params'):
            if col in collections:
                paramscols.append(col)
                # let's at least sort the parameters for consistent output
                col_items = collections[col]
                iparams = [(v._instance_index, k)
                           for k,v in col_items.iteritems()]
                iparams.sort()
                paramsdoc += [(col_items[iparam[1]].name,
                               col_items[iparam[1]]._paramdoc())
                              for iparam in iparams]

        # A hash of the parameters collection could be taken to decide if
        # any were changed? XXX maybe not needed at all?
        setattr(cls, "_paramscols", paramscols)

        # States doc
        cadoc = ""
        if 'ca' in collections:
            paramsdoc += [
                ('enable_ca',
                 "enable_ca : None or list of str\n  "
                 "Names of the conditional attributes which should "
                 "be enabled in addition\n  to the default ones"),
                ('disable_ca',
                 "disable_ca : None or list of str\n  "
                 "Names of the conditional attributes which should "
                 "be disabled""")]
            if len(collections['ca']):
                cadoc += '\n'.join(['* ' + x
                                    for x in collections['ca'].listing])
                cadoc += "\n\n(Conditional attributes enabled by default suffixed with `+`)"
            if __debug__:
                debug("COLR", "Assigning __cadoc to be %s", (cadoc,))
            setattr(cls, "_cadoc", cadoc)

        if paramsdoc != "":
            if __debug__ and 'COLR' in debug.active:
                debug("COLR", "Assigning __paramsdoc to be %s", (paramsdoc,))
            setattr(cls, "_paramsdoc", paramsdoc)

        if len(paramsdoc) or cadoc != "":
            cls.__doc__ = enhanced_doc_string(cls, *bases)
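
The difference from the previous example is the merge step: a parameter the
current class already collected is kept, and only missing entries are copied
from the base class template. In plain-dict terms it is a setdefault-style
merge:

own = {'C': 1.0}                      # overridden in the subclass
inherited = {'C': 0.5, 'gamma': 2.0}  # template from the base class
for pname, pval in inherited.items():
    own.setdefault(pname, pval)       # never clobber the subclass value
assert own == {'C': 1.0, 'gamma': 2.0}
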
Example #5
class ReferencesAtlas(PyMVPAAtlas):
    """
    Atlas which provides references to the other atlases.

    Example: the atlas which has references to the closest points
    (closest Gray, etc) in another atlas.
    """
    def __init__(self, distance=0, reference_level=None, *args, **kwargs):
        """Initialize `ReferencesAtlas`
        """
        PyMVPAAtlas.__init__(self, *args, **kwargs)
        # sanity checks
        if not ('reference-atlas' in XMLBasedAtlas._children_tags(
                self.header)):
            raise XMLAtlasException(
                "ReferencesAtlas must refer to a some other atlas")

        referenceAtlasName = self.header["reference-atlas"].text

        # uff -- another evil import but we better use the factory method
        from mvpa2.atlases.warehouse import Atlas
        self.__referenceAtlas = Atlas(
            filename=reuse_absolute_path(self._filename, referenceAtlasName))

        if self.__referenceAtlas.space != self.space or \
           self.__referenceAtlas.space_flavor != self.space_flavor:
            raise XMLAtlasException(
                "Reference and original atlases should be in the same space")

        self.__referenceLevel = None  # pylint shut up
        if reference_level is not None:
            self.set_reference_level(reference_level)
        self.set_distance(distance)

    __doc__ = enhanced_doc_string('ReferencesAtlas', locals(), PyMVPAAtlas)

    # number of levels must be of the referenced atlas due to
    # handling of that in __getitem__
    #nlevels = property(fget=lambda self:self.__referenceAtlas.nlevels)
    ##REF: Name was automagically refactored
    def _get_nlevels_virtual(self):
        return self.__referenceAtlas.nlevels

    ##REF: Name was automagically refactored
    def set_reference_level(self, level):
        """
        Set the level which will be queried
        """
        if level in self._levels:
            self.__referenceLevel = self._levels[level]
        else:
            raise IndexError("Unknown reference level %r. " % level + \
                  "Known are %r" % (list(self._levels.keys()), ))

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):

        if self.__referenceLevel is None:
            warning("You did not provide what level to use "
                    "for reference. Assigning 0th level -- '%s'" %
                    (self._levels[0], ))
            self.set_reference_level(0)
            # return self.__referenceAtlas.label_voxel(c, levels)

        c = self._check_range(c)

        # obtain coordinates of the closest voxel
        cref = self._data[self.__referenceLevel.indexes, c[0], c[1], c[2]]
        dist = norm((cref - c) * self.voxdim)
        if __debug__:
            debug(
                'ATL__', "Closest referenced point for %r is "
                "%r at distance %3.2f" % (c, cref, dist))
        if (self.distance - dist) >= 1e-3:  # neglect everything smaller
            result = self.__referenceAtlas.label_voxel(cref, levels)
            result['voxel_referenced'] = c
            result['distance'] = dist
        else:
            result = self.__referenceAtlas.label_voxel(c, levels)
            if __debug__:
                debug(
                    'ATL__', "Closest referenced point is "
                    "further than desired distance %.2f" % self.distance)
            result['voxel_referenced'] = None
            result['distance'] = 0
        return result

    ##REF: Name was automagically refactored
    def levels_listing(self):
        return self.__referenceAtlas.levels_listing()

    ##REF: Name was automagically refactored
    def _get_levels_virtual(self):
        return self.__referenceAtlas.levels

    ##REF: Name was automagically refactored
    def set_distance(self, distance):
        """Set desired maximal distance for the reference
        """
        if distance < 0:
            raise ValueError("Distance should not be negative. "
                             "Thus '%f' is not a legal value" % distance)
        if __debug__:
            debug('ATL__',
                  "Setting maximal distance for queries to be %s" % distance)
        self.__distance = distance

    distance = property(fget=lambda self: self.__distance, fset=set_distance)
    reference_level = property(fget=lambda self: self.__referenceLevel,
                               fset=set_reference_level)
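
A hypothetical way to use such an atlas, following the constructor and
properties above (the file and level names are made up): the atlas looks up
the closest referenced point within `distance` of the queried voxel and
labels it in the referenced atlas.

atlas = ReferencesAtlas(filename='closest-gray.xml',  # hypothetical file
                        reference_level='gray',       # hypothetical level
                        distance=5)
res = atlas.label_voxel((42, 38, 25))
print(res['distance'])          # 0 if no referenced point was close enough
print(res['voxel_referenced'])  # the queried voxel, or None
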
Example #6
class PyMVPAAtlas(XMLBasedAtlas):
    """Base class for PyMVPA atlases, such as LabelsAtlas and ReferenceAtlas
    """

    source = 'PyMVPA'

    def __init__(self, *args, **kwargs):
        XMLBasedAtlas.__init__(self, *args, **kwargs)

        # sanity checks
        header = self.header
        headerChildrenTags = XMLBasedAtlas._children_tags(header)
        if 'space' not in headerChildrenTags or \
           'space-flavor' not in headerChildrenTags:
            raise XMLAtlasException("PyMVPA Atlas requires specification of "
                                    "the space in which the atlas resides")

        self.__space = header.space.text
        self.__spaceFlavor = header['space-flavor'].text

    __doc__ = enhanced_doc_string('PyMVPAAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        # shortcut
        imagefile = self.header.images.imagefile
        #self.nlevels = len(self._levels_by_id)

        # Set offset if defined in XML file
        # XXX: should just take one from the qoffset... now that one is
        #       defined... this origin might be misleading actually
        self._origin = np.array((0, 0, 0))
        if 'offset' in imagefile.attrib:
            self._origin = np.array(
                [int(x) for x in imagefile.get('offset').split(',')])

        # Load the image file which has labels
        if self._force_image_file is not None:
            imagefilename = self._force_image_file
        else:
            imagefilename = imagefile.text
        imagefilename = reuse_absolute_path(self._filename, imagefilename)

        try:
            self._image = None
            last_exc = None
            for ext in ['', '.nii.gz']:
                try:
                    self._image = nb.load(imagefilename + ext)
                    break
                except Exception as e:
                    last_exc = e
            if self._image is None:
                raise last_exc
        except RuntimeError as e:
            raise RuntimeError("Cannot open file %s due to %s"
                               % (imagefilename, e))

        self._data = self._image.get_data()
        # we get the data as x,y,z[,t] but we want to have the time axis first
        # if any
        if len(self._data.shape) == 4:
            self._data = np.rollaxis(self._data, -1)

        # remove bogus dimensions on top of 4th
        if len(self._data.shape[0:-4]) > 0:
            bogus_dims = self._data.shape[0:-4]
            if max(bogus_dims) > 1:
                raise RuntimeError("Atlas %s has more than 4 of non-singular" \
                      "dimensions" % imagefilename)
            new_shape = self._data.shape[-4:]
            self._data.reshape(new_shape)

        #if self._image.extent[3] != self.nlevels:
        #   raise XMLAtlasException("Atlas %s has %d levels defined whenever %s has %d volumes" % \
        #                           ( filename, self.nlevels, imagefilename, self._image.extent[3] ))

    ##REF: Name was automagically refactored
    def _load_metadata(self):
        # Load levels
        self._levels = {}
        # preprocess labels for different levels
        self._Nlevels = 0
        index_incr = 0
        for index, child in enumerate(self.data.getchildren()):
            if child.tag == 'level':
                level = Level.from_xml(child)
                self._levels[level.description] = level
                if hasattr(level, 'index'):
                    index = level.index
                else:
                    # to avoid collision if some levels do
                    # have indexes
                    while index_incr in self._levels:
                        index_incr += 1
                    index, index_incr = index_incr, index_incr + 1
                self._levels[index] = level
            else:
                raise XMLAtlasException("Unknown child '%s' within data" %
                                        child.tag)
            self._Nlevels += 1

    ##REF: Name was automagically refactored
    def _get_nlevels_virtual(self):
        return self._Nlevels

    ##REF: Name was automagically refactored
    def _get_nlevels(self):
        return self._get_nlevels_virtual()

    @staticmethod
    ##REF: Name was automagically refactored
    def _check_version(version):
        # For compatibility lets support "RUMBA" atlases
        return version.startswith("pymvpa-") or version.startswith("rumba-")

    space = property(fget=lambda self: self.__space)
    space_flavor = property(fget=lambda self: self.__spaceFlavor)
    nlevels = property(fget=_get_nlevels)
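
For illustration, the offset parsing in _load_images above expects a
comma-separated integer triple on the imagefile element. A minimal,
hypothetical sketch (ElementTree stands in for whatever XML binding the
atlas code uses; the element content is made up):

from xml.etree import ElementTree as ET

imagefile = ET.fromstring(
    '<imagefile offset="1,2,3">labels.nii.gz</imagefile>')
origin = [int(x) for x in imagefile.get('offset').split(',')]
assert origin == [1, 2, 3]
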
Example #7
class ColumnData(dict):
    """Read data that is stored in columns of text files.

    All read data is available via a dictionary-like interface. If
    column headers are available, the column names serve as dictionary keys.
    If no header exists, an artificial key is generated: str(column_number).

    Splitting of text file lines is performed by the standard split() function
    (which gets passed the `sep` argument as separator string) and each
    element is converted into the desired datatype.

    Because data is read into a dictionary no two columns can have the same
    name in the header! Each column is stored as a list in the dictionary.
    """
    def __init__(self, source, header=True, sep=None, headersep=None,
                 dtype=float, skiplines=0):
        """Read data from file into a dictionary.

        Parameters
        ----------
        source : str or dict
          If a string is given, all data is read from that file and
          additional keyword arguments can be used to customize the
          read procedure. If a dictionary is passed, a deepcopy is
          performed.
        header : bool or list of str
          Indicates whether the column names should be read from the
          first line (`header=True`). If `header=False` unique
          column names will be generated (see class docs). If
          `header` is a python list, its content is used as column
          header names and its length has to match the number of
          columns in the file.
        sep : str or None
          Separator string. The actual meaning depends on the output
          format (see class docs).
        headersep : str or None
          Separator string used in the header. The actual meaning
          depends on the output format (see class docs).
        dtype : type or list(types)
          Desired datatype(s). The datatype per column can be specified by
          passing a list of types.
        skiplines : int
          Number of lines to skip at the beginning of the file.
        """
        # init base class
        dict.__init__(self)

        # initialize with default
        self._header_order = None

        if isinstance(source, basestring):
            self._from_file(source, header=header, sep=sep, headersep=headersep,
                           dtype=dtype, skiplines=skiplines)

        elif isinstance(source, dict):
            for k, v in source.iteritems():
                self[k] = v
            # check data integrity
            self._check()

        else:
            raise ValueError('Unknown source for ColumnData [%r]'
                             % type(source))

        # generate missing properties for each item in the header
        classdict = self.__class__.__dict__
        for k in self.keys():
            if k not in classdict:
                getter = "lambda self: self._get_attrib('%s')" % (k)
                # Sanitize the key: substitute '[', ']' and spaces with '_'
                k_ = sub('[[\] ]', '_', k)
                # replace multiple _s
                k_ = sub('__+', '_', k_)
                # remove quotes
                k_ = sub('["\']', '', k_)
                if __debug__:
                    debug("IOH", "Registering property %s for ColumnData key %s"
                          % (k_, k))
                # make sure to import class directly into local namespace
                # otherwise following does not work for classes defined
                # elsewhere
                exec('from %s import %s' % (self.__module__,
                                            self.__class__.__name__))
                exec("%s.%s = property(fget=%s)" %
                     (self.__class__.__name__, k_, getter))
                # TODO!!! Check if it is safe actually here to rely on value of
                #         k in lambda. May be it is treated as continuation and
                #         some local space would override it????
                #setattr(self.__class__,
                #        k,
                #        property(fget=lambda x: x._get_attrib("%s" % k)))
                # it seems to be error-prone due to continuation...


    __doc__ = enhanced_doc_string('ColumnData', locals())


    ##REF: Name was automagically refactored
    def _get_attrib(self, key):
        """Return corresponding value if given key is known to current instance

        Is used for automatically added properties to the class.

        Raises
        ------
        ValueError:
          If `key` is not known to given instance

        Returns
        -------
        Value if `key` is known
        """
        if key in self:
            return self[key]
        else:
            raise ValueError("Instance %r has no data about %r"
                             % (self, key))


    def __str__(self):
        s = self.__class__.__name__
        if len(self.keys()) > 0:
            s += " %d rows, %d columns [" % \
                 (self.nrows, self.ncolumns)
            s += reduce(lambda x, y: x + " %s" % y, self.keys())
            s += "]"
        return s

    def _check(self):
        """Performs some checks for data integrity.
        """
        length = None
        for k in self.keys():
            if length is None:
                length = len(self[k])
            elif len(self[k]) != length:
                raise ValueError("Data integrity lost. Columns do not "
                                 "have equal length.")


    def _from_file(self, filename, header, sep, headersep,
                   dtype, skiplines):
        """Loads column data from file -- clears object first.
        """
        # make a clean table
        self.clear()

        with open(filename, 'r') as file_:

            self._header_order = None

            [ file_.readline() for x in range(skiplines) ]
            """Simply skip some lines"""
            # make column names, either take header or generate
            if header is True:
                # read first line and split by 'sep'
                hdr = file_.readline().split(headersep)
                # remove bogus empty header titles
                hdr = [ x for x in hdr if len(x.strip()) ]
                self._header_order = hdr
            elif isinstance(header, list):
                hdr = header
            else:
                hdr = [ str(i) for i in xrange(len(file_.readline().split(sep))) ]
                # reset file to not miss the first line
                file_.seek(0)
                [ file_.readline() for x in range(skiplines) ]


            # string in lists: one per column
            tbl = [ [] for i in xrange(len(hdr)) ]

            # store whether dtype should be determined automagically
            auto_dtype = dtype is None

            # do per column dtypes
            if not isinstance(dtype, list):
                dtype = [dtype] * len(hdr)

            # parse line by line and feed into the lists
            for line in file_:
                # get rid of leading and trailing whitespace
                line = line.strip()
                # ignore empty lines and comment lines
                if not line or line.startswith('#'):
                    continue
                l = line.split(sep)

                if len(l) != len(hdr):
                    raise RuntimeError(
                        "Number of entries in line [%i] does not match number "
                        "of columns in header [%i]." % (len(l), len(hdr)))

                for i, v in enumerate(l):
                    if dtype[i] is not None:
                        try:
                            v = dtype[i](v)
                        except ValueError:
                            warning("Can't convert %r to desired datatype %r." %
                                    (v, dtype) + " Leaving original type")
                    tbl[i].append(v)

            if auto_dtype:
                attempt_convert_dtypes = (int, float)

                for i in xrange(len(tbl)):
                    values = tbl[i]

                    for attempt_convert_dtype in attempt_convert_dtypes:
                        try:
                            values = map(attempt_convert_dtype, values)
                            tbl[i] = values
                            break
                        except (ValueError, TypeError):
                            continue

            # check
            if len(tbl) != len(hdr):
                raise RuntimeError("Number of columns read from file does not "
                                   "match the number of header entries.")

            # fill dict
            for i, v in enumerate(hdr):
                self[v] = tbl[i]


    def __iadd__(self, other):
        """Merge column data.
        """
        # for all columns in the other object
        for k, v in other.iteritems():
            if k not in self:
                raise ValueError('Unknown key [%r].' % (k,))
            if not isinstance(v, list):
                raise ValueError('Can only merge list data, but got [%r].'
                                 % type(v))
            # now it seems to be ok
            # XXX check for datatype?
            self[k] += v

        # look for problems, like columns present in self, but not in other
        self._check()

        return self


    ##REF: Name was automagically refactored
    def select_samples(self, selection):
        """Return new ColumnData with selected samples"""

        data = copy.deepcopy(self)
        for k, v in data.iteritems():
            data[k] = [v[x] for x in selection]

        data._check()
        return data

    @property
    def ncolumns(self):
        """Returns the number of columns.
        """
        return len(self.keys())


    def tofile(self, filename, header=True, header_order=None, sep=' '):
        """Write column data to a text file.

        Parameters
        ----------
        filename : str
          Target filename
        header : bool, optional
          If `True` a column header is written, using the column
          keys. If `False` no header is written.
        header_order : None or list of str
          If it is a list of strings, they will be used instead
          of simply asking for the dictionary keys. However
          these strings must match the dictionary keys in number
          and identity. This argument type can be used to
          determine the order of the columns in the output file.
          The default value is `None`. In this case the columns
          will be in an arbitrary order.
        sep : str, optional
          String that is written as a separator between two data columns.
        """

        with open(filename, 'w') as file_:

            # write header
            if header_order is None:
                if self._header_order is None:
                    col_hdr = self.keys()
                else:
                    # use stored order + newly added keys at the last columns
                    col_hdr = self._header_order + \
                              list(set(self.keys()).difference(
                                                    set(self._header_order)))
            else:
                if len(header_order) != self.ncolumns:
                    raise ValueError('Header list does not match number of '
                                     'columns.')
                for k in header_order:
                    if k not in self:
                        raise ValueError('Unknown key [%r]' % (k,))
                col_hdr = header_order

            if header is True:
                file_.write(sep.join(col_hdr) + '\n')

            # for all rows
            for r in xrange(self.nrows):
                # get attributes for all keys
                l = [str(self[k][r]) for k in col_hdr]
                # write to file with proper separator
                file_.write(sep.join(l) + '\n')


    @property
    def nrows(self):
        """Returns the number of rows.
        """
        # no data no rows (after Bob Marley)
        if not len(self.keys()):
            return 0
        # otherwise first key is as good as any other
        else:
            return len(self[self.keys()[0]])
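
A small usage sketch, assuming a whitespace-separated text file with a
header line (file name and column names are invented):

# contents of 'design.txt' (hypothetical):
#   onset duration condition
#   0.0   2.0      1
#   4.5   2.0      2
d = ColumnData('design.txt', header=True, dtype=None)
print("%d rows x %d columns" % (d.nrows, d.ncolumns))   # 2 rows x 3 columns
print(d['onset'])    # [0.0, 4.5] -- auto-converted since dtype=None
d.tofile('design_copy.txt', header=True)
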
Example #8
class Measure(Learner):
    """A measure computed from a `Dataset`

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformations are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_results` conditional
    attribute.

    Additionally all dataset measures support the estimation of the
    probabilit(y,ies) of a measure under some distribution. Typically this will
    be the NULL distribution (no signal), which can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor the respective
    probabilities are automatically computed and stored in the `null_prob`
    conditional attribute.

    Notes
    -----
    For developers: All subclasses shall get all necessary parameters via
    their constructor, so it is possible to get the same type of measure for
    multiple datasets by passing them to the __call__() method successively.

    """

    null_prob = ConditionalAttribute(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""
    null_t = ConditionalAttribute(enabled=False)
    """Stores the t-score corresponding to null_prob under assumption
    of Normal distribution"""
    def __init__(self, null_dist=None, **kwargs):
        """
        Parameters
        ----------
        null_dist : instance of distribution estimator
          The estimated distribution is used to assign a probability for a
          certain value of the computed measure.
        """
        Learner.__init__(self, **kwargs)

        null_dist_ = auto_null_dist(null_dist)
        if __debug__:
            debug(
                'SA', 'Assigning null_dist %s (original given was %s)' %
                (null_dist_, null_dist))
        self.__null_dist = null_dist_

    __doc__ = enhanced_doc_string('Measure', locals(), Learner)

    def __repr__(self, prefixes=None):
        """String representation of a `Measure`

        Includes only arguments which differ from default ones
        """
        if prefixes is None:
            prefixes = []
        return super(Measure, self).__repr__(prefixes=prefixes +
                                             _repr_attrs(self, ['null_dist']))

    def _precall(self, ds):
        # estimate the NULL distribution when functor is given
        if self.__null_dist is not None:
            if __debug__:
                debug(
                    "STAT",
                    "Estimating NULL distribution using %s" % self.__null_dist)

            # we need a matching measure instance, but we have to disable
            # the estimation of the null distribution in that child to prevent
            # infinite looping.
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, ds)

    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        if self.__null_dist is None:
            # do base-class postcall and be done
            result = super(Measure, self)._postcall(dataset, result)
        else:
            # don't do a full base-class postcall, only do the
            # postproc-application here, to gain result compatibility with the
            # fitted null distribution -- necessary to be able to use
            # a Node's 'pass_attr' to pick up ca.null_prob
            result = self._apply_postproc(dataset, result)

            if self.ca.is_enabled('null_t'):
                # get probability under the NULL hypothesis, and also ask
                # whether it belongs to the right tail
                null_prob, null_right_tail = \
                           self.__null_dist.p(result, return_tails=True)
                self.ca.null_prob = null_prob

                externals.exists('scipy', raise_=True)
                from scipy.stats import norm

                # TODO: following logic should appear in NullDist,
                #       not here
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = np.abs(null_prob.samples)
                elif tail == 'right':
                    acdf = 1.0 - np.abs(null_prob.samples)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5)
                else:
                    raise RuntimeError('Unhandled tail %s' % tail)
                # We need to clip to avoid non-informative inf's ;-)
                # that happens due to lack of precision in mantissa
                # which is 11 bits in double. We could clip values
                # around 0 at as low as 1e-100 (correspond to z~=21),
                # but for consistency lets clip at 1e-16 which leads
                # to distinguishable value around p=1 and max z=8.2.
                # Should be sufficient range of z-values ;-)
                clip = 1e-16
                null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip))
                # assure that we deal with arrays:
                null_t = np.array(null_t, ndmin=1, copy=False)
                null_t[~null_right_tail] *= -1.0  # revert sign for negatives
                null_t_ds = null_prob.copy(deep=False)
                null_t_ds.samples = null_t
                self.ca.null_t = null_t_ds  # store as a Dataset
            else:
                # get probability of result under NULL hypothesis if available
                # and don't request tail information
                self.ca.null_prob = self.__null_dist.p(result)
            # now do the second half of postcall and invoke pass_attr
            result = self._pass_attr(dataset, result)
        return result

    @property
    def null_dist(self):
        """Return Null Distribution estimator"""
        return self.__null_dist
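
The null_t computation above maps a (clipped) cumulative probability to a
z-score via the normal quantile function. The core transform in isolation,
with made-up probabilities:

import numpy as np
from scipy.stats import norm

acdf = np.array([0.5, 0.975, 1.0])   # hypothetical right-tail CDF values
clip = 1e-16                         # same clipping as in the code above
z = norm.ppf(np.clip(acdf, clip, 1.0 - clip))
# z is roughly [0.0, 1.96, 8.2]; the 1.0 entry saturates near z=8.2
# because of the clipping discussed in the comment above
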
Example #9
class FSLAtlas(XMLBasedAtlas):
    """Base class for FSL atlases

    """
    source = 'FSL'


    def __init__(self, *args, **kwargs):
        """
        """
        XMLBasedAtlas.__init__(self, *args, **kwargs)
        self.space = 'MNI'


    __doc__ = enhanced_doc_string('FSLAtlas', locals(), XMLBasedAtlas)


    ##REF: Name was automagically refactored
    def _load_images(self):
        resolution = self._resolution
        header = self.header
        images = header.images
        # Load present images
        # XXX might be refactored to avoid duplication of
        #     effort with PyMVPAAtlas
        ni_image = None
        resolutions = []
        if self._force_image_file is None:
            imagefile_candidates = [
                reuse_absolute_path(self._filename, i.imagefile.text, force=True)
                for i in images]
        else:
            imagefile_candidates = [self._force_image_file]

        for imagefilename in imagefile_candidates:
            try:
                if not os.path.exists(imagefilename):
                    # try with extension if filename doesn't exist
                    imagefilename += '.nii.gz'
                ni_image_ = nb.load(imagefilename)
            except RuntimeError:
                raise RuntimeError("Cannot open file " + imagefilename)

            resolution_ = ni_image_.header.get_zooms()[0]
            if resolution is None:
                # select this one if it is the best (finest) so far
                if ni_image is None or \
                       resolution_ < ni_image.header.get_zooms()[0]:
                    ni_image = ni_image_
                    self._image_file = imagefilename
            else:
                if resolution_ == resolution:
                    ni_image = ni_image_
                    self._image_file = imagefilename
                    break
                else:
                    resolutions += [resolution_]
            # TODO: also make use of summaryimagefile maybe?

        if ni_image is None:
            msg = "Could not find an appropriate atlas among %d atlases." \
                  % len(imagefile_candidates)
            if resolution is not None:
                msg += " Atlases had resolutions %s" % \
                      (resolutions,)
            raise RuntimeError(msg)
        if __debug__:
            debug('ATL__', "Loading atlas data from %s" % self._image_file)
        self._image = ni_image
        self._resolution = ni_image.header.get_zooms()[0]
        self._origin = np.abs(ni_image.header.get_qform()[:3, 3])  # XXX

        self._data = self._image.get_data()
        if len(self._data.shape) == 4:
            # want to have volume axis first
            self._data = np.rollaxis(self._data, -1)


    def _load_metadata(self):
        """   """
        # Load levels
        self._levels = {}
        # preprocess labels for different levels
        self.nlevels = 1
        #level = Level.from_xml(self.data, level_type='label')
        level = LabelsLevel.from_xml(self.data)#, level_type='label')
        level.description = self.header.name.text
        self._levels = {0: level}
        #for index, child in enumerate(self.data.getchildren()):
        #   if child.tag == 'level':
        #       level = Level.from_xml(child)
        #       self._levels[level.description] = level
        #       try:
        #           self._levels[level.index] = level
        #       except:
        #           pass
        #   else:
        #       raise XMLAtlasException("Unknown child '%s' within data" % child.tag)
        #   self.nlevels += 1
        #pass


    @staticmethod
    ##REF: Name was automagically refactored
    def _check_version(version):
        return re.search(r'^[0-9]+\.[0-9]', version) is not None
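
The selection rule in _load_images restated: with no requested resolution,
keep the candidate with the finest (smallest) voxel size; with a requested
one, take the first exact match. A toy sketch with made-up zooms:

candidates = [(2.0, 'atlas-2mm.nii.gz'), (1.0, 'atlas-1mm.nii.gz')]
best_zoom, best_file = min(candidates)   # finest resolution wins
assert best_file == 'atlas-1mm.nii.gz'
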
Example #10
class FSLProbabilisticAtlas(FSLAtlas):
    """Probabilistic FSL atlases
    """

    def __init__(self, thr=0.0, strategy='all', sort=True, *args, **kwargs):
        """

        Parameters
        ----------
        thr : float
          Value to threshold at
        strategy : str
          Possible values
            all - all entries above thr
            max - entry with maximal value
        sort : bool
          Whether to sort entries for the 'all' strategy according to
          probability
        """

        FSLAtlas.__init__(self, *args, **kwargs)
        self.thr = thr
        self.strategy = strategy
        self.sort = sort

    __doc__ = enhanced_doc_string('FSLProbabilisticAtlas', locals(), FSLAtlas)

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):
        """Return labels for the voxel

        Parameters
        ----------
        c : tuple of coordinates (xyz)
        levels : just for API consistency (heh heh). Must be 0 for FSL atlases
        """

        if levels is not None and levels not in [0, [0], (0,)]:
            raise ValueError("FSL atlases do not support levels other than 0."
                             " Got levels=%s" % (levels,))
        # check range
        c = self._check_range(c)

        # XXX think -- may be we should better assign each map to a
        # different level
        level = 0
        resultLabels = []
        for index, area in enumerate(self._levels[level]):
            prob = int(self._data[index, c[0], c[1], c[2]])
            if prob > self.thr:
                resultLabels += [dict(index=index,
                                      #id=
                                      label=area.text,
                                      prob=prob)]

        if self.sort or self.strategy == 'max':
            resultLabels.sort(key=lambda x: x['prob'], reverse=True)

        if self.strategy == 'max':
            resultLabels = resultLabels[:1]
        elif self.strategy == 'all':
            pass
        else:
            raise ValueError('Unknown strategy %s' % self.strategy)

        result = {'voxel_queried' : c,
                  # in the list since we have only single level but
                  # with multiple entries
                  'labels': [resultLabels]}

        return result

    def find(self, *args, **kwargs):
        """Just a shortcut to the only level.

        See :class:`~mvpa2.atlases.base.Level.find` for more info
        """
        return self.levels[0].find(*args, **kwargs)

    def get_map(self, target, strategy='unique', axes_order='xyz'):
        """Return a probability map as an array

        Parameters
        ----------
        target : int or str or re._pattern_type
          If int, map for given index is returned. Otherwise, .find is called
          with ``unique=True`` to find matching area
        strategy : str in ('unique', 'max')
          If 'unique', then if multiple areas match, exception would be raised.
          In case of 'max', each voxel would get maximal value of probabilities
          from all matching areas
        axes_order : str in ('xyz', 'zyx')
          In what order axes of the returned array should follow.
        """
        if isinstance(target, int):
            res = self._data[target]
            # since we no longer support pynifti all is XYZ
            if axes_order == 'xyz':
                return res
            elif axes_order == 'zyx':
                return res.T
            else:
                raise ValueError("Unknown axes_order=%r provided" % (axes_order,))
        else:
            lev = self.levels[0]       # we have just 1 here
            if strategy == 'unique':
                return self.get_map(lev.find(target, unique=True).index,
                                    axes_order=axes_order)
            else:
                maps_dict = self.get_maps(target, axes_order=axes_order)
                maps = np.array(list(maps_dict.values()))
                return np.max(maps, axis=0)

    def get_maps(self, target, axes_order='xyz', key_attr=None,
                 overlaps=None):
        """Return a dictionary of probability maps for the target

        Each key is a `Label` instance, and value is the probability map

        Parameters
        ----------
        target : str or re._pattern_type
          .find is called with a target and unique=False to find all matches
        axes_order : str in ('xyz', 'zyx')
          In what order axes of the returned array should follow.
        key_attr : None or str
          What to use for the keys of the dictionary.  If None,
          `Label` instance would be used as a key.  If some attribute
          provided (e.g. 'text', 'abbr', 'index'), corresponding
          attribute of the `Label` instance would be taken as a key.
        overlaps : None or {'max'}
          How to treat overlaps in maps.  If None, nothing is done and maps
          might have overlaps.  If 'max', then maps would not overlap and
          competing maps will be resolved based on maximal value (e.g. if
          maps contain probabilities).
        """
        lev = self.levels[0]       # we have just 1 here
        if key_attr is None:
            key_gen = lambda x: x
        else:
            key_gen = lambda x: getattr(x, key_attr)

        res = [[key_gen(l),
                self.get_map(l.index, axes_order=axes_order)]
               for l in lev.find(target, unique=False)]

        if overlaps == 'max':
            # not efficient since it places all maps back into a single
            # ndarray... but well
            maps = np.array([x[1] for x in res])
            maximums = np.argmax(maps, axis=0)
            overlaps = np.sum(maps != 0, axis=0) > 1
            # now let's go and infiltrate the maps:
            # use a plain loop since we will possibly reassign entries
            for i in range(len(res)):
                n, m = res[i]
                losers = np.logical_and(overlaps, ~(maximums == i))
                if losers.any():
                    # copy and modify
                    m_new = m.copy()
                    m_new[losers] = 0
                    res[i][1] = m_new
        elif overlaps is None:
            pass
        else:
            raise ValueError("Incorrect value of overlaps argument %s" % overlaps)
        return dict(res)
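
A hypothetical query against such an atlas (file name, threshold and target
are assumptions): with strategy='max' only the most probable area survives,
and get_maps with overlaps='max' zeroes competing voxels in all but the
winning map.

atlas = FSLProbabilisticAtlas(filename='HarvardOxford-Cortical.xml',
                              thr=25.0, strategy='max')  # hypothetical file
res = atlas.label_voxel((45, 74, 51))
print(res['labels'][0])   # at most one entry above the threshold
maps = atlas.get_maps('Frontal', key_attr='text', overlaps='max')
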
Example #11
class FSLAtlas(XMLBasedAtlas):
    """Base class for FSL atlases

    """
    source = 'FSL'

    def __init__(self, *args, **kwargs):
        """
        """
        XMLBasedAtlas.__init__(self, *args, **kwargs)
        self.space = 'MNI'

    __doc__ = enhanced_doc_string('FSLAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        resolution = self._resolution
        header = self.header
        images = header.images
        # Load present images
        # XXX might be refactored to avoid duplication of
        #     effort with PyMVPAAtlas
        ni_image = None
        resolutions = []
        if self._force_image_file is None:
            imagefile_candidates = [
                reuse_absolute_path(self._filename,
                                    i.imagefile.text,
                                    force=True) for i in images
            ]
        else:
            imagefile_candidates = [self._force_image_file]

        for imagefilename in imagefile_candidates:
            try:
                if not os.path.exists(imagefilename):
                    # try with extension if filename doesn't exist
                    imagefilename += '.nii.gz'
                ni_image_ = nb.load(imagefilename)
            except RuntimeError:
                raise RuntimeError("Cannot open file " + imagefilename)

            resolution_ = ni_image_.get_header().get_zooms()[0]
            if resolution is None:
                # select this one if it is the best (finest) so far
                if ni_image is None or \
                       resolution_ < ni_image.get_header().get_zooms()[0]:
                    ni_image = ni_image_
                    self._image_file = imagefilename
            else:
                if resolution_ == resolution:
                    ni_image = ni_image_
                    self._image_file = imagefilename
                    break
                else:
                    resolutions += [resolution_]
            # TODO: also make use of summaryimagefile maybe?

        if ni_image is None:
            msg = "Could not find an appropriate atlas among %d atlases." \
                  % len(imagefile_candidates)
            if resolution is not None:
                msg += " Atlases had resolutions %s" % \
                      (resolutions,)
            raise RuntimeError(msg)
        if __debug__:
            debug('ATL__', "Loading atlas data from %s" % self._image_file)
        self._image = ni_image
        self._resolution = ni_image.get_header().get_zooms()[0]
        self._origin = np.abs(ni_image.get_header().get_qform()[:3, 3])  # XXX

        self._data = self._image.get_data()
        if len(self._data.shape) == 4:
            # want to have volume axis first
            self._data = np.rollaxis(self._data, -1)
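
The closing rollaxis above can be checked in isolation: a 4D x,y,z,volumes
array is rolled so the volume axis comes first. A minimal sketch with
made-up dimensions:

import numpy as np

data = np.zeros((91, 109, 91, 3))   # x, y, z, volumes (hypothetical sizes)
data = np.rollaxis(data, -1)        # volume axis first, as the code does
assert data.shape == (3, 91, 109, 91)
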
Example #12
class ProjectionMapper(Mapper):
    """Linear mapping between multidimensional spaces.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    `_proj` and optionally offset vectors `_offset_in` and
    `_offset_out` (if initialized with demean=True, which is default)
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards linear mapping of
    data, the latter by default using pseudo-inverse (but could be
    altered in subclasses, like hermitian (conjugate) transpose in
    case of SVD).  Additionally, `ProjectionMapper` supports optional
    selection of arbitrary component (i.e. columns of the projection
    matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties.
    """

    _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""

    def __init__(self, demean=True, **kwargs):
        """Initialize the ProjectionMapper

        Parameters
        ----------
        demean : bool
          Whether data should be demeaned while computing
          projections and applied back while doing reverse()
        """
        Mapper.__init__(self, **kwargs)

        # by default we want to wipe the feature attributes out during mapping
        self._fa_filter = []

        self._proj = None
        """Forward projection matrix."""
        self._recon = None
        """Reverse projection (reconstruction) matrix."""
        self._demean = demean
        """Flag whether to demean the to be projected data, prior to projection.
        """
        self._offset_in = None
        """Offset (most often just mean) in the input space"""
        self._offset_out = None
        """Offset (most often just mean) in the output space"""

    __doc__ = enhanced_doc_string('ProjectionMapper', locals(), Mapper)

    @accepts_dataset_as_samples
    def _pretrain(self, samples):
        """Determine the projection matrix.

        Parameters
        ----------
        samples : array-like
          Samples to operate on
        """
        if self._demean:
            self._offset_in = samples.mean(axis=0)

    ##REF: Name was automagically refactored
    def _demean_data(self, data):
        """Helper which optionally demeans
        """
        if self._demean:
            # demean the training data
            data = data - self._offset_in

            if __debug__ and "MAP_" in debug.active:
                debug(
                    "MAP_",
                    "%s: Mean of data in input space %s was subtracted" %
                    (self.__class__.__name__, self._offset_in))
        return data

    def _forward_data(self, data):
        if self._proj is None:
            raise RuntimeError("Mapper needs to be train before used.")

        # local binding
        demean = self._demean

        d = np.asmatrix(data)

        # Remove input offset if present
        if demean and self._offset_in is not None:
            d = d - self._offset_in

        # Do forward projection
        res = (d * self._proj).A

        # Add output offset if present
        if demean and self._offset_out is not None:
            res += self._offset_out

        return res

    def _reverse_data(self, data):
        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")
        d = np.asmatrix(data)
        # Remove offset if present in output space
        if self._demean and self._offset_out is not None:
            d = d - self._offset_out

        # Do reverse projection
        res = (d * self.recon).A

        # Add offset in input space
        if self._demean and self._offset_in is not None:
            res += self._offset_in

        return res

    ##REF: Name was automagically refactored
    def _compute_recon(self):
        """Given that a projection is present -- compute reconstruction matrix.
        By default -- pseudoinverse of projection matrix.  Might be overridden
        in derived classes for efficiency.
        """
        return np.linalg.pinv(self._proj)

    ##REF: Name was automagically refactored
    def _get_recon(self):
        """Compute (if necessary) and return reconstruction matrix
        """
        # (re)build reconstruction matrix
        recon = self._recon
        if recon is None:
            self._recon = recon = self._compute_recon()
        return recon

    proj = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=_get_recon, doc="Backprojection matrix")
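
Since ProjectionMapper is abstract, a subclass only has to fill in _proj
during training. A toy, PCA-flavored sketch via SVD (not an actual PyMVPA
mapper; it assumes the base class above and that _pretrain already stored
_offset_in when demeaning is enabled):

import numpy as np

class SVDProjection(ProjectionMapper):
    """Toy mapper projecting onto the right singular vectors."""
    def _train(self, ds):
        samples = np.asarray(ds.samples, dtype=float)
        if self._offset_in is not None:
            samples = samples - self._offset_in   # set by _pretrain
        # columns of _proj are the projection directions
        self._proj = np.linalg.svd(samples, full_matrices=False)[2].T
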