class PyMVPAAtlas(XMLBasedAtlas):
    """Base class for PyMVPA atlases, such as LabelsAtlas and ReferenceAtlas.

    Validates that the atlas XML header declares the space (and its flavor)
    the atlas resides in, and loads the label volume referenced from the
    header.
    """

    source = 'PyMVPA'

    def __init__(self, *args, **kwargs):
        XMLBasedAtlas.__init__(self, *args, **kwargs)

        # sanity checks -- both tags are required so atlases can be
        # compared/combined by space
        header = self.header
        headerChildrenTags = XMLBasedAtlas._children_tags(header)
        if not ('space' in headerChildrenTags) or \
           not ('space-flavor' in headerChildrenTags):
            raise XMLAtlasException("PyMVPA Atlas requires specification of"
                                    " the space in which atlas resides")

        self.__space = header.space.text
        self.__spaceFlavor = header['space-flavor'].text

    __doc__ = enhanced_doc_string('PyMVPAAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        """Load the NIfTI image holding the label volume(s).

        Honors an optional ``offset`` attribute on the ``imagefile`` header
        element and an externally forced image file
        (``self._force_image_file``).
        """
        # shortcut
        imagefile = self.header.images.imagefile
        #self.nlevels = len(self._levels_by_id)

        # Set offset if defined in XML file
        # XXX: should just take one from the qoffset... now that one is
        #      defined... this origin might be misleading actually
        self._origin = np.array((0, 0, 0))
        # dict.has_key() is deprecated -- use the `in` operator instead
        if 'offset' in imagefile.attrib:
            self._origin = np.array(
                [int(x) for x in imagefile.get('offset').split(',')])

        # Load the image file which has labels
        if self._force_image_file is not None:
            imagefilename = self._force_image_file
        else:
            imagefilename = imagefile.text
        imagefilename = reuse_absolute_path(self._filename, imagefilename)

        try:
            self._image = None
            # try the filename verbatim first, then with .nii.gz appended
            for ext in ['', '.nii.gz']:
                try:
                    self._image = nb.load(imagefilename + ext)
                    break
                except Exception as e:
                    pass
            if self._image is None:
                # re-raise the failure of the last load attempt
                raise e
        except RuntimeError as e:
            raise RuntimeError(" Cannot open file %s due to %s"
                               % (imagefilename, e))
class LabelsAtlas(PyMVPAAtlas):
    """
    Atlas which provides labels for the given coordinate
    """

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):
        """
        Return labels for the given voxel at specified levels specified by index
        """
        levels = self._get_selected_levels(levels=levels)

        result = {'voxel_queried': c}

        # check range
        c = self._check_range(c)

        labels = []
        for level_key in levels:
            # resolve the index-or-description key to a Level instance
            if level_key not in self._levels:
                raise IndexError("Unknown index or description for level %d"
                                 % level_key)
            lvl = self._levels[level_key]
            # label id stored in the volume for this level at voxel c
            label_index = int(self._data[lvl.index, c[0], c[1], c[2]])
            labels.append({'index': lvl.index,
                           'id': lvl.description,
                           'label': lvl[label_index]})

        result['labels'] = labels
        return result

    __doc__ = enhanced_doc_string('LabelsAtlas', locals(), PyMVPAAtlas)
def __init__(cls, name, bases, dict):
    """Collect `IndexedCollectable` class attributes into collections.

    Gathers all `IndexedCollectable` attributes defined in the class body
    (plus the collection templates inherited from the bases), removes them
    from the class, and stores the result as a ``_collections_template``
    class attribute.  Also composes parameter/conditional-attribute
    documentation for the class.

    Parameters
    ----------
    name : str
      Name of the class
    bases : iterable
      Base classes
    dict : dict
      Attributes.
    """
    if __debug__:
        debug(
            "COLR",
            "AttributesCollector call for %s.%s, where bases=%s, dict=%s ",
            (cls, name, bases, dict))

    super(AttributesCollector, cls).__init__(name, bases, dict)

    collections = {}
    for name, value in dict.iteritems():
        if isinstance(value, IndexedCollectable):
            baseclassname = value.__class__.__name__
            col = _known_collections[baseclassname][0]
            # XXX should we allow to throw exceptions here?
            if col not in collections:
                collections[col] = {}
            collections[col][name] = value
            # and assign name if not yet was set
            if value.name is None:
                value.name = name
            # !!! We do not keep copy of this attribute static in the class.
            #     Due to below traversal of base classes, we should be
            #     able to construct proper collections even in derived classes
            delattr(cls, name)

    # XXX can we first collect parent's ca and then populate with ours?
    # TODO

    for base in bases:
        if hasattr(base, "__class__") and \
               base.__class__ == AttributesCollector:
            # TODO take care about overriding one from super class
            # for state in base.ca:
            #    if state[0] =
            newcollections = base._collections_template
            if len(newcollections) == 0:
                continue
            if __debug__: # XXX RF:  and "COLR" in debug.active:
                debug("COLR",
                      "Collect collections %s for %s from %s",
                      (newcollections, cls, base))
            for col, super_collection in newcollections.iteritems():
                if col in collections:
                    # BUGFIX: previously the whole super collection was
                    # .update()d into ours, which let a base class stomp on
                    # items the current class (re)defined itself.  Only pull
                    # in items which are not already present.
                    collection = collections[col]
                    for pname, pval in super_collection.iteritems():
                        if pname not in collection:
                            collection[pname] = pval
                else:
                    collections[col] = super_collection

    if __debug__:
        debug("COLR",
              "Creating ConditionalAttributesCollection template %s "
              "with collections %s", (cls, collections.keys()))

    # if there is an explicit list of collections -- verify each is known
    # to the collector and present in the result
    if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
        for col in cls._ATTRIBUTE_COLLECTIONS:
            if not col in _col2class:
                raise ValueError(
                    "Requested collection %s is unknown to collector" % col)
            if not col in collections:
                collections[col] = None

    # TODO: check on conflict in names of Collections' items! since
    #       otherwise even order is not definite since we use dict for
    #       collections.  XXX should we switch to tuple?

    for col, colitems in collections.iteritems():
        # so far we collected the collection items in a dict, but the new
        # API requires to pass a _list_ of collectables instead of a dict.
        # So, whenever there are items, we pass just the values of the dict.
        # There is no information last, since the keys of the dict are the
        # name attributes of each collectable in the list.
        if colitems is not None:
            collections[col] = _col2class[col](items=colitems.values())
        else:
            collections[col] = _col2class[col]()

    setattr(cls, "_collections_template", collections)

    #
    # Expand documentation for the class based on the listed
    # parameters an if it is stateful
    #
    # TODO -- figure nice way on how to alter __init__ doc directly...
    # NOTE: a TextWrapper instance used to be constructed here but was
    #       never used -- removed as dead code.

    # Parameters
    paramsdoc = []
    paramscols = []
    for col in ('params', 'kernel_params'):
        if col in collections:
            paramscols.append(col)
            # lets at least sort the parameters for consistent output
            col_items = collections[col]
            iparams = [(v._instance_index, k)
                       for k, v in col_items.iteritems()]
            iparams.sort()
            paramsdoc += [(col_items[iparam[1]].name,
                           col_items[iparam[1]]._paramdoc())
                          for iparam in iparams]

    # Parameters collection could be taked hash of to decide if
    # any were changed? XXX may be not needed at all?
    setattr(cls, "_paramscols", paramscols)

    # States doc
    cadoc = ""
    if 'ca' in collections:
        paramsdoc += [
            ('enable_ca', "enable_ca : None or list of str\n "
             "Names of the conditional attributes which should "
             "be enabled in addition\n to the default ones"),
            ('disable_ca', "disable_ca : None or list of str\n "
             "Names of the conditional attributes which should "
             "be disabled")]
        if len(collections['ca']):
            cadoc += '\n'.join(['* ' + x
                                for x in collections['ca'].listing])
            cadoc += "\n\n(Conditional attributes enabled by default suffixed with `+`)"
        if __debug__:
            debug("COLR", "Assigning __cadoc to be %s", (cadoc,))
        setattr(cls, "_cadoc", cadoc)

    # NOTE(review): paramsdoc is a list so `!= ""` is always True and
    # _paramsdoc gets assigned unconditionally; kept as-is since downstream
    # code may rely on the attribute being present.
    if paramsdoc != "":
        if __debug__ and 'COLR' in debug.active:
            debug("COLR", "Assigning __paramsdoc to be %s", (paramsdoc,))
        setattr(cls, "_paramsdoc", paramsdoc)

    if len(paramsdoc) or cadoc != "":
        cls.__doc__ = enhanced_doc_string(cls, *bases)
def __init__(cls, name, bases, dict):
    """Collect `IndexedCollectable` class attributes into collections.

    Metaclass initializer: gathers all `IndexedCollectable` attributes
    from the class body and base-class templates into collection objects,
    removes them from the class, stores them as ``_collections_template``,
    and composes parameter/conditional-attribute documentation.

    Parameters
    ----------
    name : str
      Name of the class
    bases : iterable
      Base classes
    dict : dict
      Attributes.
    """
    if __debug__:
        debug(
            "COLR",
            "AttributesCollector call for %s.%s, where bases=%s, dict=%s ",
            (cls, name, bases, dict))

    super(AttributesCollector, cls).__init__(name, bases, dict)

    # collection name -> {attribute name -> IndexedCollectable}
    collections = {}
    for name, value in dict.iteritems():
        if isinstance(value, IndexedCollectable):
            baseclassname = value.__class__.__name__
            # the collectable's class name decides which collection it joins
            col = _known_collections[baseclassname][0]
            # XXX should we allow to throw exceptions here?
            if col not in collections:
                collections[col] = {}
            collections[col][name] = value
            # and assign name if not yet was set
            if value.name is None:
                value.name = name
            # !!! We do not keep copy of this attribute static in the class.
            #     Due to below traversal of base classes, we should be
            #     able to construct proper collections even in derived classes
            delattr(cls, name)

    # XXX can we first collect parent's ca and then populate with ours?
    # TODO

    # merge in the collection templates of bases built by this same
    # metaclass
    for base in bases:
        if hasattr(base, "__class__") and \
               base.__class__ == AttributesCollector:
            # TODO take care about overriding one from super class
            # for state in base.ca:
            #    if state[0] =
            newcollections = base._collections_template
            if len(newcollections) == 0:
                continue
            if __debug__: # XXX RF:  and "COLR" in debug.active:
                debug("COLR",
                      "Collect collections %s for %s from %s",
                      (newcollections, cls, base))
            for col, super_collection in newcollections.iteritems():
                if col in collections:
                    if __debug__:
                        debug("COLR",
                              "Updating existing collection %s with the one from super class"
                              % col)
                    collection = collections[col]
                    # Current class could have overriden a parameter, so
                    # we need to keep it without updating
                    for pname, pval in super_collection.iteritems():
                        if pname not in collection:
                            collection[pname] = pval
                        elif __debug__:
                            debug("COLR",
                                  "Not overriding %s.%s of cls %s from base %s"
                                  % (col, pname, cls, base))
                else:
                    collections[col] = super_collection

    if __debug__:
        debug("COLR",
              "Creating ConditionalAttributesCollection template %s "
              "with collections %s", (cls, collections.keys()))

    # if there is an explicit
    if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
        for col in cls._ATTRIBUTE_COLLECTIONS:
            if not col in _col2class:
                raise ValueError, \
                      "Requested collection %s is unknown to collector" % \
                      col
            if not col in collections:
                collections[col] = None

    # TODO: check on conflict in names of Collections' items! since
    #       otherwise even order is not definite since we use dict for
    #       collections.  XXX should we switch to tuple?

    for col, colitems in collections.iteritems():
        # so far we collected the collection items in a dict, but the new
        # API requires to pass a _list_ of collectables instead of a dict.
        # So, whenever there are items, we pass just the values of the dict.
        # There is no information last, since the keys of the dict are the
        # name attributes of each collectable in the list.
        if colitems is not None:
            collections[col] = _col2class[col](items=colitems.values())
        else:
            collections[col] = _col2class[col]()

    setattr(cls, "_collections_template", collections)

    #
    # Expand documentation for the class based on the listed
    # parameters an if it is stateful
    #
    # TODO -- figure nice way on how to alter __init__ doc directly...
    # NOTE(review): textwrapper appears unused below -- candidate for removal
    textwrapper = TextWrapper(subsequent_indent=" ",
                              initial_indent=" ",
                              width=70)

    # Parameters
    paramsdoc = []
    paramscols = []
    for col in ('params', 'kernel_params'):
        if col in collections:
            paramscols.append(col)
            # lets at least sort the parameters for consistent output
            col_items = collections[col]
            iparams = [(v._instance_index, k)
                       for k,v in col_items.iteritems()]
            iparams.sort()
            paramsdoc += [(col_items[iparam[1]].name,
                           col_items[iparam[1]]._paramdoc())
                          for iparam in iparams]

    # Parameters collection could be taked hash of to decide if
    # any were changed? XXX may be not needed at all?
    setattr(cls, "_paramscols", paramscols)

    # States doc
    cadoc = ""
    if 'ca' in collections:
        paramsdoc += [
            ('enable_ca', "enable_ca : None or list of str\n "
             "Names of the conditional attributes which should "
             "be enabled in addition\n to the default ones"),
            ('disable_ca', "disable_ca : None or list of str\n "
             "Names of the conditional attributes which should "
             "be disabled""")]
        if len(collections['ca']):
            cadoc += '\n'.join(['* ' + x
                                for x in collections['ca'].listing])
            cadoc += "\n\n(Conditional attributes enabled by default suffixed with `+`)"
        if __debug__:
            debug("COLR", "Assigning __cadoc to be %s", (cadoc,))
        setattr(cls, "_cadoc", cadoc)

    # NOTE(review): paramsdoc is a list, so `!= ""` is always True here
    if paramsdoc != "":
        if __debug__ and 'COLR' in debug.active:
            debug("COLR", "Assigning __paramsdoc to be %s", (paramsdoc,))
        setattr(cls, "_paramsdoc", paramsdoc)

    if len(paramsdoc) or cadoc != "":
        cls.__doc__ = enhanced_doc_string(cls, *bases)
class ReferencesAtlas(PyMVPAAtlas):
    """
    Atlas which provides references to the other atlases.

    Example: the atlas which has references to the closest points
    (closest Gray, etc) in another atlas.
    """
    def __init__(self, distance=0, reference_level=None, *args, **kwargs):
        """Initialize `ReferencesAtlas`

        Parameters
        ----------
        distance : int or float, optional
          Maximal acceptable distance to the referenced voxel; beyond it
          the query falls back to the original coordinate.
        reference_level : optional
          Level of this atlas which holds the reference coordinates.
        *args, **kwargs
          Passed on to `PyMVPAAtlas.__init__`.
        """
        PyMVPAAtlas.__init__(self, *args, **kwargs)
        # sanity checks
        if not ('reference-atlas' in XMLBasedAtlas._children_tags(
            self.header)):
            raise XMLAtlasException(
                "ReferencesAtlas must refer to a some other atlas")

        referenceAtlasName = self.header["reference-atlas"].text

        # uff -- another evil import but we better use the factory method
        from mvpa2.atlases.warehouse import Atlas

        self.__referenceAtlas = Atlas(filename=reuse_absolute_path(
            self._filename, referenceAtlasName))

        # both atlases must agree on space, otherwise coordinates would
        # not be comparable
        if self.__referenceAtlas.space != self.space or \
           self.__referenceAtlas.space_flavor != self.space_flavor:
            raise XMLAtlasException(
                "Reference and original atlases should be in the same space")

        self.__referenceLevel = None            # pylint shut up
        if reference_level is not None:
            self.set_reference_level(reference_level)
        self.set_distance(distance)

    __doc__ = enhanced_doc_string('ReferencesAtlas', locals(), PyMVPAAtlas)

    # number of levels must be of the referenced atlas due to
    # handling of that in __getitem__
    #nlevels = property(fget=lambda self:self.__referenceAtlas.nlevels)
    ##REF: Name was automagically refactored
    def _get_nlevels_virtual(self):
        # delegate to the referenced atlas
        return self.__referenceAtlas.nlevels

    ##REF: Name was automagically refactored
    def set_reference_level(self, level):
        """
        Set the level which will be queried
        """
        if level in self._levels:
            self.__referenceLevel = self._levels[level]
        else:
            raise IndexError("Unknown reference level %r. " % level + \
                             "Known are %r" % (list(self._levels.keys()), ))

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):
        """Return labels from the referenced atlas for voxel `c`.

        Looks up (at the reference level) the coordinate of the closest
        referenced voxel and queries the referenced atlas there when it is
        within `self.distance`; otherwise queries at `c` directly.
        """
        if self.__referenceLevel is None:
            warning("You did not provide what level to use "
                    "for reference. Assigning 0th level -- '%s'"
                    % (self._levels[0], ))
            self.set_reference_level(0)
            # return self.__referenceAtlas.label_voxel(c, levels)

        c = self._check_range(c)

        # obtain coordinates of the closest voxel
        cref = self._data[self.__referenceLevel.indexes, c[0], c[1], c[2]]
        # physical distance -- voxel offset scaled by voxel dimensions
        dist = norm((cref - c) * self.voxdim)
        if __debug__:
            debug(
                'ATL__',
                "Closest referenced point for %r is "
                "%r at distance %3.2f" % (c, cref, dist))
        if (self.distance - dist) >= 1e-3:  # neglect everything smaller
            result = self.__referenceAtlas.label_voxel(cref, levels)
            result['voxel_referenced'] = c
            result['distance'] = dist
        else:
            result = self.__referenceAtlas.label_voxel(c, levels)
            if __debug__:
                debug(
                    'ATL__',
                    "Closest referenced point is "
                    "further than desired distance %.2f" % self.distance)
            result['voxel_referenced'] = None
            result['distance'] = 0
        return result

    ##REF: Name was automagically refactored
    def levels_listing(self):
        # delegate to the referenced atlas
        return self.__referenceAtlas.levels_listing()

    ##REF: Name was automagically refactored
    def _get_levels_virtual(self):
        return self.__referenceAtlas.levels

    ##REF: Name was automagically refactored
    def set_distance(self, distance):
        """Set desired maximal distance for the reference
        """
        if distance < 0:
            raise ValueError("Distance should not be negative. "
                             " Thus '%f' is not a legal value" % distance)
        if __debug__:
            # NOTE(review): %d truncates non-integer distances in this
            # debug message -- confirm whether %s was intended
            debug('ATL__',
                  "Setting maximal distance for queries to be %d" % distance)
        self.__distance = distance

    # read access plus validated setters exposed as properties
    distance = property(fget=lambda self: self.__distance,
                        fset=set_distance)
    reference_level = property(fget=lambda self: self.__referenceLevel,
                               fset=set_reference_level)
class PyMVPAAtlas(XMLBasedAtlas):
    """Base class for PyMVPA atlases, such as LabelsAtlas and ReferenceAtlas
    """

    source = 'PyMVPA'

    def __init__(self, *args, **kwargs):
        XMLBasedAtlas.__init__(self, *args, **kwargs)

        # sanity checks -- both tags are required so atlases can be
        # compared/combined by space
        header = self.header
        headerChildrenTags = XMLBasedAtlas._children_tags(header)
        if not ('space' in headerChildrenTags) or \
           not ('space-flavor' in headerChildrenTags):
            raise XMLAtlasException("PyMVPA Atlas requires specification of"
                                    " the space in which atlas resides")

        self.__space = header.space.text
        self.__spaceFlavor = header['space-flavor'].text

    __doc__ = enhanced_doc_string('PyMVPAAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        """Load the label volume and normalize its axes.

        After loading, volume axes are reordered to put the level/time axis
        first and any extra singular leading dimensions are dropped.
        """
        # shortcut
        imagefile = self.header.images.imagefile
        #self.nlevels = len(self._levels_by_id)

        # Set offset if defined in XML file
        # XXX: should just take one from the qoffset... now that one is
        #      defined... this origin might be misleading actually
        self._origin = np.array((0, 0, 0))
        if 'offset' in imagefile.attrib:
            self._origin = np.array(
                [int(x) for x in imagefile.get('offset').split(',')])

        # Load the image file which has labels
        if self._force_image_file is not None:
            imagefilename = self._force_image_file
        else:
            imagefilename = imagefile.text
        imagefilename = reuse_absolute_path(self._filename, imagefilename)

        try:
            self._image = None
            # try the filename verbatim first, then with .nii.gz appended
            for ext in ['', '.nii.gz']:
                try:
                    self._image = nb.load(imagefilename + ext)
                    break
                except Exception as e:
                    pass
            if self._image is None:
                # re-raise the failure of the last load attempt
                raise e
        except RuntimeError as e:
            raise RuntimeError(" Cannot open file %s due to %s"
                               % (imagefilename, e))

        self._data = self._image.get_data()

        # we get the data as x,y,z[,t] but we want to have the time axis first
        # if any
        if len(self._data.shape) == 4:
            self._data = np.rollaxis(self._data, -1)

        # remove bogus dimensions on top of 4th
        if len(self._data.shape[0:-4]) > 0:
            bogus_dims = self._data.shape[0:-4]
            if max(bogus_dims) > 1:
                # BUGFIX: the two literals previously concatenated without a
                # space ("non-singulardimensions")
                raise RuntimeError("Atlas %s has more than 4 of non-singular "
                                   "dimensions" % imagefilename)
            new_shape = self._data.shape[-4:]
            # BUGFIX: ndarray.reshape returns a new array -- the previous
            # code discarded the result, leaving the bogus dims in place
            self._data = self._data.reshape(new_shape)

        #if self._image.extent[3] != self.nlevels:
        #    raise XMLAtlasException("Atlas %s has %d levels defined whenever %s has %d volumes" % \
        #                            ( filename, self.nlevels, imagefilename, self._image.extent[3] ))

    ##REF: Name was automagically refactored
    def _load_metadata(self):
        """Parse the level definitions from the atlas XML data element."""
        # Load levels
        self._levels = {}
        # preprocess labels for different levels
        self._Nlevels = 0
        index_incr = 0
        for index, child in enumerate(self.data.getchildren()):
            if child.tag == 'level':
                level = Level.from_xml(child)
                # levels are registered both by description and by index
                self._levels[level.description] = level
                if hasattr(level, 'index'):
                    index = level.index
                else:
                    # to avoid collision if some levels do
                    # have indexes
                    while index_incr in self._levels:
                        index_incr += 1
                    index, index_incr = index_incr, index_incr + 1
                self._levels[index] = level
            else:
                raise XMLAtlasException("Unknown child '%s' within data"
                                        % child.tag)
            self._Nlevels += 1

    ##REF: Name was automagically refactored
    def _get_nlevels_virtual(self):
        return self._Nlevels

    ##REF: Name was automagically refactored
    def _get_nlevels(self):
        return self._get_nlevels_virtual()

    @staticmethod
    ##REF: Name was automagically refactored
    def _check_version(version):
        # For compatibility lets support "RUMBA" atlases
        return version.startswith("pymvpa-") or version.startswith("rumba-")

    space = property(fget=lambda self: self.__space)
    space_flavor = property(fget=lambda self: self.__spaceFlavor)
    nlevels = property(fget=_get_nlevels)
class ColumnData(dict): """Read data that is stored in columns of text files. All read data is available via a dictionary-like interface. If column headers are available, the column names serve as dictionary keys. If no header exists an articfical key is generated: str(number_of_column). Splitting of text file lines is performed by the standard split() function (which gets passed the `sep` argument as separator string) and each element is converted into the desired datatype. Because data is read into a dictionary no two columns can have the same name in the header! Each column is stored as a list in the dictionary. """ def __init__(self, source, header=True, sep=None, headersep=None, dtype=float, skiplines=0): """Read data from file into a dictionary. Parameters ---------- source : str or dict If values is given as a string all data is read from the file and additonal keyword arguments can be used to customize the read procedure. If a dictionary is passed a deepcopy is performed. header : bool or list of str Indicates whether the column names should be read from the first line (`header=True`). If `header=False` unique column names will be generated (see class docs). If `header` is a python list, it's content is used as column header names and its length has to match the number of columns in the file. sep : str or None Separator string. The actual meaning depends on the output format (see class docs). headersep : str or None Separator string used in the header. The actual meaning depends on the output format (see class docs). dtype : type or list(types) Desired datatype(s). Datatype per column get be specified by passing a list of types. skiplines : int Number of lines to skip at the beginning of the file. 
""" # init base class dict.__init__(self) # intialize with default self._header_order = None if isinstance(source, basestring): self._from_file(source, header=header, sep=sep, headersep=headersep, dtype=dtype, skiplines=skiplines) elif isinstance(source, dict): for k, v in source.iteritems(): self[k] = v # check data integrity self._check() else: raise ValueError, 'Unkown source for ColumnData [%r]' \ % type(source) # generate missing properties for each item in the header classdict = self.__class__.__dict__ for k in self.keys(): if not k in classdict: getter = "lambda self: self._get_attrib('%s')" % (k) # Sanitarize the key, substitute ' []' with '_' k_ = sub('[[\] ]', '_', k) # replace multipe _s k_ = sub('__+', '_', k_) # remove quotes k_ = sub('["\']', '', k_) if __debug__: debug("IOH", "Registering property %s for ColumnData key %s" % (k_, k)) # make sure to import class directly into local namespace # otherwise following does not work for classes defined # elsewhere exec 'from %s import %s' % (self.__module__, self.__class__.__name__) exec "%s.%s = property(fget=%s)" % \ (self.__class__.__name__, k_, getter) # TODO!!! Check if it is safe actually here to rely on value of # k in lambda. May be it is treated as continuation and # some local space would override it???? #setattr(self.__class__, # k, # property(fget=lambda x: x._get_attrib("%s" % k))) # it seems to be error-prone due to continuation... __doc__ = enhanced_doc_string('ColumnData', locals()) ##REF: Name was automagically refactored def _get_attrib(self, key): """Return corresponding value if given key is known to current instance Is used for automatically added properties to the class. 
Raises ------ ValueError: If `key` is not known to given instance Returns ------- Value if `key` is known """ if key in self: return self[key] else: raise ValueError, "Instance %r has no data about %r" \ % (self, key) def __str__(self): s = self.__class__.__name__ if len(self.keys()) > 0: s += " %d rows, %d columns [" % \ (self.nrows, self.ncolumns) s += reduce(lambda x, y: x + " %s" % y, self.keys()) s += "]" return s def _check(self): """Performs some checks for data integrity. """ length = None for k in self.keys(): if length == None: length = len(self[k]) else: if not len(self[k]) == length: raise ValueError, "Data integrity lost. Columns do not " \ "have equal length." def _from_file(self, filename, header, sep, headersep, dtype, skiplines): """Loads column data from file -- clears object first. """ # make a clean table self.clear() with open(filename, 'r') as file_: self._header_order = None [ file_.readline() for x in range(skiplines) ] """Simply skip some lines""" # make column names, either take header or generate if header == True: # read first line and split by 'sep' hdr = file_.readline().split(headersep) # remove bogus empty header titles hdr = [ x for x in hdr if len(x.strip()) ] self._header_order = hdr elif isinstance(header, list): hdr = header else: hdr = [ str(i) for i in xrange(len(file_.readline().split(sep))) ] # reset file to not miss the first line file_.seek(0) [ file_.readline() for x in range(skiplines) ] # string in lists: one per column tbl = [ [] for i in xrange(len(hdr)) ] # store whether dtype should be determined automagically auto_dtype = dtype is None # do per column dtypes if not isinstance(dtype, list): dtype = [dtype] * len(hdr) # parse line by line and feed into the lists for line in file_: # get rid of leading and trailing whitespace line = line.strip() # ignore empty lines and comment lines if not line or line.startswith('#'): continue l = line.split(sep) if not len(l) == len(hdr): raise RuntimeError, \ "Number of entries in 
line [%i] does not match number " \ "of columns in header [%i]." % (len(l), len(hdr)) for i, v in enumerate(l): if dtype[i] is not None: try: v = dtype[i](v) except ValueError: warning("Can't convert %r to desired datatype %r." % (v, dtype) + " Leaving original type") tbl[i].append(v) if auto_dtype: attempt_convert_dtypes = (int, float) for i in xrange(len(tbl)): values = tbl[i] for attempt_convert_dtype in attempt_convert_dtypes: try: values = map(attempt_convert_dtype, values) tbl[i] = values break except: continue # check if not len(tbl) == len(hdr): raise RuntimeError, "Number of columns read from file does not " \ "match the number of header entries." # fill dict for i, v in enumerate(hdr): self[v] = tbl[i] def __iadd__(self, other): """Merge column data. """ # for all columns in the other object for k, v in other.iteritems(): if not k in self: raise ValueError, 'Unknown key [%r].' % (k,) if not isinstance(v, list): raise ValueError, 'Can only merge list data, but got [%r].' \ % type(v) # now it seems to be ok # XXX check for datatype? self[k] += v # look for problems, like columns present in self, but not in other self._check() return self ##REF: Name was automagically refactored def select_samples(self, selection): """Return new ColumnData with selected samples""" data = copy.deepcopy(self) for k, v in data.iteritems(): data[k] = [v[x] for x in selection] data._check() return data @property def ncolumns(self): """Returns the number of columns. """ return len(self.keys()) def tofile(self, filename, header=True, header_order=None, sep=' '): """Write column data to a text file. Parameters ---------- filename : str Target filename header : bool, optional If `True` a column header is written, using the column keys. If `False` no header is written. header_order : None or list of str If it is a list of strings, they will be used instead of simply asking for the dictionary keys. However these strings must match the dictionary keys in number and identity. 
This argument type can be used to determine the order of the columns in the output file. The default value is `None`. In this case the columns will be in an arbitrary order. sep : str, optional String that is written as a separator between to data columns. """ with open(filename, 'w') as file_: # write header if header_order == None: if self._header_order is None: col_hdr = self.keys() else: # use stored order + newly added keys at the last columns col_hdr = self._header_order + \ list(set(self.keys()).difference( set(self._header_order))) else: if not len(header_order) == self.ncolumns: raise ValueError, 'Header list does not match number of ' \ 'columns.' for k in header_order: if not k in self: raise ValueError, 'Unknown key [%r]' % (k,) col_hdr = header_order if header == True: file_.write(sep.join(col_hdr) + '\n') # for all rows for r in xrange(self.nrows): # get attributes for all keys l = [str(self[k][r]) for k in col_hdr] # write to file with proper separator file_.write(sep.join(l) + '\n') @property def nrows(self): """Returns the number of rows. """ # no data no rows (after Bob Marley) if not len(self.keys()): return 0 # otherwise first key is as good as any other else: return len(self[self.keys()[0]])
class Measure(Learner):
    """A measure computed from a `Dataset`

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformation are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_results` conditional
    attribute.

    Additionally all dataset measures support the estimation of the
    probabilit(y,ies) of a measure under some distribution. Typically this
    will be the NULL distribution (no signal), that can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor the respective
    probabilities are automatically computed and stored in the `null_prob`
    conditional attribute.

    Notes
    -----
    For developers: All subclasses shall get all necessary parameters via
    their constructor, so it is possible to get the same type of measure for
    multiple datasets by passing them to the __call__() method successively.
    """

    null_prob = ConditionalAttribute(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""
    null_t = ConditionalAttribute(enabled=False)
    """Stores the t-score corresponding to null_prob under assumption
    of Normal distribution"""

    def __init__(self, null_dist=None, **kwargs):
        """
        Parameters
        ----------
        null_dist : instance of distribution estimator
          The estimated distribution is used to assign a probability for a
          certain value of the computed measure.
        """
        Learner.__init__(self, **kwargs)

        # normalize whatever was passed into an actual estimator instance
        null_dist_ = auto_null_dist(null_dist)
        if __debug__:
            debug(
                'SA',
                'Assigning null_dist %s whenever original given was %s'
                % (null_dist_, null_dist))
        self.__null_dist = null_dist_

    __doc__ = enhanced_doc_string('Measure', locals(), Learner)

    def __repr__(self, prefixes=None):
        """String representation of a `Measure`

        Includes only arguments which differ from default ones
        """
        if prefixes is None:
            prefixes = []
        return super(Measure, self).__repr__(
            prefixes=prefixes + _repr_attrs(self, ['null_dist']))

    def _precall(self, ds):
        """Fit the null distribution (if any) before the measure is computed."""
        # estimate the NULL distribution when functor is given
        if self.__null_dist is not None:
            if __debug__:
                debug(
                    "STAT",
                    "Estimating NULL distribution using %s"
                    % self.__null_dist)

            # we need a matching measure instance, but we have to disable
            # the estimation of the null distribution in that child to prevent
            # infinite looping.
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, ds)

    def _postcall(self, dataset, result):
        """Some postprocessing on the result

        When a null distribution is attached, computes `null_prob` (and
        optionally `null_t`) conditional attributes from it.
        """
        if self.__null_dist is None:
            # do base-class postcall and be done
            result = super(Measure, self)._postcall(dataset, result)
        else:
            # don't do a full base-class postcall, only do the
            # postproc-application here, to gain result compatibility with the
            # fitted null distribution -- necessary to be able to use
            # a Node's 'pass_attr' to pick up ca.null_prob
            result = self._apply_postproc(dataset, result)

            if self.ca.is_enabled('null_t'):
                # get probability under NULL hyp, but also request
                # either it belong to the right tail
                null_prob, null_right_tail = \
                    self.__null_dist.p(result, return_tails=True)
                self.ca.null_prob = null_prob

                externals.exists('scipy', raise_=True)
                from scipy.stats import norm

                # TODO: following logic should appear in NullDist,
                #       not here
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = np.abs(null_prob.samples)
                elif tail == 'right':
                    acdf = 1.0 - np.abs(null_prob.samples)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5)
                else:
                    raise RuntimeError('Unhandled tail %s' % tail)

                # We need to clip to avoid non-informative inf's ;-)
                # that happens due to lack of precision in mantissa
                # which is 11 bits in double. We could clip values
                # around 0 at as low as 1e-100 (correspond to z~=21),
                # but for consistency lets clip at 1e-16 which leads
                # to distinguishable value around p=1 and max z=8.2.
                # Should be sufficient range of z-values ;-)
                clip = 1e-16
                null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip))
                # assure that we deal with arrays:
                null_t = np.array(null_t, ndmin=1, copy=False)
                null_t[~null_right_tail] *= -1.0  # revert sign for negatives
                null_t_ds = null_prob.copy(deep=False)
                null_t_ds.samples = null_t
                self.ca.null_t = null_t_ds  # store as a Dataset
            else:
                # get probability of result under NULL hypothesis if available
                # and don't request tail information
                self.ca.null_prob = self.__null_dist.p(result)

            # now do the second half of postcall and invoke pass_attr
            result = self._pass_attr(dataset, result)
        return result

    @property
    def null_dist(self):
        """Return Null Distribution estimator"""
        return self.__null_dist
class FSLAtlas(XMLBasedAtlas):
    """Base class for FSL atlases

    NOTE(review): the module contains a second definition of ``FSLAtlas``
    further down; at import time the later definition shadows this one --
    confirm which copy is intended to survive.
    """
    source = 'FSL'

    def __init__(self, *args, **kwargs):
        """Initialize the atlas; FSL atlases always reside in MNI space.
        """
        XMLBasedAtlas.__init__(self, *args, **kwargs)
        self.space = 'MNI'

    __doc__ = enhanced_doc_string('FSLAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        """Select and load the atlas image matching ``self._resolution``.

        Scans the candidate image files listed in the XML header (or the
        single forced image file, if one was given).  Keeps the candidate
        whose first zoom matches the requested resolution exactly or, when
        no resolution was requested, the candidate with the finest
        (smallest) resolution.  Populates ``_image``, ``_image_file``,
        ``_resolution``, ``_origin`` and ``_data``.

        Raises
        ------
        RuntimeError
          If a candidate cannot be opened or no candidate matched.
        """
        resolution = self._resolution
        header = self.header
        images = header.images

        # Load present images
        # XXX might be refactored to avoid duplication of
        #     effort with PyMVPAAtlas
        ni_image = None
        resolutions = []
        if self._force_image_file is None:
            imagefile_candidates = [
                reuse_absolute_path(self._filename, i.imagefile.text,
                                    force=True)
                for i in images]
        else:
            imagefile_candidates = [self._force_image_file]

        for imagefilename in imagefile_candidates:
            try:
                if not os.path.exists(imagefilename):
                    # try with extension if filename doesn't exist
                    imagefilename += '.nii.gz'
                ni_image_ = nb.load(imagefilename)
            except RuntimeError as e:
                # include the original error instead of discarding it --
                # consistent with PyMVPAAtlas._load_images
                raise RuntimeError(" Cannot open file %s due to %s"
                                   % (imagefilename, e))

            resolution_ = ni_image_.header.get_zooms()[0]
            if resolution is None:
                # select this one if the best
                if ni_image is None or \
                   resolution_ < ni_image.header.get_zooms()[0]:
                    ni_image = ni_image_
                    self._image_file = imagefilename
            else:
                if resolution_ == resolution:
                    ni_image = ni_image_
                    self._image_file = imagefilename
                    break
                else:
                    resolutions += [resolution_]
        # TODO: also make use of summaryimagefile may be?

        if ni_image is None:
            msg = "Could not find an appropriate atlas among %d atlases." \
                  % len(imagefile_candidates)
            if resolution is not None:
                msg += " Atlases had resolutions %s" % \
                       (resolutions,)
            raise RuntimeError(msg)
        if __debug__:
            debug('ATL__', "Loading atlas data from %s" % self._image_file)
        self._image = ni_image
        self._resolution = ni_image.header.get_zooms()[0]
        self._origin = np.abs(ni_image.header.get_qform()[:3, 3])  # XXX
        self._data = self._image.get_data()
        if len(self._data.shape) == 4:
            # want to have volume axis first
            self._data = np.rollaxis(self._data, -1)

    def _load_metadata(self):
        """Create the single labels level from the atlas XML data."""
        # Load levels
        self._levels = {}
        # preprocess labels for different levels; FSL atlases carry
        # exactly one labels level
        self.nlevels = 1
        level = LabelsLevel.from_xml(self.data)  #, level_type='label'
        level.description = self.header.name.text
        self._levels = {0: level}

    @staticmethod
    ##REF: Name was automagically refactored
    def _check_version(version):
        """Accept any version string starting with 'digits.digit'."""
        # raw string avoids the invalid '\.' escape in a regular literal
        return re.search(r'^[0-9]+\.[0-9]', version) is not None
class FSLProbabilisticAtlas(FSLAtlas):
    """Probabilistic FSL atlases
    """

    def __init__(self, thr=0.0, strategy='all', sort=True, *args, **kwargs):
        """
        Parameters
        ----------
        thr : float
          Value to threshold at
        strategy : str
          Possible values
            all - all entries above thr
            max - entry with maximal value
        sort : bool
          Either to sort entries for 'all' strategy according to
          probability
        """
        FSLAtlas.__init__(self, *args, **kwargs)
        self.thr = thr
        self.strategy = strategy
        self.sort = sort

    __doc__ = enhanced_doc_string('FSLProbabilisticAtlas', locals(), FSLAtlas)

    ##REF: Name was automagically refactored
    def label_voxel(self, c, levels=None):
        """Return labels for the voxel

        Parameters
        ----------
        c : tuple of coordinates (xyz)
        levels : just for API consistency (heh heh). Must be 0 for FSL atlases
        """
        if levels is not None and not (levels in [0, [0], (0,)]):
            raise ValueError(
                "I guess we don't support levels other than 0 in FSL atlas." \
                " Got levels=%s" % (levels,))

        # check range
        c = self._check_range(c)

        # XXX think -- may be we should better assign each map to a
        # different level
        level = 0
        resultLabels = []
        for index, area in enumerate(self._levels[level]):
            prob = int(self._data[index, c[0], c[1], c[2]])
            if prob > self.thr:
                resultLabels += [dict(index=index,
                                      #id=
                                      label=area.text,
                                      prob=prob)]

        if self.sort or self.strategy == 'max':
            # most probable first; key-based sort replaces the py2-only
            # cmp= keyword (removed in Python 3) with identical ordering
            resultLabels.sort(key=lambda entry: entry['prob'], reverse=True)

        if self.strategy == 'max':
            resultLabels = resultLabels[:1]
        elif self.strategy == 'all':
            pass
        else:
            raise ValueError('Unknown strategy %s' % self.strategy)

        result = {'voxel_queried' : c,
                  # in the list since we have only single level but
                  # with multiple entries
                  'labels': [resultLabels]}

        return result

    def find(self, *args, **kwargs):
        """Just a shortcut to the only level.

        See :class:`~mvpa2.atlases.base.Level.find` for more info
        """
        return self.levels[0].find(*args, **kwargs)

    def get_map(self, target, strategy='unique', axes_order='xyz'):
        """Return a probability map as an array

        Parameters
        ----------
        target : int or str or re._pattern_type
          If int, map for given index is returned. Otherwise, .find is called
          with ``unique=True`` to find matching area
        strategy : str in ('unique', 'max')
          If 'unique', then if multiple areas match, exception would be
          raised. In case of 'max', each voxel would get maximal value
          of probabilities from all matching areas
        axes_order : str in ('xyz', 'zyx')
          In what order axes of the returned array should follow.
        """
        if isinstance(target, int):
            res = self._data[target]
            # since we no longer support pynifti -- all is XYZ
            if axes_order == 'xyz':
                return res
            elif axes_order == 'zyx':
                return res.T
            else:
                raise ValueError("Unknown axes_order=%r provided"
                                 % (axes_order,))
        else:
            lev = self.levels[0]        # we have just 1 here
            if strategy == 'unique':
                # resolve the name to a single index and recurse
                return self.get_map(lev.find(target, unique=True).index,
                                    axes_order=axes_order)
            else:
                maps_dict = self.get_maps(target, axes_order=axes_order)
                maps = np.array(list(maps_dict.values()))
                return np.max(maps, axis=0)

    def get_maps(self, target, axes_order='xyz', key_attr=None,
                 overlaps=None):
        """Return a dictionary of probability maps for the target

        Each key is a `Label` instance, and value is the probability map

        Parameters
        ----------
        target : str or re._pattern_type
          .find is called with a target and unique=False to find all matches
        axes_order : str in ('xyz', 'zyx')
          In what order axes of the returned array should follow.
        key_attr : None or str
          What to use for the keys of the dictionary.  If None, `Label`
          instance would be used as a key.  If some attribute provided
          (e.g. 'text', 'abbr', 'index'), corresponding attribute of the
          `Label` instance would be taken as a key.
        overlaps : None or {'max'}
          How to treat overlaps in maps.  If None, nothing is done and maps
          might have overlaps.  If 'max', then maps would not overlap and
          competing maps will be resolved based on maximal value (e.g. if
          maps contain probabilities).
        """
        lev = self.levels[0]        # we have just 1 here
        if key_attr is None:
            key_gen = lambda x: x
        else:
            key_gen = lambda x: getattr(x, key_attr)

        res = [[key_gen(l), self.get_map(l.index, axes_order=axes_order)]
               for l in lev.find(target, unique=False)]
        if overlaps == 'max':
            # not efficient since it places all maps back into a single
            # ndarray... but well
            maps = np.array([x[1] for x in res])
            maximums = np.argmax(maps, axis=0)
            # mask of voxels claimed by more than one map (renamed so it no
            # longer shadows the `overlaps` parameter)
            overlap_mask = np.sum(maps != 0, axis=0) > 1
            # now lets go and infiltrate maps:
            # and do silly loop since we will reassign
            # the entries possibly
            for i in range(len(res)):
                n, m = res[i]
                losers = np.logical_and(overlap_mask, ~(maximums == i))
                if losers.any():
                    # copy and modify only when this map actually loses
                    # somewhere -- the former len() test was true for any
                    # non-empty mask and thus copied unconditionally
                    m_new = m.copy()
                    m_new[losers] = 0
                    res[i][1] = m_new
        elif overlaps is None:
            pass
        else:
            raise ValueError("Incorrect value of overlaps argument %s"
                             % overlaps)
        return dict(res)
class FSLAtlas(XMLBasedAtlas):
    """Base class for FSL atlases

    NOTE(review): this is a second definition of ``FSLAtlas`` in this
    module (another exists earlier in the file); this later one shadows
    the earlier at import time -- confirm which copy is intended.
    """
    source = 'FSL'

    def __init__(self, *args, **kwargs):
        """Initialize the atlas; FSL atlases always reside in MNI space.
        """
        XMLBasedAtlas.__init__(self, *args, **kwargs)
        self.space = 'MNI'

    __doc__ = enhanced_doc_string('FSLAtlas', locals(), XMLBasedAtlas)

    ##REF: Name was automagically refactored
    def _load_images(self):
        """Select and load the atlas image matching ``self._resolution``.

        Keeps the candidate whose first zoom matches the requested
        resolution exactly or, when no resolution was requested, the one
        with the finest (smallest) resolution.  Populates ``_image``,
        ``_image_file``, ``_resolution``, ``_origin`` and ``_data``.

        Raises
        ------
        RuntimeError
          If a candidate cannot be opened or no candidate matched.
        """
        resolution = self._resolution
        header = self.header
        images = header.images

        # Load present images
        # XXX might be refactored to avoid duplication of
        #     effort with PyMVPAAtlas
        ni_image = None
        resolutions = []
        if self._force_image_file is None:
            imagefile_candidates = [
                reuse_absolute_path(self._filename, i.imagefile.text,
                                    force=True)
                for i in images]
        else:
            imagefile_candidates = [self._force_image_file]

        for imagefilename in imagefile_candidates:
            try:
                if not os.path.exists(imagefilename):
                    # try with extension if filename doesn't exist
                    imagefilename += '.nii.gz'
                ni_image_ = nb.load(imagefilename)
            except RuntimeError as e:
                # py2-only 'except E, e' / 'raise E, msg' statements rewritten
                # in the py2/py3-compatible form; the caught error is now kept
                # in the message, consistent with PyMVPAAtlas._load_images
                raise RuntimeError(" Cannot open file %s due to %s"
                                   % (imagefilename, e))

            resolution_ = ni_image_.get_header().get_zooms()[0]
            if resolution is None:
                # select this one if the best
                if ni_image is None or \
                   resolution_ < ni_image.get_header().get_zooms()[0]:
                    ni_image = ni_image_
                    self._image_file = imagefilename
            else:
                if resolution_ == resolution:
                    ni_image = ni_image_
                    self._image_file = imagefilename
                    break
                else:
                    resolutions += [resolution_]
        # TODO: also make use of summaryimagefile may be?

        if ni_image is None:
            msg = "Could not find an appropriate atlas among %d atlases." \
                  % len(imagefile_candidates)
            if resolution is not None:
                msg += " Atlases had resolutions %s" % \
                       (resolutions,)
            raise RuntimeError(msg)
        if __debug__:
            debug('ATL__', "Loading atlas data from %s" % self._image_file)
        self._image = ni_image
        self._resolution = ni_image.get_header().get_zooms()[0]
        self._origin = np.abs(ni_image.get_header().get_qform()[:3, 3])  # XXX
        self._data = self._image.get_data()
        if len(self._data.shape) == 4:
            # want to have volume axis first
            self._data = np.rollaxis(self._data, -1)
class ProjectionMapper(Mapper):
    """Linear mapping between multidimensional spaces.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    `_proj` and optionally offset vectors `_offset_in` and
    `_offset_out` (if initialized with demean=True, which is default)
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards linear mapping of
    data, the latter by default using pseudo-inverse (but could be
    altered in subclasses, like hermitian (conjugate) transpose in
    case of SVD).  Additionally, `ProjectionMapper` supports optional
    selection of arbitrary component (i.e. columns of the projection
    matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties.
    """

    _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""

    def __init__(self, demean=True, **kwargs):
        """Initialize the ProjectionMapper

        Parameters
        ----------
        demean : bool
          Either data should be demeaned while computing
          projections and applied back while doing reverse()
        """
        Mapper.__init__(self, **kwargs)

        # by default we want to wipe the feature attributes out during mapping
        self._fa_filter = []

        self._proj = None
        """Forward projection matrix."""
        self._recon = None
        """Reverse projection (reconstruction) matrix."""
        self._demean = demean
        """Flag whether to demean the to be projected data, prior to
        projection.
        """
        self._offset_in = None
        """Offset (most often just mean) in the input space"""
        self._offset_out = None
        """Offset (most often just mean) in the output space"""

    __doc__ = enhanced_doc_string('ProjectionMapper', locals(), Mapper)

    @accepts_dataset_as_samples
    def _pretrain(self, samples):
        """Record the input-space offset (mean) used for demeaning.

        Parameters
        ----------
        samples : array-like
          Training samples; the mean over axis 0 becomes `_offset_in`.
        """
        if self._demean:
            self._offset_in = samples.mean(axis=0)

    ##REF: Name was automagically refactored
    def _demean_data(self, data):
        """Helper which optionally demeans
        """
        if self._demean:
            # demean the training data
            data = data - self._offset_in

            if __debug__ and "MAP_" in debug.active:
                debug(
                    "MAP_",
                    "%s: Mean of data in input space %s was subtracted"
                    % (self.__class__.__name__, self._offset_in))
        return data

    def _forward_data(self, data):
        """Project `data` into the output space (2D ndarray result)."""
        if self._proj is None:
            # message fixed: 'train' -> 'trained' (matches _reverse_data)
            raise RuntimeError("Mapper needs to be trained before used.")

        # local binding
        demean = self._demean
        # 2D view of the data; np.asmatrix was replaced since np.matrix is
        # deprecated (removed in NumPy 2.0) -- np.dot on a 2D array is
        # numerically identical and yields the same shapes
        d = np.atleast_2d(data)

        # Remove input offset if present
        if demean and self._offset_in is not None:
            d = d - self._offset_in

        # Do forward projection
        res = np.asarray(np.dot(d, self._proj))

        # Add output offset if present
        if demean and self._offset_out is not None:
            res += self._offset_out

        return res

    def _reverse_data(self, data):
        """Map `data` from the output space back into the input space."""
        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")
        # 2D view of the data; see _forward_data on the np.asmatrix removal
        d = np.atleast_2d(data)

        # Remove offset if present in output space
        if self._demean and self._offset_out is not None:
            d = d - self._offset_out

        # Do reverse projection
        res = np.asarray(np.dot(d, self.recon))

        # Add offset in input space
        if self._demean and self._offset_in is not None:
            res += self._offset_in

        return res

    ##REF: Name was automagically refactored
    def _compute_recon(self):
        """Given that a projection is present -- compute reconstruction
        matrix.  By default -- pseudoinverse of projection matrix.  Might
        be overridden in derived classes for efficiency.
        """
        return np.linalg.pinv(self._proj)

    ##REF: Name was automagically refactored
    def _get_recon(self):
        """Compute (if necessary) and return reconstruction matrix
        """
        # (re)build reconstruction matrix lazily, cache it
        recon = self._recon
        if recon is None:
            self._recon = recon = self._compute_recon()
        return recon

    proj = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=_get_recon, doc="Backprojection matrix")