def _scale(coord, scale=1.0, to_pixel=True): """Convert pixel cordinates to scaled co-ordinates or visa versa. Args: coord(int,float or iterable): Coordinates to be scaled Keyword Arguments: scale(float): Microns per Pixel scale of image to_pixel(bool): Force the conversion to be to pixels Returns: scaled co-ordinates. """ if isinstance(coord, int): if not to_pixel: coord = float(coord) * scale elif isinstance(coord, float): if to_pixel: coord = int(round(coord / scale)) elif isiterable(coord): coord = tuple([_scale(c, scale, to_pixel) for c in coord]) else: raise ValueError( "coord should be an integer or a float or an iterable of integers and floats" ) return coord
def _extractor(group, trail, metadata):
    """Collect the named metadata values from every member of *group* into one result object.

    Args:
        group: Folder-like group whose members supply the metadata.
        trail: Group trail (unused, required by the walker signature).
        metadata: Iterable of metadata key names to extract.

    Returns:
        A new instance of the group's member type with one appended row per member.
    """
    results = group.type()
    results.metadata = group[0].metadata
    headers = []
    ok_data = []
    # Probe each requested key against the template object; keys that cannot be
    # read or coerced to an array are silently dropped.
    for key in metadata:
        try:
            probe = results[key]
            if not isiterable(probe) or isinstance(probe, string_types):
                probe = array([probe])
            else:
                probe = array(probe)
        except Exception:
            continue
        ok_data.append(key)
        # A multi-valued entry occupies several columns, so repeat the header.
        headers.extend([key] * len(probe))
    for member in group:
        row = array([])
        for key in ok_data:
            row = append(row, array(member[key]))
        results += row
    results.column_headers = headers
    return results
def extract(self, *metadata, **kargs):
    """Walk through the terminal group, collect the listed metadata from each file and construct a replacement metadataObject.

    Args:
        *metadata (str): One or more metadata indices that should be used to construct the new data file.

    Keyword Arguments:
        copy (bool): Take a copy of the :py:class:`DataFolder` before starting the extract (default is True)

    Returns:
        An instance of a metadataObject like object.
    """
    take_copy = kargs.pop("copy", True)
    # Flatten the requested names (strings or iterables of strings) into one list.
    names = []
    for entry in metadata:
        if isinstance(entry, string_types):
            names.append(entry)
        elif isiterable(entry):
            names.extend(entry)
        else:
            raise TypeError("Metadata values should be strings, or lists of strings, not {}".format(type(entry)))
    metadata = names

    def _extractor(group, trail, metadata):
        # Build one result object per terminal group with a row per member.
        results = group.type()
        results.metadata = group[0].metadata
        headers = []
        ok_data = []
        for key in metadata:  # Sanity check the metadata to include
            try:
                probe = results[key]
                if not isiterable(probe) or isinstance(probe, string_types):
                    probe = array([probe])
                else:
                    probe = array(probe)
            except Exception:
                continue
            ok_data.append(key)
            headers.extend([key] * len(probe))
        for member in group:
            row = array([])
            for key in ok_data:
                row = append(row, array(member[key]))
            results += row
        results.column_headers = headers
        return results

    target = self.clone if take_copy else self
    return target.walk_groups(_extractor, group=True, replace_terminal=True, walker_args={"metadata": metadata})
def __getattr__(self, name): """Get a column using the setas attribute.""" # Overrides __getattr__ to allow access as row.x etc. col_check = { "x": "xcol", "d": "xerr", "y": "ycol", "e": "yerr", "z": "zcol", "f": "zerr", "u": "ucol", "v": "vcol", "w": "wcol", } if name in self.setas.cols: return self.setas.__getattr__(name) if name not in col_check: return super(DataArray, self).__getattribute__(name) indexer = [slice(0, dim, 1) for ix, dim in enumerate(self.shape)] col = col_check[name] if col.startswith("x"): if self._setas.cols[col] is not None: indexer[-1] = self._setas.cols[col] ret = self[tuple(indexer)] if ret.ndim > 0: ret.column_headers = self.column_headers[ self._setas.cols[col]] else: ret = None else: if isiterable( self._setas.cols[col]) and len(self._setas.cols[col]) > 0: indexer[-1] = self._setas.cols[col][0] elif isiterable(self._setas.cols[col]): indexer[-1] = self._setas.cols[col] else: return None ret = self[tuple(indexer)] if ret.ndim > 0: ret.column_headers = self.column_headers[indexer[-1]] if ret is None: raise StonerSetasError( "Tried accessing a {} column, but setas is not defined.". format(name)) else: return ret
def pattern(self, value):
    """Sets the filename searching pattern[s] for the :py:class:`Stoner.Core.metadataObject`s."""
    # A single string or compiled regular expression becomes a one-element tuple;
    # any other iterable is copied into a list of patterns.
    if isinstance(value, string_types) or isinstance(value, _pattern_type):
        self._pattern = (value,)
    elif isiterable(value):
        self._pattern = list(value)
    else:
        raise ValueError(
            "pattern should be a string, regular expression or iterable object not a {}".format(type(value))
        )
def i(self, value):
    """Set the row-index labels for this array.

    The behaviour depends on the array's dimensionality: scalars are ignored,
    a single row gets a one-element index, and 2D data gets a length-r index
    derived from *value* (either used directly, or counted up from its minimum).
    """
    if self.ndim == 0:
        # A scalar has no row index to set.
        pass
    elif self.ndim == 1 and self.isrow:
        # A single row carries a one-element index array.
        if isiterable(value) and value:
            self._ibase = _np_.array([min(value)])
        else:
            self._ibase = _np_.array([value])
    elif self.ndim >= 1:
        r = self.shape[0]
        if isiterable(value) and len(value) == r:  # Iterable of the correct length - assign straight
            self._ibase = _np_.array(value)
        elif isiterable(value) and len(value) > 0:  # Iterable but wrong length - count up from min of value
            self._ibase = _np_.arange(min(value), min(value) + r)
        elif isiterable(value) and len(value) == 0:  # Empty iterable
            # NOTE(review): arange(0, r, r) yields a single element [0] for r > 0,
            # so _ibase does not match the row count here - looks like
            # arange(0, r) was intended; confirm before changing.
            self._ibase = _np_.arange(0, r, r)
        else:  # Not iterable - count up from the scalar value
            self._ibase = _np_.arange(value, value + r)
def slice_metadata(self, key=None, values_only=False):  # pylint: disable=arguments-differ
    """Return a list of the metadata dictionaries for each item/file in the top level group.

    Keyword Arguments:
        key (string or list of strings): if given then only return the item(s) requested from the metadata
        values_only (bool): if given only return tuples of the dictionary values. Mostly useful
            when given a single key string

    Returns:
        ret (list of dict, tuple or values):
            depending on values_only returns the sliced dictionaries or tuples/
            values of the items

    To do:
        this should probably be a func in baseFolder and should use have
        recursive options (build a dictionary of metadata values). And probably
        options to extract other parts of objects (first row or whatever).
    """
    metadata = [k.metadata for k in self]  # this can take some time if it's loading in the images
    # Resolve the requested key(s) into canonical metadata names via __lookup__.
    if isinstance(key, string_types):
        key = metadata[0].__lookup__(key, multiple=True)
    elif isiterable(key):
        newkey = []
        for k in key:
            # BUGFIX: was newkey.extnd(...) which raised AttributeError.
            newkey.extend(metadata[0].__lookup__(k, multiple=True))
        key = newkey
    if isinstance(key, list):
        # Restrict every member's metadata to the requested keys, insisting that
        # each member actually has them all.
        for i, met in enumerate(metadata):
            assert all([k in met for k in key]), "key requested not in item {}".format(i)
            metadata[i] = {k: v for k, v in metadata[i].items() if k in key}
    if values_only:
        for i, met in enumerate(metadata):
            metadata[i] = [v for k, v in met.items()]
        if len(metadata[0]) == 1:  # single key - unwrap the one-element lists
            metadata = [m[0] for m in metadata]
    return metadata
def __setattr__(self, name, value):
    """Setting the attribute on .each sets it on all instantiated objects and in _object_attrs.

    Args:
        name (str): Attribute to set
        value (any): Value to set

    Notes:
        If *name* is not present on the empty member instance, then the first member of the
        folder is checked as well. This allows the attributes of a :py:class:`Stoner.Data`
        object that derive from the :py:attr:`Stoner.Data.setas` attribute (such as *.x*, *.y*
        or *.e* etc) to be accessed.

        If *value* is iterable and the same length as the folder, then each element in the
        folder is loaded and the corresponding element of *value* is assigned to the
        attribute of the member.
    """
    if name in self.__dict__ or name.startswith("_"):  # Handle setting our own attributes
        super(item, self).__setattr__(name, value)
    elif name in dir(self._folder.instance) or (
        len(self._folder) and hasattr(self._folder[0], name)
    ):  # This is an instance attribute
        if isiterable(value) and len(value) == len(self._folder):
            # One value per member: every member must be loaded to receive its own value.
            force_load = True
        else:
            force_load = False
            # A single shared value: remember it so it is re-applied on load...
            self._folder._object_attrs[name] = value  # Add to attributes to be set on load
            # ...and broadcast it so the zip below covers every member.
            value = [value] * len(self._folder)
        for d, v in zip(self._folder.__names__(), value):
            # And set on all instantiated objects; unless force_load, skip members
            # that are not yet instantiated (they pick the value up on load).
            if force_load or isinstance(self._folder.__getter__(d, instantiate=False), self._folder.type):
                d = self._folder.__getter__(d)
                setattr(d, name, v)
    else:
        raise AttributeError("Unknown attribute {}".format(name))
def _scale(coord, scale=1.0, to_pixel=True):
    """Convert pixel co-ordinates to scaled co-ordinates or vice versa.

    Args:
        coord (int, float or iterable): Coordinates to be scaled

    Keyword Arguments:
        scale (float): Microns per Pixel scale of image
        to_pixel (bool): Force the conversion to be to pixels

    Returns:
        scaled co-ordinates.

    Notes:
        The type of *coord* encodes its units: ints are pixel positions and
        floats are scaled positions, so a conversion only happens when the
        requested direction disagrees with the input type.
    """
    if isinstance(coord, int):
        # Already pixels; only convert when scaled units are wanted.
        if not to_pixel:
            coord = float(coord) * scale
    elif isinstance(coord, float):
        # Already scaled; only convert when pixel units are wanted.
        if to_pixel:
            coord = int(round(coord / scale))
    elif isiterable(coord):
        # Recurse element-wise, returning a tuple of converted values.
        coord = tuple([_scale(c, scale, to_pixel) for c in coord])
    else:
        raise ValueError("coord should be an integer or a float or an iterable of integers and floats")
    return coord
def slice(self, *args, **kwargs):  # pylint: disable=arguments-differ
    """Return a list of the metadata dictionaries for each item/file in the top level group.

    Keyword Arguments:
        *args (string or list of strings): if given then only return the item(s) requested from the metadata
        values_only (bool): if given and *output* not set only return tuples of the dictionary values. Mostly useful
            when given a single key string
        output (str or type): Controls the output format from slice_metadata. Possible values are

            - "dict" or dict - return a list of dictionary subsets of the metadata from each image
            - "list" or list - return a list of values of each item pf the metadata
            - "array" or np.array - return a single array - like list above, but returns as a numpy array. This can
                create a 2D array from multiple keys
            - "data" or Stoner.Data - returns the metadata in a Stoner.Data object where the column headers are the
                metadata keys.
            - "frame" - returns the metadata as a Pandas DataFrame object
            - "smart" - switch between *dict* and *list* depending whether there is one or more keys.
        mask_missing (bool): If true, then metadata entries missing in members of the folder are returned as masked
            values (or None), If False, then an exception is raised if any entries are missing.

    Returns:
        ret (list of dict, tuple of values or :py:class:`Stoner.Data`):
            depending on *values_only* or (output* returns the sliced dictionaries or tuples/
            values of the items

    To do:
        this should probably be a func in baseFolder and should use have
        recursive options (build a dictionary of metadata values). And probably
        options to extract other parts of objects (first row or whatever).
    """
    values_only = kwargs.pop("values_only", False)
    output = kwargs.pop("output", None)
    mask_missing = kwargs.pop("mask_missing", False)
    if kwargs:
        raise SyntaxError("Unused keyword arguments : {}".format(kwargs))
    if output is None:  # Sort out a definitive value of output
        output = "dict" if not values_only else "smart"
    if isinstance(output, string_types):
        output = output.lower()
    if output not in [
        "dict",
        "list",
        "array",
        "data",
        "frame",
        "smart",
        dict,
        list,
        np.ndarray,
        DataFile,
    ]:  # Check for good output value
        raise SyntaxError("output of slice metadata must be either dict, list, or array not {}".format(output))
    # Flatten the positional arguments into a single list of key names.
    keys = []
    for k in args:
        if isinstance(k, string_types):
            keys.append(k)
        elif isiterable(k) and all_type(k, string_types):
            keys.extend(k)
        else:
            raise KeyError("{} cannot be used as a key name or set of key names".format(type(k)))
    if not mask_missing:
        # Without masking, every key must exist in every member of the folder.
        for k in keys:
            if k not in self.common_keys:
                raise KeyError("{} is not a key in all members of the folder".format(k))
    results = []
    for d in self._folder:
        results.append({k: d[k] for k in keys if k in d})

    for r in results:  # Expand the results where a result contains a list
        for k in keys:
            if k in r and islike_list(r[k]) and len(r[k]) > 0:
                v = r[k]
                del r[k]
                # List-valued entries become key[0], key[1], ... scalar entries.
                r.update({"{}[{}]".format(k, i): vi for i, vi in enumerate(v)})

    if output == "smart":
        # "smart" collapses to a flat list only when every member yielded exactly
        # one value under the same key; otherwise fall back to dicts.
        if np.all([len(r) == 1 and list(r.keys())[0] == list(results[0].keys())[0] for r in results]):
            output = "list"
        else:
            output = "dict"
    # NOTE(review): the dict *type* is accepted by the validity check above but
    # only the string "dict" is matched below, so output=dict falls through to
    # the pandas branch - confirm whether that asymmetry is intended.
    if output in ["list", list]:
        keys = set()
        for r in results:
            keys |= set(r.keys())
        keys = list(keys)
        if len(keys) == 1:
            ret = [r.get(keys[0], None) for r in results]
        else:
            ret = []
            for r in results:
                ret.append(tuple(r.get(k, None) for k in keys))
    elif output == "dict":
        ret = results
    else:
        # All remaining formats route through a pandas DataFrame.
        from pandas import DataFrame
        from Stoner import Data

        frame = DataFrame(results)
        mask = frame.isna()
        if output == "frame":
            ret = frame
        else:
            ret = Data(frame)
            ret.mask = mask
        if output in ["array", np.ndarray]:
            ret = ret.data
    return ret
def _col_args(
    self,
    scalar=True,
    xcol=None,
    ycol=None,
    zcol=None,
    ucol=None,
    vcol=None,
    wcol=None,
    xerr=None,
    yerr=None,
    zerr=None,
    **kargs
):
    """Utility method that creates an object which has keys based either on arguments or setas attribute.

    Keyword Arguments:
        scalar (bool): If True, reduce list-valued column entries to their first
            element; if explicitly False, promote scalar entries (other than the
            x/has_* keys) to lists.
        xcol, ycol, zcol, ucol, vcol, wcol, xerr, yerr, zerr: Explicit column
            specifications that override the setas-derived values when given.

    Returns:
        An :py:class:`_attribute_store` of resolved column assignments.
    """
    cols = {
        "xcol": xcol,
        "ycol": ycol,
        "zcol": zcol,
        "ucol": ucol,
        "vcol": vcol,
        "wcol": wcol,
        "xerr": xerr,
        "yerr": yerr,
        "zerr": zerr,
    }
    no_guess = True
    for i in cols.values():
        if i is not None:  # User specification wins out
            break
    else:  # User didn't set any values, setas will win
        no_guess = False
    ret = _attribute_store(self.setas._get_cols(no_guess=no_guess))
    for c in list(cols.keys()):
        if isNone(cols[c]):  # Not defined, fallback on setas
            del cols[c]
            continue
        elif isinstance(cols[c], bool) and not cols[c]:  # False, delete column altogether
            del cols[c]
            if c in ret:
                del ret[c]
            continue
        elif c in ret and isinstance(ret[c], list):
            # Floats (fixed values) and full-length arrays pass through untouched;
            # everything else is resolved to a column index below.
            # BUGFIX: removed an unreachable duplicate `elif isinstance(cols[c], float)`
            # branch - the float case is already covered by this condition.
            if isinstance(cols[c], float) or (isinstance(cols[c], _np_.ndarray) and cols[c].size == len(self)):
                continue
        cols[c] = self.setas.find_col(cols[c])
    ret.update(cols)
    if scalar:
        # Collapse list-valued entries down to a single column (or None).
        for c in ret:
            if isinstance(ret[c], list):
                ret[c] = ret[c][0] if ret[c] else None
    elif isinstance(scalar, bool) and not scalar:
        # Explicit scalar=False: promote everything (bar x/has_* keys) to lists.
        for c in ret:
            if c.startswith("x") or c.startswith("has_"):
                continue
            if not isiterable(ret[c]) and ret[c] is not None:
                ret[c] = [ret[c]]
            elif ret[c] is None:
                ret[c] = []
    return ret
def __getitem__(self, ix):
    """Indexing function for DataArray.

    Args:
        ix (various): Index to find.

    Returns:
        An indexed part of the DataArray object with extra attributes.

    Notes:
        This tries to support all of the indexing operations of a regular numpy
        array, plus the special operations where one columns are named.

    Warning:
        The code almost certainly makes some assumptions that DataArray is one
        or 2D and may blow up with 3D arrays! On the other hand it has a special
        case exception for where you give a string as the first index element
        and assumes that you've forgotten that we're row major and tries to do
        the right thing.
    """
    # Is this going to be a single row ?
    single_row = isinstance(ix, int_types) or (
        isinstance(ix, tuple) and len(ix) > 0 and isinstance(ix[0], int_types)
    )
    # If the index is a single string type, then build a column accessing index
    if isinstance(ix, string_types):
        if self.ndim > 1:
            ix = (slice(None, None, None), self._setas.find_col(ix))
        else:
            ix = (self._setas.find_col(ix),)
    if isinstance(ix, (int_types, slice)):
        ix = (ix,)
    elif isinstance(ix, tuple) and ix and isinstance(ix[-1], string_types):  # index still has a string type in it
        ix = list(ix)
        ix[-1] = self._setas.find_col(ix[-1])
        ix = tuple(ix)
    elif (
        isinstance(ix, tuple) and ix and isinstance(ix[-1], _np_.ndarray) and self.ndim == 1
    ):  # Indexing with a numpy array
        if len(ix) == 1:
            ix = ix[0]
    elif isinstance(ix, tuple) and ix and isiterable(ix[-1]):  # indexing with a list of columns
        ix = list(ix)
        if all_type(ix[-1], bool):
            # Convert a boolean mask over columns into integer positions first.
            ix[-1] = _np_.arange(len(ix[-1]))[ix[-1]]
        ix[-1] = [self._setas.find_col(c) for c in ix[-1]]
        ix = tuple(ix)
    elif isinstance(ix, tuple) and ix and isinstance(ix[0], string_types):  # oops! backwards indexing
        # The caller gave the column name first; rotate it to the last axis.
        c = ix[0]
        ix = list(ix[1:])
        ix.append(self._setas.find_col(c))
        ix = tuple(ix)
    # Now can index with our constructed multidimensional indexer
    ret = super(DataArray, self).__getitem__(ix)
    if ret.ndim == 0 or isinstance(ret, _np_.ndarray) and ret.size == 1:
        # Unwrap 0-d / single-element results to a plain numpy scalar.
        return ret.dtype.type(ret)
    elif not isinstance(ret, _np_.ndarray):  # bugout for scalar returns
        return ret
    elif ret.ndim >= 2:  # Potentially 2D array here
        if ix[-1] is None:  # Special case for increasing an array dimension
            if self.ndim == 1:  # Going from 1 D to 2D
                ret.setas = self.setas.clone
                ret.i = self.i
                ret.name = getattr(self, "name", "Column")
                return ret
        else:  # A regular 2D array
            ret.isrow = single_row
            ret.setas = self.setas.clone
            ret.column_headers = copy.copy(self.column_headers)
            if len(ix) > 0 and isiterable(ix[-1]):  # pylint: disable=len-as-condition
                # Column subset selected - narrow the headers to match.
                ret.column_headers = list(_np_.array(ret.column_headers)[ix[-1]])
            # Sort out whether we need an array of row labels
            if isinstance(self.i, _np_.ndarray) and len(ix) > 0:  # pylint: disable=len-as-condition
                if isiterable(ix[0]) or isinstance(ix[0], int_types):
                    ret.i = self.i[ix[0]]
                else:
                    ret.i = 0
            else:
                ret.i = self.i
    elif ret.ndim == 1:  # Potentially a single row or single column
        ret.isrow = single_row
        if len(ix) == len(self.setas):
            # Fully-specified index: narrow setas and headers to the chosen columns.
            tmp = _np_.array(self.setas)[ix[-1]]
            ret.setas(tmp)
            tmpcol = _np_.array(self.column_headers)[ix[-1]]
            ret.column_headers = tmpcol
        else:
            ret.setas = self.setas.clone
            ret.column_headers = copy.copy(self.column_headers)
        # Sort out whether we need an array of row labels
        if single_row and isinstance(self.i, _np_.ndarray):
            ret.i = self.i[ix[0]]
        else:  # This is a single element?
            ret.i = self.i
        if not single_row:
            ret.name = self.column_headers
    return ret