def sub_core(other, newdata):
    """Delete the rows selected by *other* from *newdata*.

    Args:
        other (int, slice, callable or list of int/bool):
            Row selector handed straight to ``newdata.del_rows``.
        newdata:
            Object to delete rows from. It is modified in place.

    Returns:
        *newdata* after the rows have been deleted and the shape recorded on
        its ``_data._setas`` has been refreshed, or ``NotImplemented`` when
        *other* is not a supported row selector.
    """
    deletable = (
        isinstance(other, (slice, int_types))
        or callable(other)
        or (isinstance(other, list) and (all_type(other, int_types) or all_type(other, bool)))
    )
    if not deletable:
        # Bail out before touching newdata: NotImplemented has no _data
        # attribute, so falling through to the shape update would raise
        # AttributeError instead of reporting the unsupported operand.
        return NotImplemented
    newdata.del_rows(other)
    # Keep the column-assignment object's idea of the shape in step with the data.
    newdata._data._setas.shape = newdata.shape
    return newdata
def std(self, column=None, sigma=None, bounds=None):
    """Find the (optionally weighted) standard deviation of a data column.

    Args:
        column (index):
            Column to find the standard deviation of.

    Keyword Arguments:
        sigma (column index or array):
            The uncertainty noted for each value. Points are weighted by 1/sigma**2.
        bounds (callable):
            A callable function that takes a single argument list of numbers representing one row, and
            returns True for all rows to search in.

    Returns:
        (float):
            The standard deviation of the data.

    Note:
        If column is not defined (or is None) the :py:attr:`DataFile.setas` column
        assignments are used.

    .. todo::
        Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
    """
    _ = self._col_args(scalar=True, ycol=column, yerr=sigma)

    use_bounds = bounds is not None
    if use_bounds:
        self._push_mask()
    try:
        if use_bounds:
            self._set_mask(bounds, True, _.ycol)

        if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
            sigma = np.array(sigma)
        elif _.yerr:
            # NOTE(review): a truthiness test means an error column at index 0
            # would be ignored here - confirm what _col_args returns.
            sigma = self.data[:, _.yerr]
        else:
            sigma = np.ones(len(self))

        ydata = self.data[:, _.ycol]

        # Normalise the uncertainties to their largest magnitude and floor them
        # so that the 1/sigma**2 weights stay finite.
        sigma = np.abs(sigma) / np.nanmax(np.abs(sigma))
        sigma = np.where(sigma < 1e-8, 1e-8, sigma)
        weights = 1 / sigma ** 2
        weights[np.isnan(weights)] = 0.0  # points with undefined uncertainty carry no weight

        return np.sqrt(np.cov(ydata, aweights=weights))
    finally:
        # Always restore the caller's mask, even if the calculation failed.
        if use_bounds:
            self._pop_mask()
def mean(self, column=None, sigma=None, bounds=None):
    """Find the mean value of a data column.

    Args:
        column (index):
            Column to find the mean of.

    Keyword Arguments:
        sigma (column index or array):
            The uncertainty noted for each value in the mean.
        bounds (callable):
            A callable function that takes a single argument list of numbers representing one row, and
            returns True for all rows to search in.

    Returns:
        (float or tuple of (float, float)):
            When no uncertainties are available, the plain mean of the column. When uncertainties
            are available, a tuple of the 1/sigma**2 weighted mean and an error estimate computed
            as ``sqrt(sum(sigma**2)) / len(sigma)``.

    Note:
        If column is not defined (or is None) the :py:attr:`DataFile.setas` column
        assignments are used.

    .. todo::
        Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
    """
    _ = self._col_args(scalar=True, ycol=column, yerr=sigma)

    use_bounds = bounds is not None
    if use_bounds:
        self._push_mask()
    try:
        if use_bounds:
            self._set_mask(bounds, True, _.ycol)

        if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
            sigma = np.array(sigma)
            _["has_yerr"] = True
        elif _.has_yerr:
            sigma = self.data[:, _.yerr]

        ydata = self.data[:, _.ycol]
        if not _.has_yerr:
            return ydata.mean()

        w = 1 / (sigma ** 2 + 1e-8)  # small offset avoids division by zero
        norm = w.sum(axis=0)
        error = np.sqrt((sigma ** 2).sum(axis=0)) / len(sigma)
        # Weighted mean is sum(w*y)/sum(w); taking .mean() of the products
        # would divide by the number of points a second time.
        return (ydata * w).sum(axis=0) / norm, error
    finally:
        # Always restore the caller's mask, even if the calculation failed.
        if use_bounds:
            self._pop_mask()
def swap_column(self, *swp, **kargs):
    """Swap pairs of columns in the data.

    Useful for reordering data for programs that expect columns in a fixed order.

    Args:
        swp (tuple or list of tuples of two elements):
            Each element will be used as a column index (using the normal rules
            for matching columns). The two elements represent the two columns
            that are to be swapped. The pair may also be given as two separate
            positional arguments.

    Keyword Arguments:
        headers_too (bool):
            Swap the column headers as well (default True).
        setas_too (bool):
            Swap the column assignments as well (default True).

    Returns:
        self:
            The object the method was called on, with the columns swapped in place.

    Raises:
        TypeError:
            If *swp* is neither a tuple nor a list of two-element tuples.

    Note:
        If swp is a list, then the function is called recursively on each element of the list.
    """
    headers_too = kargs.pop("headers_too", True)
    setas_too = kargs.pop("setas_too", True)

    if len(swp) == 1:
        swp = swp[0]
    if isinstance(swp, list) and all_type(swp, tuple) and all_size(swp, 2):
        for item in swp:
            # Forward both options so a list of swaps honours setas_too as well.
            self.swap_column(item, headers_too=headers_too, setas_too=setas_too)
    elif isinstance(swp, tuple):
        col1 = self._setas.find_col(swp[0])
        col2 = self._setas.find_col(swp[1])
        self[:, [col1, col2]] = self[:, [col2, col1]]
        if headers_too:
            self._setas.column_headers[col1], self._setas.column_headers[col2] = (
                self._setas.column_headers[col2],
                self._setas.column_headers[col1],
            )
        if setas_too:
            self._setas[col1], self._setas[col2] = self._setas[col2], self._setas[col1]
    else:
        raise TypeError("Swap parameter must be either a tuple or a list of tuples")
    return self
def _average_list(listob): """Average a list of items picking an appropriate average given the type. If no appropriate average is found None will be returned. if listob contains nested lists or dicts of numbers then try to average individual items within the lists/keys. """ if len(listob) == 0: return None if not all_type(listob, type(listob[0])): return None # all of the list isn't the same type typex = listob[0] if isinstance(typex, numbers.Number): ret = sum(listob) / float(len(listob)) elif isinstance(typex, np.ndarray): try: ret = np.average(tuple(listob)) except Exception: # probably incompatible array sizes ret = None elif isinstance( typex, (tuple, list)): # recursively go through sub lists averaging values nl = zip(*listob) ret = [_average_list(list(i)) for i in nl] if isinstance(typex, tuple): ret = tuple(ret) elif isinstance( typex, dict): # recursively go through dictionary keys averaging values ret = {} for k in typex.keys(): ret[k] = _average_list([listob[i][k] for i in listob]) elif isinstance(typex, string_types): if all(i == typex for i in listob): ret = listob[0] # all the same text return that string else: ret = None else: return None return ret
def _average_list(listob): """Average a list of items picking an appropriate average given the type. If no appropriate average is found None will be returned. if listob contains nested lists or dicts of numbers then try to average individual items within the lists/keys. """ if len(listob) == 0: return None if not all_type(listob, type(listob[0])): return None # all of the list isn't the same type typex = listob[0] if isinstance(typex, numbers.Number): ret = sum(listob) / float(len(listob)) elif isinstance(typex, np.ndarray): try: ret = np.average(tuple(listob)) except Exception: # probably incompatible array sizes ret = None elif isinstance(typex, (tuple, list)): # recursively go through sub lists averaging values nl = zip(*listob) ret = [_average_list(list(i)) for i in nl] if isinstance(typex, tuple): ret = tuple(ret) elif isinstance(typex, dict): # recursively go through dictionary keys averaging values ret = {} for k in typex.keys(): ret[k] = _average_list([listob[i][k] for i in listob]) elif isinstance(typex, string_types): if all(i == typex for i in listob): ret = listob[0] # all the same text return that string else: ret = None else: return None return ret
def slice(self, *args, **kwargs):  # pylint: disable=arguments-differ
    """Return the requested metadata items for each item/file in the top level group.

    Args:
        *args (string or list of strings):
            The metadata key(s) to return from each member of the folder.

    Keyword Arguments:
        values_only (bool):
            If given and *output* is not set, *output* defaults to "smart" rather than "dict".
            Mostly useful when given a single key string.
        output (str or type):
            Controls the output format. Possible values are

            -   "dict" or dict - return a list of dictionary subsets of the metadata from each member
            -   "list" or list - return a list of values of each item of the metadata (a list of
                tuples when there is more than one key)
            -   "array" or np.array - like list above, but returns a single numpy array. This can
                create a 2D array from multiple keys
            -   "data" or Stoner.Data - returns the metadata in a Stoner.Data object where the
                column headers are the metadata keys.
            -   "frame" - returns the metadata as a Pandas DataFrame object
            -   "smart" - switch between *dict* and *list* depending whether there is one or more keys.
        mask_missing (bool):
            If True, then metadata entries missing in members of the folder are returned as masked
            values (or None). If False, then an exception is raised if any entries are missing.

    Returns:
        ret (list of dict, list of values/tuples, array, DataFrame or :py:class:`Stoner.Data`):
            The sliced metadata in the format selected by *output*.

    Raises:
        SyntaxError:
            If unknown keyword arguments are given or *output* is not a recognised value.
        KeyError:
            If an argument cannot be used as a key name, or (when *mask_missing* is False) a
            key is not present in all members of the folder.

    Note:
        Metadata values that are non-empty lists are expanded into separate entries named
        ``key[0]``, ``key[1]``, ...

    To do:
        this should probably be a func in baseFolder and should have recursive options
        (build a dictionary of metadata values). And probably options to extract other
        parts of objects (first row or whatever).
    """
    values_only = kwargs.pop("values_only", False)
    output = kwargs.pop("output", None)
    mask_missing = kwargs.pop("mask_missing", False)
    if kwargs:
        raise SyntaxError("Unused keyword arguments : {}".format(kwargs))
    if output is None:  # Sort out a definitive value of output
        output = "dict" if not values_only else "smart"
    if isinstance(output, string_types):
        output = output.lower()
    if output not in [
        "dict",
        "list",
        "array",
        "data",
        "frame",
        "smart",
        dict,
        list,
        np.ndarray,
        DataFile,
    ]:  # Check for good output value
        raise SyntaxError("output of slice metadata must be either dict, list, or array not {}".format(output))

    keys = []
    for k in args:
        if isinstance(k, string_types):
            keys.append(k)
        elif isiterable(k) and all_type(k, string_types):
            keys.extend(k)
        else:
            raise KeyError("{} cannot be used as a key name or set of key names".format(type(k)))
    if not mask_missing:
        for k in keys:
            if k not in self.common_keys:
                raise KeyError("{} is not a key in all members of the folder".format(k))

    results = [{k: d[k] for k in keys if k in d} for d in self._folder]
    for r in results:  # Expand the results where a result contains a list
        for k in keys:
            if k in r and islike_list(r[k]) and len(r[k]) > 0:
                v = r.pop(k)
                r.update({"{}[{}]".format(k, i): vi for i, vi in enumerate(v)})

    if output == "smart":
        # Use a plain list only when every result holds exactly the same single
        # key. Comparing key lists (rather than indexing the first key) keeps
        # this safe when the first result is empty.
        first = list(results[0]) if results else []
        single_key = not results or (len(first) == 1 and all(list(r) == first for r in results))
        output = "list" if single_key else "dict"

    if output in ["list", list]:
        # Collect the keys in first-seen order so that tuple columns come out
        # in a reproducible order rather than arbitrary set order.
        keys = list(dict.fromkeys(k for r in results for k in r))
        if len(keys) == 1:
            ret = [r.get(keys[0], None) for r in results]
        else:
            ret = [tuple(r.get(k, None) for k in keys) for r in results]
    elif output in ["dict", dict]:  # accept the dict type as well as the string
        ret = results
    else:
        from pandas import DataFrame
        from Stoner import Data

        frame = DataFrame(results)
        mask = frame.isna()
        if output == "frame":
            ret = frame
        else:
            ret = Data(frame)
            ret.mask = mask  # entries missing from a member show up as masked values
            if output in ["array", np.ndarray]:
                ret = ret.data
    return ret
def __getitem__(self, ix):
    """Indexing function for DataArray.

    The index is first normalised (column names are translated to column
    numbers with ``self._setas.find_col``), then passed to the parent class
    indexer. Array results then have ``isrow``, ``setas``, ``column_headers``,
    ``i`` and (for non-row results) ``name`` set from this array.

    Args:
        ix (various):
            Index to find.

    Returns:
        An indexed part of the DataArray object with extra attributes. Results with
        zero dimensions or a single element are returned via ``ret.dtype.type(ret)``.

    Notes:
        This tries to support all of the indexing operations of a regular numpy array,
        plus the special operations where one or more columns are named.

    Warning:
        The code almost certainly makes some assumptions that DataArray is one or 2D and
        may blow up with 3D arrays ! On the other hand it has a special case exception for
        where you give a string as the first index element and assumes that you've forgotten
        that we're row major and tries to do the right thing.
    """
    # Is this going to be a single row ?
    single_row = isinstance(ix, int_types) or (
        isinstance(ix, tuple) and len(ix) > 0 and isinstance(ix[0], int_types)
    )
    # If the index is a single string type, then build a column accessing index
    if isinstance(ix, string_types):
        if self.ndim > 1:
            ix = (slice(None, None, None), self._setas.find_col(ix))
        else:
            ix = (self._setas.find_col(ix),)
    if isinstance(ix, (int_types, slice)):
        # Wrap a bare integer or slice so the code below can always index into ix
        ix = (ix,)
    elif isinstance(ix, tuple) and ix and isinstance(ix[-1], string_types):  # index still has a string type in it
        ix = list(ix)
        ix[-1] = self._setas.find_col(ix[-1])
        ix = tuple(ix)
    elif (
        isinstance(ix, tuple) and ix and isinstance(ix[-1], _np_.ndarray) and self.ndim == 1
    ):  # Indexing with a numpy array
        if len(ix) == 1:
            ix = ix[0]
    elif isinstance(ix, tuple) and ix and isiterable(ix[-1]):  # indexing with a list of columns
        ix = list(ix)
        if all_type(ix[-1], bool):
            # Turn a boolean column selection into the matching column numbers
            ix[-1] = _np_.arange(len(ix[-1]))[ix[-1]]
        ix[-1] = [self._setas.find_col(c) for c in ix[-1]]
        ix = tuple(ix)
    elif isinstance(ix, tuple) and ix and isinstance(ix[0], string_types):  # oops! backwards indexing
        # Column name was given first: move it to the end of the index
        c = ix[0]
        ix = list(ix[1:])
        ix.append(self._setas.find_col(c))
        ix = tuple(ix)
    # Now can index with our constructed multidimensional indexer
    ret = super(DataArray, self).__getitem__(ix)
    if ret.ndim == 0 or isinstance(ret, _np_.ndarray) and ret.size == 1:
        return ret.dtype.type(ret)
    elif not isinstance(ret, _np_.ndarray):  # bugout for scalar returns
        return ret
    elif ret.ndim >= 2:  # Potentially 2D array here
        if ix[-1] is None:  # Special case for increasing an array dimension
            if self.ndim == 1:  # Going from 1 D to 2D
                ret.setas = self.setas.clone
                ret.i = self.i
                ret.name = getattr(self, "name", "Column")
            return ret
        else:  # A regular 2D array
            ret.isrow = single_row
            ret.setas = self.setas.clone
            ret.column_headers = copy.copy(self.column_headers)
            if len(ix) > 0 and isiterable(ix[-1]):  # pylint: disable=len-as-condition
                # Keep only the headers of the columns that were selected
                ret.column_headers = list(_np_.array(ret.column_headers)[ix[-1]])
            # Sort out whether we need an array of row labels
            if isinstance(self.i, _np_.ndarray) and len(ix) > 0:  # pylint: disable=len-as-condition
                if isiterable(ix[0]) or isinstance(ix[0], int_types):
                    ret.i = self.i[ix[0]]
                else:
                    ret.i = 0
            else:
                ret.i = self.i
    elif ret.ndim == 1:  # Potentially a single row or single column
        ret.isrow = single_row
        if len(ix) == len(self.setas):
            # Carry over only the assignments and headers picked out by the last index element
            tmp = _np_.array(self.setas)[ix[-1]]
            ret.setas(tmp)
            tmpcol = _np_.array(self.column_headers)[ix[-1]]
            ret.column_headers = tmpcol
        else:
            ret.setas = self.setas.clone
            ret.column_headers = copy.copy(self.column_headers)
        # Sort out whether we need an array of row labels
        if single_row and isinstance(self.i, _np_.ndarray):
            ret.i = self.i[ix[0]]
        else:  # This is a single element?
            ret.i = self.i
        if not single_row:
            ret.name = self.column_headers
    return ret