def _scale(coord, scale=1.0, to_pixel=True):
    """Convert pixel coordinates to scaled coordinates or vice versa.

    Args:
        coord (int, float or iterable): Coordinates to be scaled

    Keyword Arguments:
        scale (float): Microns-per-pixel scale of the image
        to_pixel (bool): Force the conversion to be to pixels

    Returns:
        Scaled coordinates.
    """
    if isinstance(coord, int):
        if not to_pixel:
            coord = float(coord) * scale
    elif isinstance(coord, float):
        if to_pixel:
            coord = int(round(coord / scale))
    elif isIterable(coord):
        coord = tuple([_scale(c, scale, to_pixel) for c in coord])
    else:
        raise ValueError("coord should be an integer or a float or an iterable of integers and floats")
    return coord
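# A minimal sketch of how _scale() behaves, using illustrative values (0.5 microns
# per pixel) that are not taken from the source:
assert _scale(3.2, scale=0.5, to_pixel=True) == 6     # float -> pixel index via int(round(3.2 / 0.5))
assert _scale(6, scale=0.5, to_pixel=False) == 3.0    # int pixel -> scaled value via 6 * 0.5
assert _scale((6, 4.0), scale=0.5, to_pixel=True) == (6, 8)  # iterables convert element-wise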
def __getattr__(self, name): """Get a column using the setas attribute.""" # Overrides __getattr__ to allow access as row.x etc. col_check = { "x": "xcol", "d": "xerr", "y": "ycol", "e": "yerr", "z": "zcol", "f": "zerr", "u": "ucol", "v": "vcol", "w": "wcol", } if name in self.setas.cols: return self.setas.__getattr__(name) if name not in col_check: return super(DataArray, self).__getattribute__(name) indexer = [slice(0, dim, 1) for ix, dim in enumerate(self.shape)] col = col_check[name] if col.startswith("x"): if self._setas.cols[col] is not None: indexer[-1] = self._setas.cols[col] ret = self[tuple(indexer)] if ret.ndim > 0: ret.column_headers = self.column_headers[ self._setas.cols[col]] else: ret = None else: if isIterable( self._setas.cols[col]) and len(self._setas.cols[col]) > 0: indexer[-1] = self._setas.cols[col][0] elif isIterable(self._setas.cols[col]): indexer[-1] = self._setas.cols[col] else: return None ret = self[tuple(indexer)] if ret.ndim > 0: ret.column_headers = self.column_headers[indexer[-1]] if ret is None: raise StonerSetasError( "Tried accessing a {} column, but setas is not defined.". format(name)) return ret
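# Usage sketch for setas-driven attribute access (assumes the Stoner package is
# installed and that Data forwards these attributes to its DataArray):
import numpy as np
from Stoner import Data

_d = Data(np.column_stack([np.linspace(0.0, 1.0, 5), np.linspace(0.0, 2.0, 5)]),
          column_headers=["X", "Y"])
_d.setas = "xy"     # map column 0 -> x and column 1 -> y
print(_d.data.x)    # the x column via the col_check mapping above
print(_d.data.y)    # the y column; _d.data.e would require a column assigned as yerr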
def pattern(self, value):
    """Set the filename searching pattern(s) for the :py:class:`Stoner.Core.metadataObject`s."""
    if isinstance(value, (string_types, _pattern_type)):
        self._pattern = (value,)
    elif isIterable(value):
        self._pattern = [x for x in value]
    else:
        raise ValueError(
            "pattern should be a string, regular expression or iterable object, not a {}".format(type(value))
        )
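# Usage sketch (assuming a DataFolder exposing this property; paths are hypothetical):
import re
from Stoner import DataFolder

fldr = DataFolder(".", pattern="*.dat")            # a single glob is stored as a 1-tuple
fldr.pattern = ["*.dat", re.compile(r"scan_\d+")]  # globs and regexps may be mixed in a list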
def std(self, column=None, sigma=None, bounds=None):
    """Find the standard deviation of a data column.

    Args:
        column (index): Column to calculate the standard deviation of

    Keyword Arguments:
        sigma (column index or array): The uncertainty noted for each value in the column
        bounds (callable): A callable function that takes a single argument list of
            numbers representing one row, and returns True for all rows to search in.

    Returns:
        (float): The standard deviation of the data.

    Note:
        If column is not defined (or is None) the :py:attr:`DataFile.setas` column assignments are used.

    .. todo::
        Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
    """
    _ = self._col_args(scalar=True, ycol=column, yerr=sigma)
    if bounds is not None:
        self._push_mask()
        self._set_mask(bounds, True, _.ycol)
    if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
        sigma = np.array(sigma)
    elif _.yerr:
        sigma = self.data[:, _.yerr]
    else:
        sigma = np.ones(len(self))
    ydata = self.data[:, _.ycol]
    sigma = np.abs(sigma) / np.nanmax(np.abs(sigma))
    sigma = np.where(sigma < 1e-8, 1e-8, sigma)
    weights = 1 / sigma ** 2
    weights[np.isnan(weights)] = 0.0
    result = np.sqrt(np.cov(ydata, aweights=weights))
    if bounds is not None:
        self._pop_mask()
    return result
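# Usage sketch for std() (assuming a Stoner Data object; synthetic values):
import numpy as np
from Stoner import Data

x = np.linspace(0, 10, 101)
d = Data(np.column_stack([x, np.sin(x), np.full_like(x, 0.05)]),
         column_headers=["x", "y", "dy"], setas="xye")
print(d.std())                   # weighted standard deviation using the assigned e column
print(d.std(column=1, sigma=2))  # or name the y and sigma columns explicitly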
def i(self, value):
    """Set the row numbers (index base) for the DataArray."""
    if self.ndim == 0:
        pass
    elif self.ndim == 1 and self.isrow:
        if isIterable(value) and value:
            self._ibase = np.array([min(value)])
        else:
            self._ibase = np.array([value])
    elif self.ndim >= 1:
        r = self.shape[0]
        if isIterable(value) and len(value) == r:  # Iterable and the correct length - assign straight
            self._ibase = np.array(value)
        elif isIterable(value) and len(value) > 0:  # Iterable but not the correct length - count from min of value
            self._ibase = np.arange(min(value), min(value) + r)
        elif isIterable(value) and len(value) == 0:  # Empty iterable - just count from zero
            self._ibase = np.arange(0, r)
        else:  # Not iterable - count from the given value
            self._ibase = np.arange(value, value + r)
def mean(self, column=None, sigma=None, bounds=None):
    """Find the mean value of a data column.

    Args:
        column (index): Column to calculate the mean of

    Keyword Arguments:
        sigma (column index or array): The uncertainty noted for each value in the column
        bounds (callable): A callable function that takes a single argument list of
            numbers representing one row, and returns True for all rows to search in.

    Returns:
        (float or tuple): The mean of the data, or a (mean, error) tuple when uncertainties are used.

    Note:
        If column is not defined (or is None) the :py:attr:`DataFile.setas` column assignments are used.

    .. todo::
        Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
    """
    _ = self._col_args(scalar=True, ycol=column, yerr=sigma)
    if bounds is not None:
        self._push_mask()
        self._set_mask(bounds, True, _.ycol)
    if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
        sigma = np.array(sigma)
        _["has_yerr"] = True
    elif _.has_yerr:
        sigma = self.data[:, _.yerr]
    if not _.has_yerr:
        result = self.data[:, _.ycol].mean()
    else:
        ydata = self.data[:, _.ycol]
        w = 1 / (sigma ** 2 + 1e-8)
        norm = w.sum(axis=0)
        error = np.sqrt((sigma ** 2).sum(axis=0)) / len(sigma)
        result = (ydata * w).sum(axis=0) / norm, error  # inverse-variance weighted mean
    if bounds is not None:
        self._pop_mask()
    return result
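# The weighted branch implements the standard inverse-variance weighted mean,
# ybar = sum(w * y) / sum(w) with w = 1 / sigma**2; a standalone numpy check:
import numpy as np

y = np.array([1.0, 2.0, 3.0])
sigma = np.array([0.1, 0.2, 0.4])
w = 1 / (sigma ** 2 + 1e-8)                        # same damping term as above
mean_val = (y * w).sum() / w.sum()                 # weighted mean
error = np.sqrt((sigma ** 2).sum()) / len(sigma)   # error of the mean as computed above
print(mean_val, error)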
def __setattr__(self, name, value):
    """Setting an attribute on .each sets it on all instantiated objects and in _object_attrs.

    Args:
        name (str): Attribute to set
        value (any): Value to set

    Notes:
        If *name* is not present on the empty member instance, then the first member of the folder is checked
        as well. This allows the attributes of a :py:class:`Stoner.Data` object that derive from the
        :py:attr:`Stoner.Data.setas` attribute (such as *.x*, *.y* or *.e* etc.) to be accessed.

        If *value* is iterable and the same length as the folder, then each element in the folder is loaded
        and the corresponding element of *value* is assigned to the attribute of the member.
    """
    if hasattr(self.__class__, name) or name.startswith("_"):  # Handle setting our own attributes
        super(item, self).__setattr__(name, value)
    elif name in dir(self._folder.instance) or (
        len(self._folder) and hasattr(self._folder[0], name)
    ):  # This is an instance attribute
        if isIterable(value) and len(value) == len(self._folder):
            force_load = True
        else:
            force_load = False
            self._folder._object_attrs[name] = value  # Add to attributes to be set on load
            value = [value] * len(self._folder)
        for d, v in zip(self._folder.__names__(), value):  # And set on all instantiated objects
            if force_load or isinstance(self._folder.__getter__(d, instantiate=False), self._folder.type):
                d = self._folder.__getter__(d)
                setattr(d, name, v)
    else:
        raise AttributeError("Unknown attribute {}".format(name))
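# Usage sketch for attribute broadcast through .each (assuming a DataFolder whose
# members support setas; the directory and pattern are hypothetical):
from Stoner import DataFolder

fldr = DataFolder(".", pattern="*.dat")
fldr.each.setas = "xy"                # one value: stored in _object_attrs and set on every member
fldr.each.setas = ["xy"] * len(fldr)  # right-length iterable: assigned element-wise, loading each member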
def _slice_keys(args, possible=None):
    """Work through the arguments to slice() and construct a list of keys."""
    keys = []
    for k in args:
        if isinstance(k, string_types):
            if k not in possible:
                sub_k = fnmatch.filter(possible, k)
                if len(sub_k) > 0:
                    keys.extend(_slice_keys(sub_k, possible))
                else:
                    raise KeyError(f"No matching keys for {k} in metadata")
            else:
                keys.append(k)
        elif isinstance(k, type) and issubclass(k, Model):
            model = k.__name__
            for name in k().param_names:
                for sub_k in [f"{model}:{name}", f"{model}:{name} err"]:
                    if sub_k not in possible:
                        raise KeyError(f"No matching keys for {sub_k} in metadata")
                    keys.append(sub_k)
        elif isinstance(k, Model):
            model = k.__class__.__name__
            for name in k.param_names:
                for sub_k in [f"{model}:{name}", f"{model}:{name} err"]:
                    if sub_k not in possible:
                        raise KeyError(f"No matching keys for {sub_k} in metadata")
                    keys.append(sub_k)
        elif isIterable(k):
            keys.extend(_slice_keys(k, possible))
        else:
            raise KeyError("{} cannot be used as a key name or set of key names".format(type(k)))
    return keys
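# A quick sketch of the string/glob path (the metadata names here are made up):
possible = ["Temperature", "Field", "Lockin:X", "Lockin:Y"]
print(_slice_keys(["Temp*", "Lockin:?"], possible=possible))
# -> ['Temperature', 'Lockin:X', 'Lockin:Y']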
def peaks(self, **kargs):
    """Locate peaks and/or troughs in a column of data by using SG-differentiation.

    Args:
        ycol (index): the column name or index of the data in which to search for peaks
        width (int or float): the expected minimum half-width of a peak in terms of the number of data points
            (int) or distance in x (float). This is used in the differentiation code to find local maxima.
            Bigger equals less sensitive to experimental noise, smaller means better able to see sharp peaks
        poly (int): the order of polynomial to use when differentiating the data to locate a peak. Must be
            >= 2; higher numbers will find sharper peaks more accurately but at the risk of finding more
            false positives.

    Keyword Arguments:
        significance (float): used to decide whether a local maximum is a significant peak. Essentially just
            the curvature of the data. Bigger means less sensitive, smaller means more likely to detect
            noise. Default is the maximum curvature/(2*width)
        xcol (index or None): name or index of data column that provides the x-coordinate (default None)
        peaks (bool): select whether to measure peaks in data (default True)
        troughs (bool): select whether to measure troughs in data (default False)
        sort (bool): Sort the results by significance of peak
        modify (bool): If true, then the returned object is a copy of self with only the peaks/troughs left
            in the data.
        full_data (bool): If True (default) then all columns of the data at which peaks in the *ycol* column
            are found. *modify* true implies *full_data* is also true. If *full_data* is False, then only
            the x-column values of the peaks are returned.

    Returns:
        (various): If *modify* is true, then returns the AnalysisMixin with the data set to just the
            peaks/troughs. If *modify* is false (default), then the return value depends on *ycol* and
            *xcol*. If *ycol* is not None and *xcol* is None, then returns complete rows of data
            corresponding to the found peaks/troughs. If *xcol* is not None, or *ycol* is None and *xcol*
            is None, then returns a 1D array of the x positions of the peaks/troughs.

    See Also:
        User guide section :ref:`peak_finding`
    """
    width = kargs.pop("width", int(len(self) / 20))
    peaks = kargs.pop("peaks", True)
    troughs = kargs.pop("troughs", False)
    poly = kargs.pop("poly", 2)
    assertion(
        poly >= 2, "poly must be at least 2nd order in peaks for checking for significance of peak or trough"
    )
    sort = kargs.pop("sort", False)
    modify = kargs.pop("modify", False)
    full_data = kargs.pop("full_data", True)
    _ = self._col_args(scalar=False, xcol=kargs.pop("xcol", None), ycol=kargs.pop("ycol", None))
    xcol, ycol = _.xcol, _.ycol
    if isIterable(ycol):
        ycol = ycol[0]
    if isinstance(width, float):  # Convert a floating point width into an integer.
        xmin, xmax = self.span(xcol)
        width = int(len(self) * width / (xmax - xmin))
    width = max(width, poly + 1)
    setas = self.setas.clone  # pylint: disable=E0203
    self.setas = ""
    d1 = self.SG_Filter(ycol, xcol=xcol, points=width, poly=poly, order=1).ravel()
    d2 = self.SG_Filter(
        ycol, xcol=xcol, points=2 * width, poly=poly, order=2
    ).ravel()  # 2nd differential requires more smoothing
    # We're going to ignore the start and end of the arrays
    index_offset = int(width / 2)
    d1 = d1[index_offset:-index_offset]
    d2 = d2[index_offset:-index_offset]
    # Pad the ends of d2 with the mean value
    pad = np.mean(d2[index_offset:-index_offset])
    d2[:index_offset] = pad
    d2[-index_offset:] = pad
    # Set the significance from the 2nd differential if not already set
    significance = kargs.pop(
        "significance", np.max(np.abs(d2)) / (2 * width)
    )  # Base an a priori significance on max d2y/dx2 / (2 * width)
    if isinstance(significance, int):  # integer significance is inverse to floating
        significance = np.max(np.abs(d2)) / significance
    d2_interp = interp1d(np.arange(len(d2)), d2, kind="cubic")
    # Ensure we have some x-data
    if xcol is None:
        xdata = np.arange(len(self))
    else:
        xdata = self.column(xcol)
    xdata = interp1d(np.arange(len(self)), xdata, kind="cubic")

    possible_peaks = np.array(threshold(0, d1, rising=troughs, falling=peaks))
    curvature = np.abs(d2_interp(possible_peaks))
    # Filter just the significant peaks
    possible_peaks = np.array([p for ix, p in enumerate(possible_peaks) if abs(curvature[ix]) > significance])
    # Sort in order of significance
    if sort:
        possible_peaks = np.take(possible_peaks, np.argsort(np.abs(d2_interp(possible_peaks))))

    xdat = xdata(possible_peaks + index_offset)

    if modify:
        self.data = self.interpolate(xdat, xcol=xcol, kind="cubic")
        ret = self
    elif full_data:
        ret = self.interpolate(xdat, kind="cubic", xcol=False)
    else:
        ret = xdat
    self.setas = setas
    # Return - but remembering to add back on the offset that we took off due to
    # differentials not working at the start and end
    return ret
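# Usage sketch for peaks() (synthetic two-Gaussian data; the width value is
# illustrative, not a recommendation from the source):
import numpy as np
from Stoner import Data

x = np.linspace(0, 10, 500)
y = np.exp(-((x - 3) ** 2) / 0.1) + 0.5 * np.exp(-((x - 7) ** 2) / 0.1)
d = Data(np.column_stack([x, y]), column_headers=["x", "y"], setas="xy")
rows = d.peaks(width=0.2, full_data=True)   # interpolated rows at each peak
xpos = d.peaks(width=0.2, full_data=False)  # just the x positions of the peaks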
def threshold(self, threshold, **kargs):
    """Find partial indices where the data in column passes the threshold, rising or falling.

    Args:
        threshold (float): Value to look for in column col

    Keyword Arguments:
        col (index): Column index to look for data in
        rising (bool): look for case where the data is increasing in value (default True)
        falling (bool): look for case where data is falling in value (default False)
        xcol (index, bool or None): rather than returning a fractional row index, return the interpolated
            value in column xcol. If xcol is False, then return a complete row
        all_vals (bool): return all crossing points of the threshold or just the first (default False)
        transpose (bool): Swap the x and y columns around - this is most useful when the column assignments
            have been done via the setas attribute

    Returns:
        (float): Either a single fractional row index, or an interpolated x value

    Note:
        If you don't specify a col value or set it to None, then the assigned columns via the
        :py:attr:`DataFile.setas` attribute will be used.

    Warning:
        There has been an API change. Versions prior to 0.1.9 placed the column before the threshold in the
        positional argument list. In order to support the use of assigned columns, this has been swapped to
        the present order.
    """
    DataArray = self.data.__class__
    col = kargs.pop("col", None)
    if col is None:
        col = self.setas._get_cols("ycol")
        xcol = kargs.pop("xcol", self.setas._get_cols("xcol"))
    else:
        xcol = kargs.pop("xcol", None)

    rising = kargs.pop("rising", True)
    falling = kargs.pop("falling", False)
    all_vals = kargs.pop("all_vals", False)

    current = self.column(col)

    # Recursively call if we've got an iterable threshold
    if isIterable(threshold):
        if isinstance(xcol, bool) and not xcol:
            ret = np.zeros((len(threshold), self.shape[1]))
        else:
            ret = np.zeros_like(threshold).view(type=DataArray)
        for ix, th in enumerate(threshold):
            ret[ix] = self.threshold(th, col=col, xcol=xcol, rising=rising, falling=falling, all_vals=all_vals)
        # Now we have to clean up the return list into a DataArray
        if isinstance(xcol, bool) and not xcol:  # if xcol was False we got a complete row back
            ch = self.column_headers
            ret.setas = self.setas.clone
            ret.column_headers = ch
            ret.i = ret[0].i
        else:  # Either xcol was None so we got indices or we got a specified column back
            if xcol is not None:  # Specific column
                ret = np.atleast_2d(ret)
                ret.column_headers = [self.column_headers[self.find_col(xcol)]]
                ret.i = [r.i for r in ret]
                ret.setas = "x"
                ret.isrow = False
            else:
                ret.column_headers = ["Index"]
                ret.isrow = False
        return ret

    ret = _threshold(threshold, current, rising=rising, falling=falling)
    if not all_vals:
        ret = [ret[0]] if np.any(ret) else []

    if isinstance(xcol, bool) and not xcol:
        retval = self.interpolate(ret, xcol=False)
        retval.setas = self.setas.clone
        retval.setas.shape = retval.shape
        retval.i = ret
        ret = retval
    elif xcol is not None:
        retval = self.interpolate(ret, xcol=False)[:, self.find_col(xcol)]
        # if retval.ndim>0:   # not sure what this bit does but it's throwing errors for a simple threshold
        #     retval.setas=self.setas.clone
        #     retval.setas.shape=retval.shape
        #     retval.i=ret
        ret = retval
    else:
        ret = DataArray(ret)
    if not all_vals:
        if ret.size == 1:
            pass
        elif ret.size > 1:
            ret = ret[0]
        else:
            ret = []
    if isinstance(ret, DataArray):
        ret.isrow = True
    return ret
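# Usage sketch for threshold() (synthetic data; the threshold value is illustrative):
import numpy as np
from Stoner import Data

x = np.linspace(0, 10, 101)
d = Data(np.column_stack([x, np.sin(x)]), column_headers=["x", "y"], setas="xy")
print(d.threshold(0.5))                                             # x where y first rises through 0.5
print(d.threshold(0.5, all_vals=True, falling=True, rising=False))  # all falling crossings
row = d.threshold(0.5, xcol=False)                                  # a whole interpolated row instead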
def stitch(self, other, xcol=None, ycol=None, overlap=None, min_overlap=0.0, mode="All", func=None, p0=None):
    r"""Apply a scaling to this data set to make it stitch to another dataset.

    Args:
        other (DataFile): Another data set that is used as the base to stitch this one on to
        xcol,ycol (index or None): The x and y data columns. If left as None then the current setas
            attribute is used.

    Keyword Arguments:
        overlap (tuple of (lower,higher) or None): The band of x values that are used in both data sets to
            match; if left as None, then the common overlap of the x data is used.
        min_overlap (float): If you know that overlap must be bigger than a certain amount, the bounds
            between the two data sets needs to be adjusted. In this case min_overlap shifts the boundary of
            the overlap on this DataFile.
        mode (str): Unless *func* is specified, controls which parameters are actually variable, defaults to
            all of them.
        func (callable): a stitching function that transforms :math:`(x,y)\rightarrow(x',y')`. Default is
            to use functions defined by *mode*
        p0 (iterable): if func is not None then p0 should be the starting values for the stitching function
            parameters

    Returns:
        (:py:class:`Stoner.Data`): A copy of the current :py:class:`AnalysisMixin` with the x and y data
        columns adjusted to stitch

    To stitch the data together, the x and y data in the current data file is transformed so that
    :math:`x'=x+A` and :math:`y'=By+C` where :math:`A,B,C` are constants and :math:`(x',y')` are close
    matches to the :math:`(x,y)` data in *other*. The algorithm assumes that the overlap region contains
    equal numbers of :math:`(x,y)` points.

    *mode* controls whether A, B and C are fixed or adjustable

        - "All" - all three parameters adjustable
        - "Scale y, shift x" - C is fixed at 0.0
        - "Scale and shift y" - A is fixed at 0.0
        - "Scale y" - only B is adjustable
        - "Shift y" - only C is adjustable
        - "Shift x" - only A is adjustable
        - "Shift both" - B is fixed at 1.0

    See Also:
        User Guide section :ref:`stitch_guide`
    """
    _ = self._col_args(xcol=xcol, ycol=ycol, scalar=True)
    points = self.column([_.xcol, _.ycol])
    points = points[points[:, 0].argsort(), :]
    points[:, 0] += min_overlap
    otherpoints = other.column([_.xcol, _.ycol])
    otherpoints = otherpoints[otherpoints[:, 0].argsort(), :]
    self_second = np.max(points[:, 0]) > np.max(otherpoints[:, 0])
    if overlap is None:  # Calculate the overlap
        lower = max(np.min(points[:, 0]), np.min(otherpoints[:, 0]))
        upper = min(np.max(points[:, 0]), np.max(otherpoints[:, 0]))
    elif isinstance(overlap, int) and overlap > 0:
        if self_second:
            lower = points[0, 0]
            upper = points[overlap, 0]
        else:
            lower = points[-overlap - 1, 0]
            upper = points[-1, 0]
    elif (
        isinstance(overlap, tuple)
        and len(overlap) == 2
        and isinstance(overlap[0], float)
        and isinstance(overlap[1], float)
    ):
        lower = min(overlap)
        upper = max(overlap)
    inrange = np.logical_and(points[:, 0] >= lower, points[:, 0] <= upper)
    points = points[inrange]
    num_pts = points.shape[0]
    if self_second:
        otherpoints = otherpoints[-num_pts - 1:-1]
    else:
        otherpoints = otherpoints[0:num_pts]
    x = points[:, 0]
    y = points[:, 1]
    xp = otherpoints[:, 0]
    yp = otherpoints[:, 1]
    if func is None:
        opts = {
            "all": (lambda x, y, A, B, C: (x + A, y * B + C)),
            "scale y, shift x": (lambda x, y, A, B: (x + A, B * y)),
            "scale and shift y": (lambda x, y, B, C: (x, y * B + C)),
            "scale y": (lambda x, y, B: (x, y * B)),
            "shift y": (lambda x, y, C: (x, y + C)),
            "shift x": (lambda x, y, A: (x + A, y)),
            "shift both": (lambda x, y, A, C: (x + A, y + C)),
        }
        defaults = {
            "all": [1, 2, 3],
            "scale y, shift x": [1, 2],
            "scale and shift y": [2, 3],
            "scale y": [2],
            "shift y": [3],
            "shift x": [1],
            "shift both": [1, 3],
        }
        A0 = np.mean(xp) - np.mean(x)
        C0 = np.mean(yp) - np.mean(y)
        B0 = (np.max(yp) - np.min(yp)) / (np.max(y) - np.min(y))
        p = np.array([0, A0, B0, C0])
        assertion(isinstance(mode, string_types), "mode keyword should be a string if func is not defined")
        mode = mode.lower()
        assertion(mode in opts, "mode keyword should be one of {}".format(list(opts.keys())))
        func = opts[mode]
        p0 = p[defaults[mode]]
    else:
        assertion(callable(func), "Keyword func should be callable if given")
        args = getfullargspec(func)[0]  # pylint: disable=W1505
        assertion(isIterable(p0), "Keyword parameter p0 should be iterable if keyword func is given")
        assertion(
            len(p0) == len(args) - 2, "Keyword p0 should be the same length as the optional arguments to func"
        )
    # This is a bit of a hack, we turn (x,y) points into a 1D array of x and then y data
    set1 = np.append(x, y)
    set2 = np.append(xp, yp)
    assertion(len(set1) == len(set2), "The number of points in the overlap are different in the two data sets")

    def transform(set1, *p):
        """Wrapper function to fit for transform."""
        m = int(len(set1) / 2)
        x = set1[:m]
        y = set1[m:]
        tmp = func(x, y, *p)
        out = np.append(tmp[0], tmp[1])
        return out

    popt, pcov = curve_fit(transform, set1, set2, p0=p0)  # Curve fit for optimal A, B, C
    perr = np.sqrt(np.diagonal(pcov))
    self.data[:, _.xcol], self.data[:, _.ycol] = func(self.data[:, _.xcol], self.data[:, _.ycol], *popt)
    self["Stitching Coefficients"] = list(popt)
    self["Stitching Coefficient Errors"] = list(perr)
    self["Stitching overlap"] = (lower, upper)
    self["Stitching Window"] = num_pts

    return self
def apply(self, func, col=None, replace=True, header=None, **kargs):
    """Apply the given function to each row in the data set and add the result to the data set.

    Args:
        func (callable): The function to apply to each row of the data.
        col (index): The column in which to place the result of the function

    Keyword Arguments:
        replace (bool): Either replace the existing column/complete data or create a new column or data file.
        header (string or None): The new column header(s) (defaults to the name of the function func)

    Note:
        If any extra keyword arguments are supplied then these are passed to the function directly. If you
        need to pass any arguments that overlap with the keyword arguments to :py:meth:`AnalysisMixin.apply`
        then these can be supplied in a dictionary argument *_extra*.

        The callable *func* should have a signature::

            def func(row, **kargs):

        and should return either a single float, in which case it will be used to replace the specified
        column, or an array, in which case it is used to completely replace the row of data. If the function
        returns a complete row of data, then the *replace* parameter will cause the return value to be a new
        datafile, leaving the original unchanged. The *headers* parameter can give the complete column
        headers for the new data file.

    Returns:
        (:py:class:`Stoner.Data`): The newly modified Data object.
    """
    if col is None:
        col = self.setas.get("y", [0])[0]
    col = self.find_col(col)
    kargs.update(kargs.pop("_extra", dict()))
    # Check the dimension of the output
    ret = func(next(self.rows()), **kargs)
    try:
        next(self.rows(reset=True))
    except (RuntimeError, StopIteration):
        pass
    if isIterable(ret):
        nc = np.zeros((len(self), len(ret)))
    else:
        nc = np.zeros(len(self))
    # Evaluate the data row by row
    for ix, r in enumerate(self.rows()):
        ret = func(r, **kargs)
        if isIterable(ret) and not isinstance(ret, np.ndarray):
            ret = np.ma.MaskedArray(ret)
        nc[ix] = ret
    # Work out how to handle the result
    if nc.ndim == 1:
        if header is None:
            header = func.__name__
        self = self.add_column(nc, header=header, index=col, replace=replace, setas=self.setas[col])
        ret = self
    else:
        if not replace:
            ret = self.clone
        else:
            ret = self
        ret.data = nc
        if header is not None:
            ret.column_headers = header
    return ret
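# Usage sketch for apply() (synthetic data; the header names are made up):
import numpy as np
from Stoner import Data

d = Data(np.column_stack([np.arange(5.0), np.arange(5.0) * 2]),
         column_headers=["x", "y"], setas="xy")
d.apply(lambda r: r[0] + r[1], header="x+y")                       # scalar result replaces the y column
d2 = d.apply(lambda r: r * 2, replace=False, header=["2x", "2y"])  # whole-row result -> new object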
def smooth(self, window="boxcar", xcol=None, ycol=None, size=None, **kargs): """Smooth data by convoluting with a window. Args: window (string or tuple): Defines the window type to use by passing to :py:func:`scipy.signal.get_window`. Keyword Arguments: xcol(column index or None): Data to use as x data if needed to define a window. If None, use :py:attr:`Stoner.Core.DataFile.setas` ycvol (column index or None): Data to be smoothed size (int or float): If int, then the number of points to use in the smoothing window. If float, then the size in x-data to be used. result (bool or column index): Whether to add the smoothed data to the dataset and if so where. replace (bool): Replace the exiting data or insert as a new column. header (string): New column header for the new data. Returns: (self or array): If result is False, then the return value will be a copy of the smoothed data, otherwise the return value is a copy of the AnalysisMixin object with the smoothed data added, Notes: If size is float, then it is necessary to map the X-data to a number of rows and to ensure that the data is evenly spaced in x. To do this, the number of rows in the window is found by dividing the span in x by the size and multiplying by the total lenfth. Then the data is interpolated to a new set of evenly space X over the same range, smoothed and then interpoalted back to the original x values. """ _ = self._col_args(xcol=xcol, ycol=ycol) replace = kargs.pop("replace", True) result = kargs.pop("result", True) # overwirte existing y column data header = kargs.pop("header", self.column_headers[_.ycol]) # Sort out window size if isinstance(size, float): interp_data = True xl, xh = self.span(_.xcol) size = int(np.ceil((size / (xh - xl)) * len(self))) nx = np.linspace(xl, xh, len(self)) data = self.interpolate(nx, kind="linear", xcol=_.xcol, replace=False) self["Smoothing window size"] = size elif isinstance(size, int_types): data = copy(self.data) interp_data = False else: raise ValueError( "size should either be a float or integer, not a {}".format( type(size))) window = get_window(window, size) # Handle multiple or single y columns if not isIterable(_.ycol): _.ycol = [_.ycol] # Do the convolution itself for yc in _.ycol: data[:, yc] = convolve(data[:, yc], window, mode="same") / size # Reinterpolate the smoothed data back if necessary if interp_data: nx = self.data[:, _.xcol] tmp = self.clone tmp.data = data data = tmp.interpolate(nx, kind="linear", xcol=_.xcol, replace=False) # Fix return value if isinstance(result, bool) and not result: return data[:, _.ycol] for yc in _.ycol: self.add_column(data[:, yc], header=header, index=result, replace=replace) return self
def extract(self, *metadata, **kargs):
    """Extract metadata from each of the files in the terminal group.

    Walks through the terminal group and gets the listed metadata from each file and constructs a
    replacement metadataObject.

    Args:
        *metadata (str): One or more metadata indices that should be used to construct the new data file.

    Keyword Arguments:
        copy (bool): Take a copy of the :py:class:`DataFolder` before starting the extract (default is True)

    Returns:
        An instance of a metadataObject like object.
    """
    copy = kargs.pop("copy", True)

    args = []
    for m in metadata:
        if isinstance(m, string_types):
            args.append(m)
        elif isIterable(m):
            args.extend(m)
        else:
            raise TypeError("Metadata values should be strings, or lists of strings, not {}".format(type(m)))
    metadata = args

    def _extractor(group, trail, metadata):

        results = group.type()
        results.metadata = group[0].metadata
        headers = []

        ok_data = list()
        for m in metadata:  # Sanity check the metadata to include
            try:
                test = results[m]
                if not isIterable(test) or isinstance(test, string_types):
                    test = array([test])
                else:
                    test = array(test)
            except (IndexError, KeyError, TypeError, ValueError):
                continue
            else:
                ok_data.append(m)
                headers.extend([m] * len(test))

        for d in group:
            row = array([])
            for m in ok_data:
                row = append(row, array(d[m]))
            results += row
        results.column_headers = headers

        return results

    if copy:
        ret = self.clone
    else:
        ret = self

    return ret.walk_groups(_extractor, group=True, replace_terminal=True, walker_args={"metadata": metadata})
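# Usage sketch for extract() (assuming a DataFolder whose members carry the named
# metadata; the key names and pattern are hypothetical):
from Stoner import DataFolder

fldr = DataFolder(".", pattern="*.dat")
tbl = fldr.extract("Temperature", ["Field", "Lockin:X"])
# Each terminal group is replaced by a single table with one row per file and one
# column per metadata value that was actually found.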
def find_peaks(self, **kargs):
    """Interface to :py:func:`scipy.signal.find_peaks` for locating peaks in data.

    Args:
        ycol (index): the column name or index of the data in which to search for peaks

    Keyword Arguments:
        xcol (index): the column name or index of the x data that the peaks correspond to.
        height : number or ndarray or sequence, optional
            Required height of peaks. Either a number, ``None``, an array matching `ycol` or a 2-element
            sequence of the former. The first element is always interpreted as the minimal and the second,
            if supplied, as the maximal required height.
        threshold : number or ndarray or sequence, optional
            Required threshold of peaks, the vertical distance to its neighbouring samples. Either a number,
            ``None``, an array matching `ycol` or a 2-element sequence of the former. The first element is
            always interpreted as the minimal and the second, if supplied, as the maximal required threshold.
        distance : number, optional
            Required minimal horizontal distance (>= 1) in samples between neighbouring peaks. Smaller peaks
            are removed first until the condition is fulfilled for all remaining peaks. If this is a *float*
            and *xcol* is set, then the units are in terms of the x-data, otherwise in row indices.
        prominence : number or ndarray or sequence, optional
            Required prominence of peaks. Either a number, ``None``, an array matching `ycol` or a 2-element
            sequence of the former. The first element is always interpreted as the minimal and the second,
            if supplied, as the maximal required prominence.
        width : number or ndarray or sequence, optional
            Required width of peaks in samples. Either a number, ``None``, an array matching `ycol` or a
            2-element sequence of the former. The first element is always interpreted as the minimal and the
            second, if supplied, as the maximal required width. If this is a *float* and *xcol* is set, then
            the units are in terms of the x-data, otherwise in row indices.
        wlen : int, optional
            Used for calculation of the peaks prominences, thus it is only used if one of the arguments
            `prominence` or `width` is given. See argument `wlen` in `peak_prominences` for a full
            description of its effects.
        rel_height : float, optional
            Used for calculation of the peaks width, thus it is only used if `width` is given. See argument
            `rel_height` in `peak_widths` for a full description of its effects.
        plateau_size : number or ndarray or sequence, optional
            Required size of the flat top of peaks in samples. Either a number, ``None``, an array matching
            `ycol` or a 2-element sequence of the former. The first element is always interpreted as the
            minimal and the second, if supplied, as the maximal required plateau size. If this is a *float*
            and *xcol* is set, then the units are in terms of the x-data, otherwise in row indices.
        prefix (str): If set, then the metadata keys that return information about the peaks are stored with
            the given prefix. Default is None - no prefix.
        sort (bool): Sort the results by prominence of peak
        modify (bool): If true, then the returned object is a copy of self with only the peaks left in the
            data.
        full_data (bool): If True (default) then all columns of the data at which peaks in the *ycol* column
            are found. *modify* true implies *full_data* is also true. If *full_data* is False, then only
            the x-column values of the peaks are returned.

    Returns:
        (various): If *modify* is true, then returns the AnalysisMixin with the data set to just the
            peaks/troughs. If *modify* is false (default), then the return value depends on *ycol* and
            *xcol*. If *ycol* is not None and *xcol* is None, then returns complete rows of data
            corresponding to the found peaks/troughs. If *xcol* is not None, or *ycol* is None and *xcol*
            is None, then returns a 1D array of the x positions of the peaks/troughs.

    See Also:
        User guide section :ref:`peak_finding`
    """
    distance = kargs.pop("distance", None)
    width = kargs.pop("width", None)
    plateau_size = kargs.pop("plateau_size", None)
    sort = kargs.pop("sort", False)
    modify = kargs.pop("modify", False)
    bounds = kargs.pop("bounds", lambda x, y: True)
    prefix = kargs.pop("prefix", None)
    full_data = kargs.pop("full_data", True)
    _ = self._col_args(scalar=False, xcol=kargs.pop("xcol", None), ycol=kargs.pop("ycol", None))
    xcol, ycol = _.xcol, _.ycol
    if isIterable(ycol):
        ycol = ycol[0]

    if isinstance(width, float):  # Convert a floating point width into an integer.
        xmin, xmax = self.span(xcol)
        width = int(len(self) * width / (xmax - xmin))
    elif isTuple(width, float, float):
        xmin, xmax = self.span(xcol)
        width = int(len(self) * width[0] / (xmax - xmin)), int(len(self) * width[1] / (xmax - xmin))
    if width is not None:
        kargs["width"] = width

    if isinstance(distance, float):  # Convert a floating point distance into an integer.
        xmin, xmax = self.span(xcol)
        distance = int(np.ceil(len(self) * distance / (xmax - xmin)))
    if distance is not None:
        kargs["distance"] = distance

    if isinstance(plateau_size, float):  # Convert a floating point plateau_size into an integer.
        xmin, xmax = self.span(xcol)
        plateau_size = int(len(self) * plateau_size / (xmax - xmin))
    elif isTuple(plateau_size, float, float):
        xmin, xmax = self.span(xcol)
        plateau_size = (
            int(len(self) * plateau_size[0] / (xmax - xmin)),
            int(len(self) * plateau_size[1] / (xmax - xmin)),
        )
    if plateau_size is not None:
        kargs["plateau_size"] = plateau_size

    seek = self.search(xcol, bounds)
    peaks, data = find_peaks(seek[:, ycol], **kargs)
    peaks = self.data.i[seek.i[peaks]]  # de-reference from bounded data back to main dataset

    for sort_key in ["prominences", "peak_heights", "widths"]:
        if sort_key in data:
            break
    else:
        sort_key = None
    if sort and sort_key:
        idx = np.sort(np.array(list(zip(data[sort_key], np.arange(peaks.size)))), axis=0)[:, 1].astype(int)
        peaks = peaks[idx]
        for k in data:
            data[k] = data[k][idx]
    xmin, xmax = self.span(_.xcol)
    xconv = len(self) / (xmax - xmin)
    for k, v in data.items():
        if k.startswith("left") or k.startswith("right") or k == "widths":
            data[k] = v / xconv + (xmin if k != "widths" else 0)
    peak_data = self.data[peaks, :]
    for k, v in data.items():
        if prefix is None:
            self[k] = v
        else:
            self[f"{prefix}:{k}"] = v
    if modify:
        self.data = peak_data
        return self
    if full_data:
        return peak_data, data
    return peak_data[:, _.xcol], peak_data[:, _.ycol], data
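# Usage sketch for find_peaks() (synthetic two-peak data; thresholds illustrative):
import numpy as np
from Stoner import Data

x = np.linspace(0, 10, 500)
y = np.exp(-((x - 3) ** 2) / 0.1) + 0.5 * np.exp(-((x - 7) ** 2) / 0.1)
d = Data(np.column_stack([x, y]), column_headers=["x", "y"], setas="xy")
peak_rows, info = d.find_peaks(height=0.25, prominence=0.1, prefix="peaks")
print(d["peaks:prominences"])   # peak properties are also stored as metadata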
def _col_args(
    self,
    scalar=True,
    xcol=None,
    ycol=None,
    zcol=None,
    ucol=None,
    vcol=None,
    wcol=None,
    xerr=None,
    yerr=None,
    zerr=None,
    **kargs,
):
    """Utility method that creates an object with keys based either on arguments or the setas attribute."""
    cols = {
        "xcol": xcol,
        "ycol": ycol,
        "zcol": zcol,
        "ucol": ucol,
        "vcol": vcol,
        "wcol": wcol,
        "xerr": xerr,
        "yerr": yerr,
        "zerr": zerr,
    }
    no_guess = True
    for i in cols.values():
        if i is not None:  # User specification wins out
            break
    else:  # User didn't set any values, setas will win
        no_guess = False
    ret = AttributeStore(self.setas._get_cols(no_guess=no_guess))
    for c in list(cols.keys()):
        if isNone(cols[c]):  # Not defined, fall back on setas
            del cols[c]
            continue
        if isinstance(cols[c], bool) and not cols[c]:  # False, delete column altogether
            del cols[c]
            if c in ret:
                del ret[c]
            continue
        if c in ret and isinstance(ret[c], list):
            if isinstance(cols[c], float) or (isinstance(cols[c], np.ndarray) and cols[c].size == len(self)):
                continue
        cols[c] = self.setas.find_col(cols[c])
    ret.update(cols)
    if scalar:
        for c in ret:
            if isinstance(ret[c], list):
                if ret[c]:
                    ret[c] = ret[c][0]
                else:
                    ret[c] = None
    elif isinstance(scalar, bool) and not scalar:
        for c in ret:
            if c.startswith("x") or c.startswith("has_"):
                continue
            if not isIterable(ret[c]) and ret[c] is not None:
                ret[c] = list([ret[c]])
            elif ret[c] is None:
                ret[c] = []
    for n in ["xcol", "xerr", "ycol", "yerr", "zcol", "zerr", "ucol", "vcol", "wcol", "axes"]:
        ret[f"has_{n}"] = n in ret and not (ret[n] is None or (isinstance(ret[n], list) and not ret[n]))
    return ret
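# Sketch of what _col_args() returns (assuming a Data object with setas assigned):
import numpy as np
from Stoner import Data

d = Data(np.column_stack([np.arange(5.0), np.arange(5.0) * 2, np.full(5, 0.1)]),
         column_headers=["x", "y", "dy"], setas="xye")
cols = d._col_args(scalar=True)
print(cols.xcol, cols.ycol, cols.yerr)   # 0 1 2, resolved from the setas assignments
print(cols.has_zcol)                     # False, no z column assigned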
def __getitem__(self, ix):
    """Indexing function for DataArray.

    Args:
        ix (various): Index to find.

    Returns:
        An indexed part of the DataArray object with extra attributes.

    Notes:
        This tries to support all of the indexing operations of a regular numpy array, plus the special
        operations where one or more columns are named.

    Warning:
        The code almost certainly makes some assumptions that DataArray is one or two dimensional and may
        blow up with 3D arrays! On the other hand it has a special case exception for where you give a
        string as the first index element and assumes that you've forgotten that we're row major and tries
        to do the right thing.
    """
    # Is this going to be a single row?
    single_row = isinstance(ix, int_types) or (
        isinstance(ix, tuple) and len(ix) > 0 and isinstance(ix[0], int_types)
    )
    # If the index is a single string type, then build a column accessing index
    if isinstance(ix, string_types):
        if self.ndim > 1:
            ix = (slice(None, None, None), self._setas.find_col(ix))
        else:
            ix = (self._setas.find_col(ix),)
    if isinstance(ix, (int_types, slice)):
        ix = (ix,)
    elif isinstance(ix, tuple) and ix and isinstance(ix[-1], string_types):  # index still has a string type in it
        ix = list(ix)
        ix[-1] = self._setas.find_col(ix[-1])
        ix = tuple(ix)
    elif isinstance(ix, tuple) and ix and isinstance(ix[-1], np.ndarray) and self.ndim == 1:
        # Indexing with a numpy array
        if len(ix) == 1:
            ix = ix[0]
    elif isinstance(ix, tuple) and ix and isIterable(ix[-1]):  # indexing with a list of columns
        ix = list(ix)
        if all_type(ix[-1], bool):
            ix[-1] = np.arange(len(ix[-1]))[ix[-1]]
        ix[-1] = [self._setas.find_col(c) for c in ix[-1]]
        ix = tuple(ix)
    elif isinstance(ix, tuple) and ix and isinstance(ix[0], string_types):  # oops! backwards indexing
        c = ix[0]
        ix = list(ix[1:])
        ix.append(self._setas.find_col(c))
        ix = tuple(ix)
    # Now we can index with our constructed multidimensional indexer
    ret = super(DataArray, self).__getitem__(ix)
    if ret.ndim == 0 or isinstance(ret, np.ndarray) and ret.size == 1:
        if isinstance(ret, ma.core.MaskedConstant):
            if ret.mask:
                return self.fill_value
        if isinstance(ret, ma.MaskedArray):
            ret = ma.filled(ret)
        return ret.dtype.type(ret)
    if not isinstance(ret, np.ndarray):  # bug-out for scalar returns
        return ret
    if ret.ndim >= 2:  # Potentially a 2D array here
        if ix[-1] is None:  # Special case for increasing an array dimension
            if self.ndim == 1:  # Going from 1D to 2D
                ret.setas = self.setas.clone
                ret.i = self.i
                ret.name = getattr(self, "name", "Column")
            return ret
        ret.isrow = single_row
        ret.setas = self.setas.clone
        ret.column_headers = copy.copy(self.column_headers)
        if len(ix) > 0 and isIterable(ix[-1]):  # pylint: disable=len-as-condition
            ret.column_headers = list(np.array(ret.column_headers)[ix[-1]])
        # Sort out whether we need an array of row labels
        if isinstance(self.i, np.ndarray) and len(ix) > 0:  # pylint: disable=len-as-condition
            if isIterable(ix[0]) or isinstance(ix[0], int_types):
                ret.i = self.i[ix[0]]
            else:
                ret.i = 0
        else:
            ret.i = self.i
    elif ret.ndim == 1:  # Potentially a single row or single column
        ret.isrow = single_row
        if len(ix) == len(self.setas):
            tmp = np.array(self.setas)[ix[-1]]
            ret.setas(tmp)
            tmpcol = np.array(self.column_headers)[ix[-1]]
            ret.column_headers = tmpcol
        else:
            ret.setas = self.setas.clone
            ret.column_headers = copy.copy(self.column_headers)
        # Sort out whether we need an array of row labels
        if single_row and isinstance(self.i, np.ndarray):
            ret.i = self.i[ix[0]]
        else:  # This is a single element?
            ret.i = self.i
        if not single_row:
            ret.name = self.column_headers
    return ret
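# Indexing sketch for the named-column support (assuming a Stoner Data object):
import numpy as np
from Stoner import Data

d = Data(np.column_stack([np.arange(5.0), np.arange(5.0) ** 2]),
         column_headers=["T", "R"])
d.data["R"]            # a single named column (string -> find_col lookup)
d.data[0]              # the first row, with the row index .i carried along
d.data[0, "R"]         # a single element; the trailing string is resolved via find_col
d.data[:, ["T", "R"]]  # a list of named columns
d.data["T", 0]         # "backwards" indexing: the leading string is moved to the column slot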
def align(im, ref, method="scharr", **kargs): """Use one of a variety of algroithms to align two images. Args: im (ndarray) image to align ref (ndarray) reference array Keyword Args: method (str or None): If given specifies which module to try and use. Options: 'scharr', 'chi2_shift', 'imreg_dft', 'cv2' box (integer, float, tuple of images or floats): Used with ImageArray.crop to select a subset of the image to use for the aligning process. oversample (int): Rescale the image and reference image by constant factor before finding the translation vector. **kargs (various): All other keyword arguments are passed to the specific algorithm. Returns (ImageArray or ndarray) aligned image Notes: Currently three algorithms are supported: - image_registration module's chi^2 shift: This uses a dft with an automatic up-sampling of the fourier transform for sub-pixel alignment. The metadata key *chi2_shift* contains the translation vector and errors. - imreg_dft module's similarity function. This implements a full scale, rotation, translation algorithm (by default cosntrained for just translation). It's unclear how much sub-pixel translation is accomodated. - cv2 module based affine transform on a gray scale image. from: http://www.learnopencv.com/image-alignment-ecc-in-opencv-c-python/ """ # To be consistent with x-y co-ordinate systems align_methods = { "scharr": (_align_scharr, imreg_dft), "chi2_shift": (_align_chi2_shift, chi2_shift), "imreg_dft": (_align_imreg_dft, imreg_dft), "cv2": (_align_cv2, cv2), } for meth in list(align_methods.keys()): mod = align_methods[meth][1] if mod is None: del align_methods[meth] method = method.lower() new_type = im.dtype if not len(align_methods): raise ImportError( "align requires one of imreg_dft, chi2_shift or cv2 modules to be available." ) if method not in align_methods: raise ValueError( f"{method} is not available either because it is not recognised or there is a missing module" ) if "box" in kargs: box = kargs.pop("box") if not isIterable(box): box = [box] working = im.crop(*box, copy=True) if ref.shape != working.shape: ref = ref.view(ImageArray).crop(*box, copy=True) else: working = im scale = kargs.pop("scale", None) if scale: working = working.rescale(scale, order=3) ref = transform.rescale(ref, scale, order=3) prefilter = kargs.pop("prefilter", True) tvec, data = align_methods[method][0](working, ref, **kargs) if scale: tvec /= scale new_im = im.shift((tvec[1], tvec[0]), prefilter=prefilter).astype(new_type) for k, v in data.items(): new_im[k] = v new_im["tvec"] = tuple(tvec) new_im["translation_limits"] = new_im.translate_limits("tvec") return new_im
def extrapolate(self, new_x, xcol=None, ycol=None, yerr=None, overlap=20, kind="linear"):
    """Extrapolate data based on a local fit to the x, y data.

    Args:
        new_x (float or array): New values of x data.

    Keyword Arguments:
        xcol (column index, None): column containing x-data or None to use setas attribute
        ycol (column index(es) or None): column(s) containing the y-data or None to use setas attribute.
        yerr (column index(es) or None): y error data column or None to use setas attribute
        overlap (float or int): range of x-data used for the local fit for extrapolating. If int then
            overlap number of points is used, if float then that range of x-axis space is used.
        kind (str or callable): Determines the local fitting function. If a string it should be "linear",
            "quadratic" or "cubic"; if callable, then it represents a function to be fitted to the data.

    Returns:
        (array): Extrapolated values.

    Note:
        If the new_x values lie outside the span of the x-data, then the nearest *overlap* portion of the
        data is used to estimate the values. If the new_x values are within the span of the x-data then the
        portion of the data centred about the point and overlap points long will be used to interpolate a
        value.

        If *kind* is callable, it should take x values in the first parameter and free fitting parameters
        as the other parameters (i.e. as with :py:meth:`AnalysisMixin.curve_fit`).
    """
    _ = self._col_args(xcol=xcol, ycol=ycol, yerr=yerr, scalar=False)
    kinds = {
        "linear": lambda x, m, c: m * x + c,
        "quadratic": lambda x, a, b, c: a * x ** 2 + b * x + c,
        "cubic": lambda x, a, b, c, d: a * x ** 3 + b * x ** 2 + c * x + d,
    }
    # Standard error propagation: each term is (df/dp * sigma_p)**2 for a fit parameter p
    errs = {
        "linear": lambda x, me, ce: np.sqrt((x * me) ** 2 + ce ** 2),
        "quadratic": lambda x, ae, be, ce: np.sqrt((x ** 2 * ae) ** 2 + (x * be) ** 2 + ce ** 2),
        "cubic": lambda x, ae, be, ce, de: np.sqrt(
            (x ** 3 * ae) ** 2 + (x ** 2 * be) ** 2 + (x * ce) ** 2 + de ** 2
        ),
    }

    if callable(kind):
        kindf = kind
        errf = None  # no closed-form error propagation for an arbitrary user function
    elif kind in kinds:
        kindf = kinds[kind]
        errf = errs[kind]
    else:
        raise RuntimeError("Failed to recognise extrapolation function '{}'".format(kind))
    scalar_x = not isIterable(new_x)
    if scalar_x:
        new_x = [new_x]
    if isinstance(new_x, ma.MaskedArray):
        new_x = new_x.compressed()
    results = np.zeros((len(new_x), 2 * len(_.ycol)))
    work = self.clone
    for ix, x in enumerate(new_x):
        r = self.closest(x, xcol=_.xcol)
        if isinstance(overlap, int):
            if (r.i - overlap / 2) < 0:
                ll = 0
                hl = min(len(self), overlap)
            elif (r.i + overlap / 2) > len(self):
                hl = len(self)
                ll = max(hl - overlap, 0)
            else:
                ll = int(r.i - overlap // 2)
                hl = int(r.i + overlap // 2)
            bounds = {"_i__between": (ll, hl)}
            mid_x = (self[ll, _.xcol] + self[hl - 1, _.xcol]) / 2.0
        elif isinstance(overlap, float):
            if (r[_.xcol] - overlap / 2) < self.min(_.xcol)[0]:
                ll = self.min(_.xcol)[0]
                hl = ll + overlap
            elif (r[_.xcol] + overlap / 2) > self.max(_.xcol)[0]:
                hl = self.max(_.xcol)[0]
                ll = hl - overlap
            else:
                ll = r[_.xcol] - overlap / 2
                hl = r[_.xcol] + overlap / 2
            bounds = {"{}__between".format(self.column_headers[_.xcol]): (ll, hl)}
            mid_x = (ll + hl) / 2.0
        pointdata = work.select(**bounds)
        pointdata.data[:, _.xcol] = pointdata.column(_.xcol) - mid_x
        ret = pointdata.curve_fit(kindf, _.xcol, _.ycol, sigma=_.yerr, absolute_sigma=True)
        if isinstance(ret, tuple):
            ret = [ret]
        for iy, rt in enumerate(ret):
            popt, pcov = rt
            perr = np.sqrt(np.diag(pcov))
            results[ix, 2 * iy] = kindf(x - mid_x, *popt)
            results[ix, 2 * iy + 1] = errf(x - mid_x, *perr) if errf is not None else np.nan
    if scalar_x:
        results = results[0]
    return results
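# Usage sketch for extrapolate() (synthetic quadratic data with uncertainties):
import numpy as np
from Stoner import Data

x = np.linspace(0, 10, 101)
d = Data(np.column_stack([x, x ** 2, np.full_like(x, 0.1)]),
         column_headers=["x", "y", "dy"], setas="xye")
vals = d.extrapolate(np.array([11.0, 12.0]), overlap=30, kind="quadratic")
# vals[:, 0] are the extrapolated y values, vals[:, 1] their propagated errors
print(vals)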