def sanity_invertIndexSelectionExample(self):
    """
    Demonstrate how invertIndexSelection is used.

    A sub-range of an array is selected via `numpy.where`, the
    selection is inverted, and both the indices and the associated
    values are printed. Finally, the inversion is repeated using
    only the length of the array.
    """
    import numpy as np
    from PyAstronomy import pyaC as pc

    # Build some "data" and select the values between 1.4 and 1.7
    x = np.exp(np.arange(20.0) / 20.0)
    inRange = np.logical_and(x > 1.4, x < 1.7)
    indi = np.where(inRange)

    print("Selected indices and values:")
    print(" indices: ", indi)
    print(" values : ", x[indi])

    # Invert the selection, handing over the array itself
    indiInv = pc.invertIndexSelection(x, indi)

    print()
    print("Inverted selection:")
    print(" indices: ", indiInv)
    print(" values : ", x[indiInv])

    # Handing over only the length of `x` has to give
    # the same inverted selection
    indiInvLen = pc.invertIndexSelection(len(x), indi)

    print()
    print("Are indiInv and indiInvLen are the same? ")
    same = np.all(indiInvLen == indiInv)
    print(" ", same)
def sanity_invertIndexSelectionExample(self):
    """
    Example: inverting an index selection.

    Prints a selection of indices (and values) obtained with
    `numpy.where`, the inverted selection returned by
    invertIndexSelection, and whether the array-based and
    length-based calls agree.
    """
    import numpy as np
    from PyAstronomy import pyaC as pc

    # "Data" values
    x = np.exp(np.arange(20.0) / 20.0)

    # Select those values larger than 1.4 but smaller than 1.7
    aboveLower = x > 1.4
    belowUpper = x < 1.7
    indi = np.where(np.logical_and(aboveLower, belowUpper))

    print("Selected indices and values:")
    print(" indices: ", indi)
    selectedValues = x[indi]
    print(" values : ", selectedValues)

    # All indices which are *not* part of the selection
    indiInv = pc.invertIndexSelection(x, indi)

    print()
    print("Inverted selection:")
    print(" indices: ", indiInv)
    remainingValues = x[indiInv]
    print(" values : ", remainingValues)

    # The length of `x` is sufficient to carry out the inversion;
    # check that this yields the same result
    nx = len(x)
    indiInvLen = pc.invertIndexSelection(nx, indi)

    print()
    print("Are indiInv and indiInvLen are the same? ")
    print(" ", np.all(indiInvLen == indiInv))
def sanity_invertIndexSelection(self):
    """
    Check basic properties of invertIndexSelection.

    The selection and its inverse have to add up to the length of
    the array, they must not share indices, array- and length-based
    inversion have to agree, and inverting twice has to restore the
    original selection.
    """
    import numpy as np
    from PyAstronomy import pyaC as pc

    x = np.arange(20.0)
    # Select both ends of the array
    atEdges = np.logical_or(x < 5.0, x > 17.0)
    selected = np.where(atEdges)[0]

    inverse = pc.invertIndexSelection(x, selected)

    # Selection and inverse together cover the array ...
    total = len(selected) + len(inverse)
    self.assertEqual(len(x), total,
                     "indi and inverse do not make up the original array.")
    # ... and have no index in common
    for index in selected:
        self.assertFalse(index in inverse, "found an index of indi in inverse.")

    # Inversion based on the array length only
    inverseFromLength = pc.invertIndexSelection(len(x), selected)
    self.assertTrue(np.all(inverse == inverseFromLength),
                    "i1 and i2 are not the same.")

    # Inverting the inverse restores the selection
    restored = pc.invertIndexSelection(x, inverse)
    self.assertTrue(np.all(restored == selected),
                    "re-inverse of i1 does not match indi.")
def sanity_invertIndexSelection(self):
    """
    Sanity checks for invertIndexSelection.

    Verifies that (1) selection plus inverse make up the whole array,
    (2) no selected index shows up in the inverse, (3) passing the
    array or only its length gives the same inverse, and (4) the
    inverse of the inverse is the initial selection.
    """
    import numpy as np
    from PyAstronomy import pyaC as pc

    x = np.arange(20.0)
    nx = len(x)
    indi = np.where(np.logical_or(x < 5.0, x > 17.0))[0]

    # (1) and (2): inverse obtained from the array
    fromArray = pc.invertIndexSelection(x, indi)
    self.assertEqual(
        nx,
        len(indi) + len(fromArray),
        "indi and inverse do not make up the original array.",
    )
    for idx in indi:
        isShared = idx in fromArray
        self.assertFalse(isShared, "found an index of indi in inverse.")

    # (3): inverse obtained from the length
    fromLength = pc.invertIndexSelection(nx, indi)
    agree = np.all(fromArray == fromLength)
    self.assertTrue(agree, "i1 and i2 are not the same.")

    # (4): round trip
    roundTrip = pc.invertIndexSelection(x, fromArray)
    matches = np.all(roundTrip == indi)
    self.assertTrue(matches, "re-inverse of i1 does not match indi.")
def polyResOutlier(x, y, deg=0, stdlim=3.0, controlPlot=False, fullOutput=False, mode="both"):
    """
    Simple outlier detection based on residuals.

    This algorithm fits a polynomial of the specified degree
    to the data, subtracts it to find the residuals, determines the
    standard deviations of the residuals, and, finally,
    identifies all points with residuals further than the
    specified number of standard deviations from the fit.

    Parameters
    ----------
    x, y : arrays
        The abscissa and ordinate of the data.
    deg : int, optional
        The degree of the polynomial to be fitted.
        The default is 0, i.e., a constant.
    stdlim : float, optional
        The number of standard deviations acceptable
        for points not categorized as outliers.
    mode : string, {both, above, below}, optional
        If 'both' (default), outliers may be located on
        both sides of the polynomial. If 'above/below', outliers
        are only expected above/below it.
    controlPlot : boolean, optional
        If True, a control plot will be generated
        showing the location of outliers (default is
        False).
    fullOutput : boolean, optional
        If True, the fitted polynomial and the resulting
        model will be returned.

    Returns
    -------
    indiin : array
        The indices of the points *not* being categorized
        as outliers.
    indiout : array
        Indices of the outlier points.
    p : array, optional
        Coefficients of the fitted polynomial (only returned if
        `fullOutput` is True).
    model : array, optional
        The polynomial model (only returned if
        `fullOutput` is True).

    Raises
    ------
    PyAValError
        If `deg` is negative, if fewer than `deg` + 1 points are
        given, if `x` and `y` differ in length, or if `mode` is
        not one of 'both', 'above', or 'below'.
    """
    # Validate all input before any fitting is done
    if deg < 0:
        # A degree of zero (constant) is valid
        raise PE.PyAValError("Polynomial degree must be >= 0.",
                             where="polyResOutlier")
    if len(x) < deg + 1:
        raise PE.PyAValError("Only " + str(len(x)) + " points given to fit a polynomial of degree " + str(deg) + ".",
                             solution="Use more points and/or change degree of polynomial.",
                             where="polyResOutlier")
    if len(x) != len(y):
        raise PE.PyAValError("x and y need to have the same length.",
                             solution="Check the lengths of the input arrays.",
                             where="polyResOutlier")
    if mode not in ('both', 'above', 'below'):
        raise PE.PyAValError("No such mode: " + str(mode),
                             where="polyResOutlier",
                             solution="Use any of 'both', 'above', or 'below'.")

    p = np.polyfit(x, y, deg)
    model = np.polyval(p, x)
    residuals = y - model

    std = np.std(residuals)
    limit = stdlim * std

    # Find points too far off
    if mode == 'both':
        # Outliers above and/or below the curve
        indi = np.where(np.abs(residuals) >= limit)[0]
    elif mode == 'above':
        indi = np.where(residuals >= limit)[0]
    else:
        # mode == 'below'
        indi = np.where(residuals <= -limit)[0]

    # All remaining indices belong to the points, which are no outliers
    isInlier = np.ones(len(residuals), dtype=bool)
    isInlier[indi] = False
    indiin = np.where(isInlier)[0]

    if controlPlot:
        # Produce control plot
        import matplotlib.pylab as plt
        # Index arrays are used below, which requires arrays (not lists)
        xa = np.asarray(x)
        ya = np.asarray(y)
        plt.title(
            "polyResOutlier control plot (red: potential outliers, green: polynomial model)"
        )
        plt.plot(xa, ya, 'b.')
        s = np.argsort(xa)
        plt.plot(xa[s], model[s], 'g--')
        plt.plot(xa[indi], ya[indi], 'rp')
        plt.show()

    if fullOutput:
        return indiin, indi, p, model
    return indiin, indi
def polyResOutlier(x, y, deg=0, stdlim=3.0, controlPlot=False, fullOutput=False, mode="both"):
    """
    Simple outlier detection based on residuals.

    This algorithm fits a polynomial of the specified degree
    to the data, subtracts it to find the residuals, determines the
    standard deviations of the residuals, and, finally,
    identifies all points with residuals further than the
    specified number of standard deviations from the fit.

    Parameters
    ----------
    x, y : arrays
        The abscissa and ordinate of the data.
    deg : int, optional
        The degree of the polynomial to be fitted.
        The default is 0, i.e., a constant.
    stdlim : float, optional
        The number of standard deviations acceptable
        for points not categorized as outliers.
    controlPlot : boolean, optional
        If True, a control plot will be generated
        showing the location of outliers (default is
        False).
    fullOutput : boolean, optional
        If True, the fitted polynomial and the resulting
        model will be returned.
    mode : string, {both, above, below}, optional
        If 'both' (default), points on both sides of the
        polynomial may be categorized as outliers. If 'above'
        or 'below', only points above or below the polynomial
        are considered.

    Returns
    -------
    indiin : array
        The indices of the points *not* being categorized
        as outliers.
    indiout : array
        Indices of the outlier points.
    p : array, optional
        Coefficients of the fitted polynomial (only returned if
        `fullOutput` is True).
    model : array, optional
        The polynomial model (only returned if
        `fullOutput` is True).

    Raises
    ------
    PyAValError
        If `deg` is negative, if fewer than `deg` + 1 points are
        given, if `x` and `y` differ in length, or if `mode` is
        not one of 'both', 'above', or 'below'.
    """
    npoints = len(x)
    if deg < 0:
        # Note that deg = 0 (a constant) is a valid choice
        raise PE.PyAValError("Polynomial degree must be >= 0.",
                             where="polyResOutlier")
    if npoints < deg + 1:
        raise PE.PyAValError("Only " + str(npoints) + " points given to fit a polynomial of degree " + str(deg) + ".",
                             solution="Use more points and/or change degree of polynomial.",
                             where="polyResOutlier")
    if npoints != len(y):
        raise PE.PyAValError("x and y need to have the same length.",
                             solution="Check the lengths of the input arrays.",
                             where="polyResOutlier")
    if mode not in ('both', 'above', 'below'):
        raise PE.PyAValError("No such mode: " + str(mode),
                             where="polyResOutlier",
                             solution="Use any of 'both', 'above', or 'below'.")

    p = np.polyfit(x, y, deg)
    model = np.polyval(p, x)
    residuals = y - model

    std = np.std(residuals)
    limit = stdlim * std

    # Find points too far off
    if mode == 'above':
        indi = np.where(residuals >= limit)[0]
    elif mode == 'below':
        indi = np.where(residuals <= -limit)[0]
    else:
        # mode == 'both': deviation in either direction counts
        indi = np.where(np.abs(residuals) >= limit)[0]

    # Everything not flagged as an outlier
    keep = np.ones(len(residuals), dtype=bool)
    keep[indi] = False
    indiin = np.where(keep)[0]

    if controlPlot:
        # Produce control plot
        import matplotlib.pylab as plt
        # Convert to arrays so that index arrays can be applied
        xa = np.asarray(x)
        ya = np.asarray(y)
        plt.title("polyResOutlier control plot (red: potential outliers, green: polynomial model)")
        plt.plot(xa, ya, 'b.')
        s = np.argsort(xa)
        plt.plot(xa[s], model[s], 'g--')
        plt.plot(xa[indi], ya[indi], 'rp')
        plt.show()

    if fullOutput:
        return indiin, indi, p, model
    return indiin, indi
def intep(x, y, xinter, boundsError=True):
    """
    The INTEP interpolation algorithm

    The INTEP interpolation algorithm is described
    by Hill 1982, PDAO 16, 67 ("Intep - an Effective
    Interpolation Subroutine"). The implementation
    at hand is based on the FORTRAN code stated
    therein.

    The aim of the algorithm is to imitate the curve
    "an experienced scientist" would draw through a
    given set of points.

    Parameters
    ----------
    x : array
        Independent values. Must be sorted in strictly
        ascending order.
    y : array
        Dependent values.
    xinter : array
        Values at which to interpolate the tabulated
        data given by `x` and `y`.
    boundsError : boolean, optional
        If True, an exception will be raised if values
        need to be extrapolated beyond the limits of
        the given tabulated data. Values beyond the
        limits are simply replaced with the closest
        valid value available, which might not be a
        good approximation. Set this flag to False
        to suppress the exception.

    Returns
    -------
    Interpolated values : array
        Interpolated values at the locations
        specified by `xinter`.

    Raises
    ------
    PyAValError
        If `x` is not sorted in strictly ascending order or if
        `boundsError` is True and at least one value in `xinter`
        is outside the range covered by `x`.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    xinter = np.asarray(xinter)

    # Check whether x-array is sorted
    if not np.all((x[1:] - x[0:-1]) > 0.0):
        raise PE.PyAValError("The array of independent values (`x`) is not sorted in (strictly) " +
                             "ascending order, but it needs to be.",
                             solution=["Sort the arrays `x` (and `y` accordingly), e.g., using numpy.argsort.",
                                       "Check whether there are duplicate values in `x`."])

    # Create result array
    result = np.zeros(len(xinter))

    # Treat extrapolation points. As `x` is strictly ascending,
    # its first and last entries are the minimum and maximum.
    ilow = np.where(xinter < x[0])[0]
    result[ilow] = y[0]
    iup = np.where(xinter > x[-1])[0]
    result[iup] = y[-1]

    # Indices of points requiring no extrapolation (noepo)
    inRange = np.ones(len(xinter), dtype=bool)
    inRange[ilow] = False
    inRange[iup] = False
    noepo = np.where(inRange)[0]

    # Number of points, which have to be extrapolated
    nextra = len(xinter) - len(noepo)
    if (nextra > 0) and boundsError:
        raise PE.PyAValError("There are " + str(nextra) + " points, which need to be extrapolated. " +
                             "Attention: Extrapolation is simply done by using the closest valid value.",
                             solution=["Use only interpolation points (`xinter`) within the valid range.",
                                       "Set the `boundsError` flag to False (will suppress this exception)."])

    if len(x) == 1:
        # With a single tabulated point there is no interval to
        # interpolate in; use the only value available.
        result[noepo] = y[0]
        return result

    # Loop over all indices of xinter, which were
    # not already subject to extrapolation.
    for i in noepo:
        xp = xinter[i]
        # Index of the first entry in x larger (or equal) than `xp` ...
        infl = int(np.searchsorted(x, xp, side='left'))
        # ... minus one gives the lower index of the enclosing interval.
        # If xp equals x[0], this would be -1 and wrap around to the
        # end of the arrays, so that the first interval is used instead.
        infl = max(infl - 1, 0)
        lp1 = 1.0 / (x[infl] - x[infl + 1])
        lp2 = -lp1

        if infl <= 0:
            # Special treatment for first point
            fp1 = (y[1] - y[0]) / (x[1] - x[0])
        else:
            fp1 = (y[infl + 1] - y[infl - 1]) / (x[infl + 1] - x[infl - 1])

        if infl >= (len(x) - 2):
            # Special treatment for last points
            fp2 = (y[-1] - y[-2]) / (x[-1] - x[-2])
        else:
            fp2 = (y[infl + 2] - y[infl]) / (x[infl + 2] - x[infl])

        xpi1 = xp - x[infl + 1]
        xpi = xp - x[infl]
        l1 = xpi1 * lp1
        l2 = xpi * lp2
        result[i] = y[infl] * (1. - 2. * lp1 * xpi) * l1**2 + \
            y[infl + 1] * (1. - 2. * lp2 * xpi1) * l2**2 + \
            fp2 * xpi1 * l2**2 + fp1 * xpi * l1**2

    return result
def intep(x, y, xinter, boundsError=True, fillValue=None):
    """
    The INTEP interpolation algorithm

    The INTEP interpolation algorithm is described
    by Hill 1982, PDAO 16, 67 ("Intep - an Effective
    Interpolation Subroutine"). The implementation
    at hand is based on the FORTRAN code stated
    therein.

    The aim of the algorithm is to imitate the curve
    "an experienced scientist" would draw through a
    given set of points.

    Parameters
    ----------
    x : array
        Independent values. Must be sorted in strictly
        ascending order.
    y : array
        Dependent values.
    xinter : array
        Values at which to interpolate the tabulated
        data given by `x` and `y`.
    boundsError : boolean, optional
        If True, an exception will be raised if values
        need to be extrapolated beyond the limits of
        the given tabulated data. Values beyond the
        limits are simply replaced with the closest
        valid value available, which might not be a
        good approximation. Set this flag to False
        to suppress the exception.
    fillValue : float, optional
        If given (i.e., not None), this value will be
        used to represent values outside of the given bounds.
        Note that `boundsError` must be set False for this to
        have an effect. For instance, use np.nan.

    Returns
    -------
    Interpolated values : array
        Interpolated values at the locations
        specified by `xinter`.

    Raises
    ------
    PyAValError
        If `x` is not sorted in strictly ascending order or if
        `boundsError` is True and at least one value in `xinter`
        is outside the range covered by `x`.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    xinter = np.asarray(xinter)

    # Check whether x-array is sorted
    if not np.all((x[1:] - x[0:-1]) > 0.0):
        raise PE.PyAValError("The array of independent values (`x`) is not sorted in (strictly) " +
                             "ascending order, but it needs to be.",
                             solution=["Sort the arrays `x` (and `y` accordingly), e.g., using numpy.argsort.",
                                       "Check whether there are duplicate values in `x`."])

    # Create result array
    result = np.zeros(len(xinter))

    # Treat extrapolation points. As `x` is strictly ascending,
    # its first and last entries are the minimum and maximum.
    ilow = np.where(xinter < x[0])[0]
    iup = np.where(xinter > x[-1])[0]
    if fillValue is None:
        # Use the closest tabulated value beyond the limits
        result[ilow] = y[0]
        result[iup] = y[-1]
    else:
        result[ilow] = fillValue
        result[iup] = fillValue

    # No extrapolation (noepo)
    inRange = np.ones(len(xinter), dtype=bool)
    inRange[ilow] = False
    inRange[iup] = False
    noepo = np.where(inRange)[0]

    # Number of points, which have to be extrapolated
    nextra = len(xinter) - len(noepo)
    if (nextra > 0) and boundsError:
        raise PE.PyAValError("There are " + str(nextra) + " points, which need to be extrapolated. " +
                             "Attention: Extrapolation is simply done by using the closest valid value.",
                             solution=["Use only interpolation points (`xinter`) within the valid range.",
                                       "Set the `boundsError` flag to False (will suppress this exception)."])

    if len(x) == 1:
        # With a single tabulated point there is no interval to
        # interpolate in; use the only value available.
        result[noepo] = y[0]
        return result

    # Loop over all indices of xinter, which were
    # not already subject to extrapolation.
    for i in noepo:
        xp = xinter[i]
        # Index of the first entry in x larger (or equal) than `xp` ...
        infl = int(np.searchsorted(x, xp, side='left'))
        # ... minus one gives the lower index of the enclosing interval.
        # If xp equals x[0], this would be -1 and wrap around to the
        # end of the arrays, so that the first interval is used instead.
        infl = max(infl - 1, 0)
        lp1 = 1.0 / (x[infl] - x[infl + 1])
        lp2 = -lp1

        if infl <= 0:
            # Special treatment for first point
            fp1 = (y[1] - y[0]) / (x[1] - x[0])
        else:
            fp1 = (y[infl + 1] - y[infl - 1]) / (x[infl + 1] - x[infl - 1])

        if infl >= (len(x) - 2):
            # Special treatment for last points
            fp2 = (y[-1] - y[-2]) / (x[-1] - x[-2])
        else:
            fp2 = (y[infl + 2] - y[infl]) / (x[infl + 2] - x[infl])

        xpi1 = xp - x[infl + 1]
        xpi = xp - x[infl]
        l1 = xpi1 * lp1
        l2 = xpi * lp2
        result[i] = y[infl] * (1. - 2. * lp1 * xpi) * l1**2 + \
            y[infl + 1] * (1. - 2. * lp2 * xpi1) * l2**2 + \
            fp2 * xpi1 * l2**2 + fp1 * xpi * l1**2

    return result