Beispiel #1
0
def test_biweight_location_constant_axis_3d():
    """A constant 3D array must reduce to ones along every single axis."""
    dims = (10, 5, 2)
    cube = np.ones(dims)
    for ax in range(len(dims)):
        # Shape that is left once axis ``ax`` has been collapsed.
        remaining = tuple(n for k, n in enumerate(dims) if k != ax)
        assert_allclose(biweight_location(cube, axis=ax), np.ones(remaining))
Beispiel #2
0
def test_biweight_location_constant_axis_3d():
    """Reducing an all-ones cube over axis 0, 1 or 2 yields all ones."""
    n0, n1, n2 = shape = (10, 5, 2)
    ones_cube = np.ones(shape)
    # Map each reduced axis to the shape expected for the result.
    expected_shapes = {0: (n1, n2), 1: (n0, n2), 2: (n0, n1)}
    for axis, out_shape in expected_shapes.items():
        reduced = biweight_location(ones_cube, axis=axis)
        assert_allclose(reduced, np.ones(out_shape))
Beispiel #3
0
def test_biweight_location_ignore_nan():
    """Check the ``ignore_nan`` keyword on 1D and stacked 2D input."""
    row = np.array([1, 3, 5, 500, 2, np.nan])
    stacked = np.array([row, row])

    # With ignore_nan=False the NaN entry is expected to give a NaN result.
    assert np.isnan(biweight_location(row, ignore_nan=False))

    # Reference value: the same data with the trailing NaN sliced off.
    expected = biweight_location(row[:-1], ignore_nan=False)
    assert_equal(biweight_location(row, ignore_nan=True), expected)

    across_rows = biweight_location(stacked, axis=0, ignore_nan=True)
    assert_equal(across_rows, row)
    along_rows = biweight_location(stacked, axis=1, ignore_nan=True)
    assert_equal(along_rows, [expected, expected])
Beispiel #4
0
def test_biweight_location_constant_axis_2d():
    """Constant 2D data, and constant rows inside a ramp, are recovered."""
    n_rows, n_cols = dims = (10, 5)
    ones = np.ones(dims)
    assert_allclose(biweight_location(ones, axis=0), np.ones(n_cols))
    assert_allclose(biweight_location(ones, axis=1), np.ones(n_rows))

    # Overwrite two rows of an integer ramp with constant values; the
    # per-row location of those rows must equal the constant.
    constant_rows = {2: 100., 7: 2.}
    grid = np.arange(50).reshape(10, 5)
    for row, value in constant_rows.items():
        grid[row] = value
    per_row = biweight_location(grid, axis=1)
    for row, value in constant_rows.items():
        assert_allclose(per_row[row], value)
Beispiel #5
0
def test_biweight_location_axis_3d():
    """Compare an axis=0 reduction of a 3D array with per-pixel calls."""
    with NumpyRNGContext(12345):
        nz, ny, nx = 3, 4, 5
        cube = np.random.normal(5, 2, (nz, ny, nx))
        collapsed = biweight_location(cube, axis=0)
        assert collapsed.shape == (ny, nx)

        # Recompute the first output row one pixel at a time.
        row = 0
        per_pixel = np.array(
            [biweight_location(cube[:, row, col]) for col in range(nx)])
        assert_allclose(collapsed[row], per_pixel)
Beispiel #6
0
def get_mean_cov(X, robust=True):
    """Estimate the per-column location and the column covariance of ``X``.

    Both estimators treat the rows of ``X`` as observations and the columns
    as variables, so the returned mean has one entry per column and the
    covariance is square in the number of columns.

    Parameters
    ----------
    X : array-like
        Two-dimensional data, one observation per row.
    robust : bool
        If True, use astropy's biweight location and biweight
        midcovariance; otherwise use the classical sample mean and
        covariance from numpy.

    Returns
    -------
    mean_tot : array-like
        Location estimate along axis 0 (one value per column).
    cov_tot : array-like
        Covariance estimate between the columns of ``X``.

    """
    X = np.asanyarray(X)

    if robust:
        # Imported lazily so the classical path does not require astropy.
        from astropy.stats.biweight import (
            biweight_midcovariance,
            biweight_location,
        )

        mean_tot = biweight_location(X, axis=0)
        # biweight_midcovariance expects variables along the first axis.
        cov_tot = biweight_midcovariance(X.T)
    else:
        mean_tot = np.mean(X, axis=0)
        # rowvar=False makes numpy treat columns as variables, matching the
        # axis=0 mean above and the robust branch (np.cov defaults to rows).
        cov_tot = np.cov(X, rowvar=False)

    return mean_tot, cov_tot
Beispiel #7
0
def test_biweight_location_constant_axis_2d():
    """Axis reductions of constant data return that constant."""
    shape = (10, 5)
    flat = np.ones(shape)
    # Reducing axis k leaves the length of the other axis.
    for axis, kept in ((0, shape[1]), (1, shape[0])):
        assert_allclose(biweight_location(flat, axis=axis), np.ones(kept))

    high, low = 100., 2.
    ramp = np.arange(50).reshape(10, 5)
    # Rows 2 and 7 are replaced by constants inside the integer ramp.
    ramp[2], ramp[7] = high, low
    row_locations = biweight_location(ramp, axis=1)
    assert_allclose(row_locations[2], high)
    assert_allclose(row_locations[7], low)
Beispiel #8
0
def xbin_ybwt(x, y, xbins, Nmin=1):
    """
    Bin (x, y) pairs in x and compute the biweight location and scale of
    the y values that fall in each bin.

    Input: x and y (same length), xbins (bin edges)

    Nmin : default 1
        minimum number of points a bin must hold to be evaluated;
        bins with fewer points keep nan in both outputs

    Return: bin centers, yloc, yscale
    """
    assert len(x) == len(y)

    centers = (xbins[1:] + xbins[:-1]) / 2
    n_bins = len(centers)
    yloc = np.full(n_bins, np.nan)
    yscale = np.full(n_bins, np.nan)

    # np.digitize labels the interval between edges k and k+1 as k+1.
    membership = np.digitize(x, xbins)
    for ibin in range(n_bins):
        in_bin = y[membership == ibin + 1]
        if len(in_bin) >= Nmin:
            yloc[ibin] = biweight_location(in_bin)
            yscale[ibin] = biweight_scale(in_bin)

    return centers, yloc, yscale
Beispiel #9
0
def test_biweight_location_axis_3d():
    """The axis=0 result of a 3D array matches column-by-column results."""
    with NumpyRNGContext(12345):
        depth, height, width = 3, 4, 5
        volume = np.random.normal(5, 2, (depth, height, width))
        reduced = biweight_location(volume, axis=0)
        assert reduced.shape == (height, width)

        # Only the first row of the reduced image is cross-checked.
        first_row = 0
        expected = np.array([
            biweight_location(volume[:, first_row, k]) for k in range(width)
        ])
        assert_allclose(reduced[first_row], expected)
Beispiel #10
0
def test_biweight_location_nan():
    """Check NaN results and return types for plain and masked input."""
    data1d = np.array([1, 3, 5, 500, 2, np.nan])
    all_nan = np.full_like(data1d, np.nan)
    data2d = np.array([data1d, data1d, all_nan])
    masked_1d = np.ma.masked_invalid(data1d)
    # Plant a NaN in the underlying data that the mask does not cover.
    masked_1d.data[0] = np.nan
    masked_2d = np.ma.masked_invalid(data2d)

    assert np.isnan(biweight_location(data1d))
    scalar_result = biweight_location(masked_1d)
    assert not isinstance(scalar_result, np.ma.MaskedArray)
    assert np.isnan(biweight_location(data2d))

    for axis in (0, 1):
        plain_result = biweight_location(data2d, axis=axis)
        assert np.all(np.isnan(plain_result))
        masked_result = biweight_location(masked_2d, axis=axis)
        assert isinstance(masked_result, np.ma.MaskedArray)
Beispiel #11
0
def test_biweight_location_axis():
    """Axis reductions of a 2D array agree with 1D calls on each slice."""
    with NumpyRNGContext(12345):
        ny, nx = 100, 200
        image = np.random.normal(5, 2, (ny, nx))

        # axis=0 collapses rows, leaving one value per column.
        reduced = biweight_location(image, axis=0)
        by_column = np.array(
            [biweight_location(image[:, col]) for col in range(nx)])
        assert_allclose(reduced, by_column)

        # axis=1 collapses columns, leaving one value per row.
        reduced = biweight_location(image, axis=1)
        by_row = np.array(
            [biweight_location(image[row, :]) for row in range(ny)])
        assert_allclose(reduced, by_row)
Beispiel #12
0
def test_biweight_location_axis():
    """Cross-check the axis keyword on 2D data against explicit slicing."""
    with NumpyRNGContext(12345):
        n_rows, n_cols = 100, 200
        sample = np.random.normal(5, 2, (n_rows, n_cols))

        along_0 = biweight_location(sample, axis=0)
        # One 1D evaluation per column of the sample.
        column_wise = np.array(
            [biweight_location(sample[:, j]) for j in range(n_cols)])
        assert_allclose(along_0, column_wise)

        along_1 = biweight_location(sample, axis=1)
        # One 1D evaluation per row of the sample.
        row_wise = np.array(
            [biweight_location(sample[j, :]) for j in range(n_rows)])
        assert_allclose(along_1, row_wise)
Beispiel #13
0
def filter_columns(data_2d):
    """Collapse a 2D array to one biweight location per column.

    Each column is cast to float before being passed to
    ``biweight.biweight_location``; the results are returned as a 1D
    float array with one entry per column.
    """
    # Unpacking the shape enforces 2D input; only the column count is used.
    _, n_cols = data_2d.shape
    smoothed = np.zeros(n_cols)
    for col in range(n_cols):
        column = data_2d[:, col].astype(float)
        smoothed[col] = biweight.biweight_location(column)
    return smoothed
Beispiel #14
0
def find_resolution(multispec_fname,
                    initial_fwhm=.05,
                    usepercentile=True,
                    percentiles=(60, 80, 95),
                    Rguess=None,
                    full_output=False,
                    useclip=True,
                    findpeak=False,
                    makeplot=True):
    """Estimate the resolving power R of each spectrum in a multispec file.

    For every spectrum read from ``multispec_fname`` a Gaussian is fitted to
    each line of the ``thar_list`` line list that falls inside the spectrum's
    wavelength range. For each accepted fit, ``R = center / (2.355 * sigma)``
    is computed, and the R values of one spectrum are summarised into a
    central value and an uncertainty.

    Parameters
    ----------
    multispec_fname : str
        File passed to ``Spectrum1D.read(..., flux_ext=4)``.
    initial_fwhm : float
        Initial FWHM guess for each line; the fit window is +/- 5 FWHM
        around the tabulated line centre.
    usepercentile : bool
        If True (and ``findpeak`` is False), summarise R with ``percentiles``.
    percentiles : sequence of 3 numbers
        Lower, central and upper percentile used when ``usepercentile``.
    Rguess : float or None
        Starting guess handed to ``find_distribution_peak``. If None, the
        median R of the first spectrum processed is used and then reused for
        all later spectra.
    full_output : bool
        If True, additionally return the per-spectrum R values, the spectra
        and the accepted line fits.
    useclip : bool
        If True, sigma-clip the R values before summarising them.
    findpeak : bool
        If True, summarise R with ``find_distribution_peak`` (takes
        precedence over ``usepercentile``).
    makeplot : bool
        If True, show an errorbar plot of R against central wavelength.

    Returns
    -------
    allRmed, allRerr, allwmid
        Central R, its uncertainty and the mid wavelength per spectrum.
        With ``full_output`` also ``allR, arcs, alllinefits``.
    """
    from .spectrum import Spectrum1D
    from astropy.stats import sigma_clip, biweight
    from ..robust_polyfit import gaussfit
    from ..utils import find_distribution_peak
    arcs = Spectrum1D.read(multispec_fname, flux_ext=4)
    line_centers = np.loadtxt(os.path.dirname(__file__) +
                              "/../data/linelists/thar_list",
                              usecols=0)

    alllinefits = []
    allRmed = []
    allRerr = []
    allwmid = []
    allR = []
    for i, arc in enumerate(arcs):
        linefits = []
        wave = arc.dispersion
        flux = arc.flux
        wmin, wmax = wave[0], wave[-1]
        wmid = (wmin + wmax) / 2.
        lcs = line_centers[(line_centers > wmin) & (line_centers < wmax)]
        for lc in lcs:
            fwhm = initial_fwhm
            # get subpiece of arc
            ii = (wave > lc - 5 * fwhm) & (wave < lc + 5 * fwhm)
            if not np.any(ii):
                # No samples inside the window: nothing to fit (np.max below
                # would otherwise raise on an empty selection).
                continue
            _x, _y = wave[ii], flux[ii]
            # guess amplitude, center, sigma
            p0 = [np.max(_y), lc, fwhm / 2.355]
            try:
                popt = gaussfit(_x, _y, p0)
            except Exception:
                # A failed fit just means this line is skipped; unlike a bare
                # ``except`` this still lets KeyboardInterrupt through.
                continue
            # Keep only positive-amplitude fits centred near the line.
            if popt[0] > 0 and abs(popt[1] - lc) < .05:
                linefits.append(popt)
        try:
            A, w, s = np.array(linefits).T
        except ValueError:
            print("This order did not have any good lines I guess")
            continue
        alllinefits.append(linefits)
        R = w / (s * 2.355)
        if useclip: R = sigma_clip(R)
        # sigma_clip returns a masked array, but np.percentile / np.median do
        # not honour the mask; reduce over the surviving values explicitly.
        Rvalid = np.ma.compressed(R)
        if findpeak:
            if Rguess is None: Rguess = np.nanmedian(Rvalid)
            try:
                Rmed = find_distribution_peak(R, Rguess)
            except (ValueError, RuntimeError):
                print("--Could not find peak for arc {:02}".format(i))
                print("--{}".format(sys.exc_info()))
                Rmed = np.median(Rvalid)
            # This branch previously left Rerr unset (NameError or a stale
            # value from an earlier spectrum); use the robust scatter.
            Rerr = biweight.biweight_scale(Rvalid)
        elif usepercentile:
            assert len(percentiles) == 3
            Rlo, Rmed, Rhi = np.percentile(Rvalid, percentiles)
            Rerr = (Rmed - Rlo, Rhi - Rmed)
        else:
            Rmed = biweight.biweight_location(Rvalid)
            Rerr = biweight.biweight_scale(Rvalid)
        allR.append(R)
        allRmed.append(Rmed)
        allRerr.append(Rerr)
        allwmid.append(wmid)
    if usepercentile:
        # (lower, upper) pairs -> 2 x N layout, as errorbar expects for
        # asymmetric errors; a no-op for the 1D errors of the findpeak path.
        allRerr = np.array(allRerr).T

    if makeplot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        ax.errorbar(allwmid, allRmed, yerr=allRerr, fmt='o')
        plt.show()

    if full_output:
        return allRmed, allRerr, allwmid, allR, arcs, alllinefits
    return allRmed, allRerr, allwmid
Beispiel #15
0
def test_biweight_location_small():
    """A five-point sample with one outlier gives a location near 2.745."""
    sample = [1, 3, 5, 500, 2]
    deviation = abs(biweight_location(sample) - 2.745)
    assert deviation < 1e-3
Beispiel #16
0
def test_biweight_location_constant():
    """The location of an all-ones array (no axis given) is exactly 1."""
    flat = np.ones((10, 5))
    assert biweight_location(flat) == 1.
Beispiel #17
0
def test_biweight_location_small():
    """Compare a small sample with one outlier against a reference value."""
    sample = [1, 3, 5, 500, 2]
    reference = 2.7456117
    assert_allclose(biweight_location(sample), reference)
Beispiel #18
0
def test_biweight_location_masked():
    """Masked input must agree with ``ignore_nan=True`` on plain input."""
    data1d = np.array([1, 3, 5, 500, 2, np.nan])
    data2d = np.array([data1d, data1d])

    masked_1d = np.ma.masked_invalid(data1d)
    masked_2d = np.ma.masked_invalid(data2d)

    # Full reduction: same answer for NaN-ignoring and masked evaluation.
    for plain, masked in ((data1d, masked_1d), (data2d, masked_2d)):
        assert_equal(biweight_location(plain, ignore_nan=True),
                     biweight_location(masked))

    # Along axis 1 every output element has valid input data.
    plain_result = biweight_location(data2d, ignore_nan=True, axis=1)
    masked_result = biweight_location(masked_2d, axis=1)
    assert isinstance(masked_result, np.ma.MaskedArray)
    assert ~np.any(masked_result.mask)  # mask is all False
    assert_equal(plain_result, masked_result.data)

    # Along axis 0 the last column holds only masked values.
    plain_result = biweight_location(data2d, ignore_nan=True, axis=0)
    masked_result = biweight_location(masked_2d, axis=0)
    assert_equal(masked_result.data[:-1], plain_result[:-1])
    assert masked_result.mask[-1]  # last mask element is True

    masked_1d.data[0] = np.nan  # unmasked NaN
    assert biweight_location(masked_1d) is np.ma.masked
    assert_equal(biweight_location(masked_1d, ignore_nan=True),
                 biweight_location(data1d[1:], ignore_nan=True))

    # ensure that input masked array is not modified
    assert np.isnan(masked_1d[0])
Beispiel #19
0
def test_biweight_location():
    """The location of 10000 seeded standard-normal draws is close to 0."""
    with NumpyRNGContext(12345):
        # Smoke test: the call must run and land within 1e-2 of zero.
        sample = np.random.randn(10000)
        location = biweight_location(sample)
        assert abs(location) < 1e-2
Beispiel #20
0
def test_biweight_location_axis_tuple():
    """Equivalent spellings of ``axis`` on a 3D array give equal results."""

    data = np.arange(24).reshape(2, 3, 4)
    data[0, 0] = 100.

    # Each pair names the same set of axes in two different ways.
    equivalent_axes = [
        (0, (0,)),
        (-1, (2,)),
        ((0, 1), (1, 0)),
        ((0, 2), (0, -1)),
        ((0, 1, 2), (2, 0, 1)),
        ((0, 1, 2), None),
    ]
    for first, second in equivalent_axes:
        assert_equal(biweight_location(data, axis=first),
                     biweight_location(data, axis=second))
Beispiel #21
0
def test_biweight_location():
    """Seeded standard-normal data must give a location within 1e-2 of 0."""
    tolerance = 1e-2
    with NumpyRNGContext(12345):
        # Smoke test on 10000 draws.
        draws = np.random.randn(10000)
        offset = abs(biweight_location(draws))
        assert offset < tolerance
Beispiel #22
0
def test_biweight_location_constant():
    """Constant input of ones yields a location equal to 1."""
    constant = np.ones((10, 5))
    location = biweight_location(constant)
    assert location == 1.
def histogauss(SAMPLE, domain=(0, 0)):
    """Histogram data and overlay it with a fitted Gaussian.

    Uses "MPFIT" to perform the fitting, which can be done on a restricted
    domain. The figure is cleared, drawn and written to ``temp.png``.

    Calling sequence::

        model_params = histogauss.histogauss(mydata)
        model_params = histogauss.histogauss(mydata, [lower, upper])

    Parameters
    ----------
    SAMPLE : numpy array
        Data values (near-normally distributed) to be histogrammed. The
        input is copied, not modified.
    domain : 2-valued sequence, optional
        Domain endpoints for the Gaussian fit. The default ``(0, 0)`` (in
        any sequence type) means: fit the entire domain of data values.

    Returns
    -------
    list or int
        ``[norm, mu, sigma, half_width]`` on success, where

        * ``norm``  = fitted normalization of the Gaussian (the plotted
          model is ``norm / (sigma * sqrt(2 pi)) * exp(-(x-mu)^2 / (2 sigma^2))``)
        * ``mu``    = mean of the fitted Gaussian
        * ``sigma`` = standard deviation of the fitted Gaussian
        * ``half_width`` = half-width of the 95% confidence interval of the
          biweight mean (not fitted)

        ``0`` is returned when the data are empty or degenerate, or when
        mpfit reports a non-positive status.

    Notes
    -----
    Revision history:
        Written, H. Freudenreich, STX, 12/89
        More quantities returned in A, 2/94, HF
        Added NOPLOT keyword and print if Gaussian, 3/94
        Stopped printing confidence limits on normality 3/31/94 HF
        Added CHARSIZE keyword, changed annotation format, 8/94 HF
        Simplified calculation of Gaussian height, 5/95 HF
        Convert to V5.0, use T_CVF instead of STUDENT_T, GAUSSFIT instead of
            FITAGAUSS  W. Landsman April 2002
        Correct call to T_CVF for calculation of A[3], 95% confidence interval
            P. Broos/W. Landsman   July 2003
        Allow FONT keyword to be passed.  T. Robishaw Apr. 2006
        Use Coyote Graphics for plotting W.L. Mar 2011
        Better formatting of text output W.L. May 2012
        (Crudely) converted to Python3 by N. Grogin July 2020
    """
    DATA = SAMPLE.copy()
    N = len(DATA)

    # Nothing to histogram; follow the function's "return 0" failure
    # convention instead of failing on the quartile lookups below.
    if N == 0:
        print('No Data Values!')
        return 0

    # Make sure that not everything is in the same bin. If most
    # data = 0, reject zeroes. If they are all some other value,
    # complain and give up.
    DATA.sort()
    # boundaries of first and third quartiles
    N3 = int(0.75*N)-1
    N1 = int(0.25*N)-1

    if DATA[N3] == DATA[N1]:
        if DATA[int(N/2)-1] != 0:
            print('Too Many Identical Values: '+str(DATA[int(N/2)]))
            return 0
        if np.count_nonzero(DATA != 0) <= 15:
            print('Too Few Non-0 Values!')
            return 0
        print('Suppressing Zeroes')
        DATA = DATA[DATA != 0]

    NTOT = len(DATA)
    # Outlier-resistant estimator of sample "mean":
    center = float(biweight_location(DATA))
    # Outlier-resistant estimator of sample "standard deviation":
    spread = float(biweight_scale(DATA))
    # Compute the 95% confidence half-interval on the above mean:
    dof = 0.7*(NTOT-1)  # appropriate for a biweighted mean
    CL = 0.95
    two_tail_area = 1 - CL
    half_width = (abs(sp_stats.t.ppf(1 - two_tail_area/2.0, dof))
                  * spread / np.sqrt(NTOT))

    # clear the figure
    plt.clf()

    # Plot the histogram; density=True is important for uniform
    # normalization. NOTE: a fixed 200 bins is used here (an earlier comment
    # mentioned Freedman bin sizing, which this call does not request).
    histy, histx, _ = hist(DATA, bins=200, histtype='stepfilled', density=True)

    # Midpoints of the histogram bins
    xmid = (histx[:-1] + histx[1:]) / 2

    # A (0, 0) domain means "no restriction". Comparing element-wise keeps
    # this working for lists, tuples and arrays alike.
    lower, upper = domain[0], domain[1]
    fit_everything = len(domain) == 2 and lower == 0 and upper == 0

    # trim the histogram-fitting region, if a domain is specified
    if fit_everything:
        fitx = xmid
        fity = histy
    else:
        inside = (xmid >= lower) & (xmid <= upper)
        fitx = xmid[inside]
        fity = histy[inside]

    # Initial guess of Gaussian params: normalization; mean; sigma
    # !!! POOR RESULTS IF INPUT DATA ARE HIGHLY NON-GAUSSIAN !!!
    p0 = [max(histy)*(spread * np.sqrt(2 * np.pi)), center, spread]

    # Uniform weighting of each histogram bin value, in Gaussian fit
    err = np.ones(len(fitx))

    # dictionary of histogram information handed to the fitting function
    fa = {'x': fitx, 'y': fity, 'err': err}

    # perform the Gaussian fit to the histogram
    m = mpfit.mpfit(gaussian_fitter, p0, functkw=fa)

    # post-fitting diagnostics
    print('mpfit status = ', m.status)
    if m.status <= 0:
        print('mpfit error message = ', m.errmsg)
        return 0
    print('mpfit parameters = ', m.params)
    [norm, mu, sigma] = m.params

    def _model(xvals):
        # Fitted Gaussian evaluated at xvals.
        return (norm/(sigma * np.sqrt(2 * np.pi)) *
                np.exp(- (xvals - mu)**2 / (2 * sigma**2)))

    # plot the model-fit Gaussian at finely-spaced locations spanning the
    # data bins; the fitted region is green, the ignored region(s) red
    finex = histx[0] + np.arange(1000) * (histx[-1]-histx[0])/1000
    if fit_everything:
        plt.plot(finex, _model(finex), linewidth=2, color='g')
    else:
        below = finex[finex < lower]
        if len(below) > 0:
            plt.plot(below, _model(below), linewidth=2, color='r')
        fitted = finex[(finex >= lower) & (finex <= upper)]
        plt.plot(fitted, _model(fitted), linewidth=2, color='g')
        above = finex[finex > upper]
        if len(above) > 0:
            plt.plot(above, _model(above), linewidth=2, color='r')

    # NAG's devel environment does not easily allow plot-windowing,
    # so no plt.show() here; the figure is dumped to a file instead.

    # !!! BEWARE HARDCODED OUTPUT-FILENAME, BELOW !!!
    # SUGGEST ADDING OUTPUT-FILENAME AS ADDITIONAL PARAMETER PASSED TO HISTOGAUSS
    plt.savefig('temp.png')

    return [norm, mu, sigma, half_width]
Beispiel #24
0
def test_biweight_location_small():
    """The outlier 500 must not drag the location away from about 2.745."""
    values = [1, 3, 5, 500, 2]
    expected, tolerance = 2.745, 1e-3
    location = biweight_location(values)
    assert abs(location - expected) < tolerance