Example #1
    def run_pa(self, frame, outputfile):
        from desispec.fiberflat import FiberFlat
        import desispec.io.fiberflat as ffIO
        from desispec.linalg import cholesky_solve
        nwave = frame.nwave
        nfibers = frame.nspec
        wave = frame.wave  #- this will become part of output too
        flux = frame.flux
        sumFlux = np.zeros((nwave))
        realFlux = np.zeros(flux.shape)
        ivar = frame.ivar * (frame.mask == 0)
        #deconv
        for fib in range(nfibers):
            Rf = frame.R[fib].todense()
            B = flux[fib]
            realFlux[fib] = cholesky_solve(Rf, B)
            sumFlux += realFlux[fib]
        #iflux=nfibers/sumFlux
        flat = np.zeros(flux.shape)
        flat_ivar = np.zeros(ivar.shape)
        avg = sumFlux / nfibers
        for fib in range(nfibers):
            Rf = frame.R[fib]
            # reconvolve the average with this fiber's resolution
            M = Rf.dot(avg)
            M0 = (M == 0)
            flat[fib] = (~M0) * flux[fib] / (M + M0) + M0
            flat_ivar[fib] = ivar[fib] * M**2
        fibflat = FiberFlat(frame.wave.copy(), flat, flat_ivar,
                            frame.mask.copy(), avg)

        #fiberflat=compute_fiberflat(input_frame)
        ffIO.write_fiberflat(outputfile, fibflat, header=frame.meta)
        log.info("Wrote fiberflat file {}".format(outputfile))
Example #2
 def test_cholesky_solve(self):
     # create a random positive definite matrix A
     n = 12
     A = np.zeros((n, n))
     for i in range(n):
         H = np.random.random(n)
         A += np.outer(H, H.T)
     # random X
     X = np.random.random(n)
     # compute B
     B = A.dot(X)
     # solve for X given A and B
     Xs = cholesky_solve(A, B)
     # compute diff
     delta = Xs - X
     d = np.inner(delta, delta)
     self.assertAlmostEqual(d, 0.)
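The test above exercises desispec.linalg.cholesky_solve on a random symmetric positive definite system. For readers without desispec installed, here is a hedged stand-in with the same contract (solve A x = B for symmetric positive definite A) built on scipy; the real desispec routine may be implemented differently.

# Stand-in with the same contract as desispec.linalg.cholesky_solve
# (solve A x = B for symmetric positive definite A); this is only a sketch,
# the real desispec routine may differ.
import numpy as np
import scipy.linalg

def cholesky_solve(A, B):
    """Solve A x = B assuming A is symmetric positive definite."""
    c, low = scipy.linalg.cho_factor(A)
    return scipy.linalg.cho_solve((c, low), B)

# usage mirroring the unit test above
n = 12
A = np.zeros((n, n))
for i in range(n):
    H = np.random.random(n)
    A += np.outer(H, H)
X = np.random.random(n)
B = A.dot(X)
assert np.allclose(cholesky_solve(A, B), X)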
Example #4
def compute_fiberflat(frame,
                      nsig_clipping=10.,
                      accuracy=5.e-4,
                      minval=0.1,
                      maxval=10.,
                      max_iterations=100,
                      smoothing_res=5.,
                      max_bad=100,
                      max_rej_it=5,
                      min_sn=0,
                      diag_epsilon=1e-3):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.
    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
        minval: [optional] mask pixels with flux < minval * median fiberflat.
        maxval: [optional] mask pixels with flux > maxval * median fiberflat.
        max_iterations: [optional] maximum number of iterations
        smoothing_res: [optional] spacing between spline fit nodes for smoothing the fiberflat
        max_bad: [optional] mask entire fiber if more than max_bad-1 initially unmasked pixels are masked during the iterations
        max_rej_it: [optional] reject at most the max_rej_it worst pixels in each iteration
        min_sn: [optional] mask portions with signal to noise less than min_sn
        diag_epsilon: [optional] size of the regularization term in the deconvolution


    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux.copy()
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum

    # we first need to iterate to converge on a solution for the mean spectrum
    # and the smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # with respect to the previous iteration at all wavelengths.
    # we also have a max. number of iterations for this code

    nout_tot = 0
    chi2pdf = 0.

    smooth_fiberflat = np.ones((flux.shape))

    chi2 = np.zeros((flux.shape))

    ## mask low sn portions
    w = flux * np.sqrt(ivar) < min_sn
    ivar[w] = 0

    ## 0th pass: reject pixels according to minval and maxval
    mean_spectrum = np.zeros(flux.shape[1])
    nbad = np.zeros(nfibers, dtype=int)
    for iteration in range(max_iterations):
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])

        nbad_it = 0
        for fib in range(nfibers):
            w = ((flux[fib, :] < minval * mean_spectrum) |
                 (flux[fib, :] > maxval * mean_spectrum)) & (ivar[fib, :] > 0)
            nbad_it += w.sum()
            nbad[fib] += w.sum()

            if w.sum() > 0:
                ivar[fib, w] = 0
                log.warning("0th pass: masking {} pixels in fiber {}".format(
                    w.sum(), fib))
            if nbad[fib] >= max_bad:
                ivar[fib, :] = 0
                log.warning(
                    "0th pass: masking entire fiber {} (nbad={})".format(
                        fib, nbad[fib]))
        if nbad_it == 0:
            break

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations):

        # use median for spectrum
        mean_spectrum = np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])

        nbad_it = 0
        sum_chi2 = 0
        # not more than max_rej_it pixels per fiber at a time
        for fib in range(nfibers):
            w = ivar[fib, :] > 0
            if w.sum() == 0:
                continue
            F = flux[fib, :] * 0
            w = (mean_spectrum != 0) & (ivar[fib, :] > 0)
            F[w] = flux[fib, w] / mean_spectrum[w]
            smooth_fiberflat[fib, :] = spline_fit(
                wave, wave[w], F[w], smoothing_res,
                ivar[fib, w] * mean_spectrum[w]**2)
            chi2 = ivar[fib, :] * (flux[fib, :] -
                                   mean_spectrum * smooth_fiberflat[fib, :])**2
            w = np.isnan(chi2)
            bad = np.where(chi2 > nsig_clipping**2)[0]
            if bad.size > 0:
                if bad.size > max_rej_it:  # not more than 5 pixels at a time
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-max_rej_it:]]
                ivar[fib, bad] = 0
                log.warning(
                    "1st pass: rejecting {} pixels from fiber {}".format(
                        len(bad), fib))
                nbad[fib] += len(bad)
                if nbad[fib] >= max_bad:
                    ivar[fib, :] = 0
                    log.warning(
                        "1st pass: rejecting fiber {} due to too many (new) bad pixels"
                        .format(fib))
                nbad_it += len(bad)

            sum_chi2 += chi2.sum()
        ndf = int((ivar > 0).sum() - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info(
            "1st pass iter #{} chi2={}/{} chi2pdf={} nout={} (nsig={})".format(
                iteration, sum_chi2, ndf, chi2pdf, nbad_it, nsig_clipping))

        if nbad_it == 0:
            break
    ## flatten fiberflat
    ## normalize smooth_fiberflat:
    mean = np.ones(smooth_fiberflat.shape[1])
    for i in range(smooth_fiberflat.shape[1]):
        w = ivar[:, i] > 0
        if w.sum() > 0:
            mean[i] = np.median(smooth_fiberflat[w, i])
    smooth_fiberflat = smooth_fiberflat / mean

    median_spectrum = mean_spectrum * 1.

    previous_smooth_fiberflat = smooth_fiberflat * 0
    log.info("after 1st pass : nout = %d/%d" %
             (np.sum(ivar == 0), np.size(ivar.flatten())))
    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):
        ## reset sum_chi2
        sum_chi2 = 0
        log.info("2nd pass, iter %d : mean deconvolved spectrum" % iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d" %
                         (iteration, fiber))

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])
        A_pos_def = A.todense()
        log.info("deconvolving")
        w = A.diagonal() > 0

        A_pos_def = A_pos_def[w, :]
        A_pos_def = A_pos_def[:, w]
        mean_spectrum = np.zeros(nwave)
        try:
            mean_spectrum[w] = cholesky_solve(A_pos_def, B[w])
        except:
            mean_spectrum[w] = np.linalg.lstsq(A_pos_def, B[w])[0]
            log.info("cholesky failes, trying svd inverse in iter {}".format(
                iteration))

        for fiber in range(nfibers):

            if np.sum(ivar[fiber] > 0) == 0:
                continue

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]

            M = R.dot(mean_spectrum)
            ok = (M != 0) & (ivar[fiber, :] > 0)
            if ok.sum() == 0:
                continue
            smooth_fiberflat[fiber] = spline_fit(
                wave, wave[ok], flux[fiber, ok] / M[ok], smoothing_res,
                ivar[fiber, ok] * M[ok]**2) * (ivar[fiber, :] * M**2 > 0)
            chi2 = ivar[fiber] * (flux[fiber] - smooth_fiberflat[fiber] * M)**2
            sum_chi2 += chi2.sum()
            w = np.isnan(smooth_fiberflat[fiber])
            if w.sum() > 0:
                ivar[fiber] = 0
                smooth_fiberflat[fiber] = 1

        # normalize to get a mean fiberflat=1
        mean = np.ones(smooth_fiberflat.shape[1])
        for i in range(nwave):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean[i] = np.median(smooth_fiberflat[w, i])
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        ndf = int(np.sum(ivar > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f" %
                 (iteration, sum_chi2, ndf, chi2pdf))

        if max_diff < accuracy:
            break

        log.info(
            "2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"
            % (iteration, max_diff, accuracy))

    log.info("Total number of masked pixels=%d" % nout_tot)
    log.info("3rd pass, final computation of fiber flat")

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp features can arise from dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    mask = np.zeros((flux.shape), dtype='uint32')

    # reset ivar
    ivar = frame.ivar

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        ### R = Resolution(resolution_data[fiber])
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            w = fiberflat_ivar[fiber, :] > 0
            if w.sum() < 100:
                break
            smooth_fiberflat = spline_fit(wave, wave[w], fiberflat[fiber, w],
                                          smoothing_res, fiberflat_ivar[fiber,
                                                                        w])
            chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] -
                                            smooth_fiberflat)**2
            bad = np.where(chi2 > nsig_for_mask**2)[0]
            if bad.size > 0:

                nbadmax = 1
                if bad.size > nbadmax:  # not more than nbadmax pixels at a time
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-nbadmax:]]

                mask[fiber, bad] += fiberflat_mask
                fiberflat_ivar[fiber, bad] = 0.
                nbad_tot += bad.size
            else:
                break
            iteration += 1

        log.info("3rd pass : fiber #%d , number of iterations %d" %
                 (fiber, iteration))

    # set median flat to 1
    log.info("3rd pass : set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("3rd pass : interpolating over masked pixels")

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue
        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0)
                       | (fiberflat[fiber] < minval)
                       | (fiberflat[fiber] > maxval))[0]

        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # find max length of segment with bad pix
            length = 0
            for i in range(bad.size):
                ib = bad[i]
                ilength = 1
                tmp = ib
                for jb in bad[i + 1:]:
                    if jb == tmp + 1:
                        ilength += 1
                        tmp = jb
                    else:
                        break
                length = max(length, ilength)
            if length > 10:
                log.info(
                    "3rd pass : fiber #%d has a max length of bad pixels=%d" %
                    (fiber, length))
            smoothing_res = float(max(100, length))
            x = np.arange(wave.size)

            ok = fiberflat_ivar[fiber] > 0
            if ok.sum() == 0:
                continue
            try:
                smooth_fiberflat = spline_fit(x, x[ok], fiberflat[fiber, ok],
                                              smoothing_res,
                                              fiberflat_ivar[fiber, ok])
                fiberflat[fiber, bad] = smooth_fiberflat[bad]
            except:
                fiberflat[fiber, bad] = 1
                fiberflat_ivar[fiber, bad] = 0

        if nbad_tot > 0:
            log.info(
                "3rd pass : fiber #%d masked pixels = %d (%d iterations)" %
                (fiber, nbad_tot, iteration))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave,
                     fiberflat,
                     fiberflat_ivar,
                     mask,
                     mean_spectrum,
                     chi2pdf=chi2pdf)
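The comment block at the top of Example #4 derives the normal equations for the deconvolved mean spectrum, accumulated fiber by fiber from R'_f = diag(sqrt(w_f) F_f) R_f and D'_f = sqrt(w_f) D_f. The sketch below reproduces just that accumulation on toy sparse matrices; the sizes, the noiseless data, and the dense solve at the end are illustrative assumptions (the example itself masks the matrix and uses cholesky_solve).

# Toy sketch of the fiber-by-fiber normal-equation accumulation described in
# the comment block of Example #4, with R'_f = diag(sqrt(w_f)*F_f) R_f and
# D'_f = sqrt(w_f)*D_f.  Sizes, noiseless data and the dense solve are
# illustrative assumptions only.
import numpy as np
import scipy.sparse

nfibers, nwave = 5, 40
rng = np.random.default_rng(1)

# toy sparse band-diagonal resolution matrix, shared by all fibers here
band = scipy.sparse.diags([0.2, 1.0, 0.2], offsets=[-1, 0, 1],
                          shape=(nwave, nwave))
R = [(band / 1.4).tocsr() for _ in range(nfibers)]

mean_true = 1.0 + 0.1 * np.cos(np.linspace(0., 4., nwave))                      # M
flat_true = 1.0 + 0.05 * rng.standard_normal((nfibers, nwave))                  # F
ivar = np.full((nfibers, nwave), 1.e4)                                          # w
flux = np.array([flat_true[f] * R[f].dot(mean_true) for f in range(nfibers)])   # D

A = scipy.sparse.csr_matrix((nwave, nwave))
B = np.zeros(nwave)
SD = scipy.sparse.lil_matrix((nwave, nwave))
sqrtwflat = np.sqrt(ivar) * flat_true
for f in range(nfibers):
    SD.setdiag(sqrtwflat[f])
    Rp = (SD @ R[f]).tocsr()                       # R'_f
    A = A + Rp.T @ Rp
    B += Rp.T @ (np.sqrt(ivar[f]) * flux[f])       # R'_f^T D'_f

# deconvolved mean spectrum; Example #4 uses cholesky_solve on the masked block
mean_spectrum = np.linalg.solve(A.toarray(), B)
assert np.allclose(mean_spectrum, mean_true)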
Example #5
def compute_fiberflat(frame, nsig_clipping=4.) :
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.
    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    returns tuple (fiberflat, ivar, mask, meanspec):
        fiberflat : 2D[nspec, nwave] fiberflat (data have to be divided by this to be flatfielded)
        ivar : inverse variance of that fiberflat
        mask : 0=ok >0 if problems
        meanspec : deconvolved mean spectrum

    - we first iteratively :
       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log=get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave=frame.nwave
    nfibers=frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar


    # iterative fitting and clipping to get precise mean spectrum
    current_ivar=ivar.copy()


    smooth_fiberflat=np.ones((frame.flux.shape))
    chi2=np.zeros((flux.shape))


    sqrtwflat=np.sqrt(current_ivar)*smooth_fiberflat
    sqrtwflux=np.sqrt(current_ivar)*flux


    # test
    #nfibers=20
    nout_tot=0
    for iteration in range(20) :

        # fit mean spectrum
        A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        B=np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d fiber %d"%(iteration,fiber))

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)*flat
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD*R # each row r of R is multiplied by sqrtwflat[r]

            A = A+(sqrtwflatR.T*sqrtwflatR).tocsr()
            B += sqrtwflatR.T*sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)

        mean_spectrum=cholesky_solve(A.todense(),B)

        log.info("iter %d smoothing"%iteration)

        # fit smooth fiberflat and compute chi2
        smoothing_res=100. # Angstroms

        for fiber in range(nfibers) :

            #if fiber%10==0 :
            #    log.info("iter %d fiber %d (smoothing)"%(iteration,fiber))

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]

            #M = np.array(np.dot(R.todense(),mean_spectrum)).flatten()
            M = R.dot(mean_spectrum)

            F = flux[fiber]/(M+(M==0))
            smooth_fiberflat[fiber]=spline_fit(wave,wave,F,smoothing_res,current_ivar[fiber]*(M!=0))
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-smooth_fiberflat[fiber]*M)**2

        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtwflat[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1

        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtwflat *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave-nfibers*(nwave/smoothing_res))
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        # normalize to get a mean fiberflat=1
        mean=np.mean(smooth_fiberflat,axis=0)
        smooth_fiberflat = smooth_fiberflat/mean
        mean_spectrum    = mean_spectrum*mean



        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp features can arise from dead columns

    fiberflat=np.ones((flux.shape))
    fiberflat_ivar=np.zeros((flux.shape))
    mask=np.zeros((flux.shape), dtype='uint32')  # integer mask bits (was .astype(long), Python 2 only)

    fiberflat_mask=12 # place holder for actual mask bit when defined

    nsig_for_mask=4 # only mask out 4 sigma outliers

    for fiber in range(nfibers) :
        ### R = Resolution(resolution_data[fiber])
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(),mean_spectrum)).flatten()
        fiberflat[fiber] = (M!=0)*flux[fiber]/(M+(M==0)) + (M==0)
        fiberflat_ivar[fiber] = ivar[fiber]*M**2
        smooth_fiberflat=spline_fit(wave,wave,fiberflat[fiber],smoothing_res,current_ivar[fiber]*M**2*(M!=0))
        bad=np.where(fiberflat_ivar[fiber]*(fiberflat[fiber]-smooth_fiberflat)**2>nsig_for_mask**2)[0]
        if bad.size>0 :
            mask[fiber,bad] += fiberflat_mask

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum)    
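The rejection block in Example #5 works in two regimes: on the first iteration only the single worst outlier per wavelength (across fibers) is masked, and on later iterations every pixel above the nsig_clipping threshold is masked at once. Here is a toy sketch of just that logic, with made-up chi2 values rather than real data.

# Toy sketch of the two-regime outlier rejection used in Example #5:
# the first iteration masks only the worst fiber per wavelength, later
# iterations mask everything above the clipping threshold at once.
# chi2 and current_ivar here are made-up arrays, not real data.
import numpy as np

rng = np.random.default_rng(2)
nfibers, nwave = 8, 20
nsig_clipping = 4.
chi2 = rng.random((nfibers, nwave))   # background values all << nsig^2
chi2[3, 5] = 100.                     # two injected outliers at the same wavelength
chi2[6, 5] = 50.
current_ivar = np.ones((nfibers, nwave))

iteration = 0
if iteration < 1:
    # only one rejection per wavelength: zero the worst fiber
    nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
    for i in np.where(nout_per_wave > 0)[0]:
        worst_entry = np.argmax(chi2[:, i])
        current_ivar[worst_entry, i] = 0
else:
    # later iterations: reject all pixels above threshold at once
    current_ivar *= (chi2 <= nsig_clipping**2)

assert current_ivar[3, 5] == 0 and current_ivar[6, 5] == 1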
Example #6
def compute_fiberflat(frame, nsig_clipping=10., accuracy=5.e-4, minval=0.1, maxval=10.,max_iterations=100,smoothing_res=5.,max_bad=100,max_rej_it=5,min_sn=0,diag_epsilon=1e-3) :
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.
    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
        minval: [optional] mask pixels with flux < minval * median fiberflat.
        maxval: [optional] mask pixels with flux > maxval * median fiberflat.
        max_iterations: [optional] maximum number of iterations
        smoothing_res: [optional] spacing between spline fit nodes for smoothing the fiberflat
        max_bad: [optional] mask entire fiber if more than max_bad-1 initially unmasked pixels are masked during the iterations
        max_rej_it: [optional] reject at most the max_rej_it worst pixels in each iteration
        min_sn: [optional] mask portions with signal to noise less than min_sn
        diag_epsilon: [optional] size of the regularization term in the deconvolution


    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log=get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave=frame.nwave
    nfibers=frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux.copy()
    ivar = frame.ivar*(frame.mask==0)



    # iterative fitting and clipping to get precise mean spectrum




    # we first need to iterate to converge on a solution for the mean spectrum
    # and the smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # with respect to the previous iteration at all wavelengths.
    # we also have a max. number of iterations for this code

    nout_tot=0
    chi2pdf = 0.

    smooth_fiberflat=np.ones((flux.shape))

    chi2=np.zeros((flux.shape))

    ## mask low sn portions
    w = flux*np.sqrt(ivar)<min_sn
    ivar[w]=0

    ## 0th pass: reject pixels according to minval and maxval
    mean_spectrum = np.zeros(flux.shape[1])
    nbad=np.zeros(nfibers,dtype=int)
    for iteration in range(max_iterations):
        for i in range(flux.shape[1]):
            w = ivar[:,i]>0
            if w.sum()>0:
                mean_spectrum[i] = np.median(flux[w,i])

        nbad_it=0
        for fib in range(nfibers):
            w = ((flux[fib,:]<minval*mean_spectrum) | (flux[fib,:]>maxval*mean_spectrum)) & (ivar[fib,:]>0)
            nbad_it+=w.sum()
            nbad[fib]+=w.sum()

            if w.sum()>0:
                ivar[fib,w]=0
                log.warning("0th pass: masking {} pixels in fiber {}".format(w.sum(),fib))
            if nbad[fib]>=max_bad:
                ivar[fib,:]=0
                log.warning("0th pass: masking entire fiber {} (nbad={})".format(fib,nbad[fib]))
        if nbad_it == 0:
            break

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations) :

        # use median for spectrum
        mean_spectrum=np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]) :
            w=ivar[:,i]>0
            if w.sum() > 0 :
                mean_spectrum[i]=np.median(flux[w,i])

        nbad_it=0
        sum_chi2 = 0
        # not more than max_rej_it pixels per fiber at a time
        for fib in range(nfibers) :
            w=ivar[fib,:]>0
            if w.sum()==0:
                continue
            F = flux[fib,:]*0
            w=(mean_spectrum!=0) & (ivar[fib,:]>0)
            F[w]= flux[fib,w]/mean_spectrum[w]
            try :
                smooth_fiberflat[fib,:] = spline_fit(wave,wave[w],F[w],smoothing_res,ivar[fib,w]*mean_spectrum[w]**2,max_resolution=1.5*smoothing_res)
            except ValueError as err  :
                log.error("Error when smoothing the flat")
                log.error("Setting ivar=0 for fiber {} because spline fit failed".format(fib))
                ivar[fib,:] *= 0
            chi2 = ivar[fib,:]*(flux[fib,:]-mean_spectrum*smooth_fiberflat[fib,:])**2
            w=np.isnan(chi2)
            bad=np.where(chi2>nsig_clipping**2)[0]
            if bad.size>0 :
                if bad.size>max_rej_it : # not more than 5 pixels at a time
                    ii=np.argsort(chi2[bad])
                    bad=bad[ii[-max_rej_it:]]
                ivar[fib,bad] = 0
                log.warning("1st pass: rejecting {} pixels from fiber {}".format(len(bad),fib))
                nbad[fib]+=len(bad)
                if nbad[fib]>=max_bad:
                    ivar[fib,:]=0
                    log.warning("1st pass: rejecting fiber {} due to too many (new) bad pixels".format(fib))
                nbad_it+=len(bad)

            sum_chi2+=chi2.sum()
        ndf=int((ivar>0).sum()-nwave-nfibers*(nwave/smoothing_res))
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("1st pass iter #{} chi2={}/{} chi2pdf={} nout={} (nsig={})".format(iteration,sum_chi2,ndf,chi2pdf,nbad_it,nsig_clipping))

        if nbad_it == 0 :
            break
    ## flatten fiberflat
    ## normalize smooth_fiberflat:
    mean=np.ones(smooth_fiberflat.shape[1])
    for i in range(smooth_fiberflat.shape[1]):
        w=ivar[:,i]>0
        if w.sum()>0:
            mean[i]=np.median(smooth_fiberflat[w,i])
    smooth_fiberflat = smooth_fiberflat/mean

    median_spectrum = mean_spectrum*1.

    previous_smooth_fiberflat = smooth_fiberflat*0
    previous_max_diff = 0.
    log.info("after 1st pass : nout = %d/%d"%(np.sum(ivar==0),np.size(ivar.flatten())))
    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations) :
        ## reset sum_chi2
        sum_chi2=0
        log.info("2nd pass, iter %d : mean deconvolved spectrum"%iteration)

        # fit mean spectrum
        A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        B=np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # this is to go a bit faster
        sqrtwflat=np.sqrt(ivar)*smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("2nd pass, filling matrix, iter %d fiber %d"%(iteration,fiber))

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD*R # each row r of R is multiplied by sqrtwflat[r]

            A = A+(sqrtwflatR.T*sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber])*flux[fiber])
        A_pos_def = A.todense()
        log.info("deconvolving")
        w = A.diagonal() > 0

        A_pos_def = A_pos_def[w,:]
        A_pos_def = A_pos_def[:,w]
        mean_spectrum = np.zeros(nwave)
        try:
            mean_spectrum[w]=cholesky_solve(A_pos_def,B[w])
        except:
            mean_spectrum[w]=np.linalg.lstsq(A_pos_def,B[w])[0]
            log.info("cholesky failes, trying svd inverse in iter {}".format(iteration))

        for fiber in range(nfibers) :

            if np.sum(ivar[fiber]>0)==0 :
                continue

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]

            M = R.dot(mean_spectrum)
            ok=(M!=0) & (ivar[fiber,:]>0)
            if ok.sum()==0:
                continue
            try :
                smooth_fiberflat[fiber] = spline_fit(wave,wave[ok],flux[fiber,ok]/M[ok],smoothing_res,ivar[fiber,ok]*M[ok]**2,max_resolution=1.5*smoothing_res)*(ivar[fiber,:]*M**2>0)
            except ValueError as err  :
                log.error("Error when smoothing the flat")
                log.error("Setting ivar=0 for fiber {} because spline fit failed".format(fiber))
                ivar[fiber,:] *= 0
            chi2 = ivar[fiber]*(flux[fiber]-smooth_fiberflat[fiber]*M)**2
            sum_chi2 += chi2.sum()
            w=np.isnan(smooth_fiberflat[fiber])
            if w.sum()>0:
                ivar[fiber]=0
                smooth_fiberflat[fiber]=1

        # normalize to get a mean fiberflat=1
        mean = np.ones(smooth_fiberflat.shape[1])
        for i in range(nwave):
            w = ivar[:,i]>0
            if w.sum()>0:
                mean[i]=np.median(smooth_fiberflat[w,i])
        ok=np.where(mean!=0)[0]
        smooth_fiberflat[:,ok] /= mean[ok]

        # this is the max difference between two iterations
        max_diff=np.max(np.abs(smooth_fiberflat-previous_smooth_fiberflat)*(ivar>0.))
        previous_smooth_fiberflat=smooth_fiberflat.copy()

        ndf=int(np.sum(ivar>0)-nwave-nfibers*(nwave/smoothing_res))
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f"%(iteration,sum_chi2,ndf,chi2pdf))


        if max_diff<accuracy :
            break

        if np.abs(max_diff-previous_max_diff)<accuracy*0.1 :
            log.warning("no significant improvement on max diff, quit loop")
            break
        
        previous_max_diff=max_diff
        
        log.info("2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"%(iteration,max_diff,accuracy))



    
    log.info("Total number of masked pixels=%d"%nout_tot)
    log.info("3rd pass, final computation of fiber flat")
    
    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp features can arise from dead columns

    fiberflat=np.ones((flux.shape))
    fiberflat_ivar=np.zeros((flux.shape))
    mask=np.zeros((flux.shape), dtype='uint32')

    # reset ivar
    ivar=frame.ivar

    fiberflat_mask=12 # place holder for actual mask bit when defined

    nsig_for_mask=nsig_clipping # only mask out N sigma outliers

    for fiber in range(nfibers) :

        if np.sum(ivar[fiber]>0)==0 :
            continue

        ### R = Resolution(resolution_data[fiber])
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(),mean_spectrum)).flatten()
        fiberflat[fiber] = (M!=0)*flux[fiber]/(M+(M==0)) + (M==0)
        fiberflat_ivar[fiber] = ivar[fiber]*M**2
        nbad_tot=0
        iteration=0
        while iteration<500 :
            w=fiberflat_ivar[fiber,:]>0
            if w.sum()<100:
                break
            smooth_fiberflat=spline_fit(wave,wave[w],fiberflat[fiber,w],smoothing_res,fiberflat_ivar[fiber,w])
            chi2=fiberflat_ivar[fiber]*(fiberflat[fiber]-smooth_fiberflat)**2
            bad=np.where(chi2>nsig_for_mask**2)[0]
            if bad.size>0 :
                
                nbadmax=1
                if bad.size>nbadmax : # not more than nbadmax pixels at a time
                    ii=np.argsort(chi2[bad])
                    bad=bad[ii[-nbadmax:]]

                mask[fiber,bad] += fiberflat_mask
                fiberflat_ivar[fiber,bad] = 0.
                nbad_tot += bad.size
            else :
                break
            iteration += 1

        
        log.info("3rd pass : fiber #%d , number of iterations %d"%(fiber,iteration))
    
    
    # set median flat to 1
    log.info("3rd pass : set median fiberflat to 1")

    mean=np.ones((flux.shape[1]))
    for i in range(flux.shape[1]) :
        ok=np.where((mask[:,i]==0)&(ivar[:,i]>0))[0]
        if ok.size > 0 :
            mean[i] = np.median(fiberflat[ok,i])
    ok=np.where(mean!=0)[0]
    for fiber in range(nfibers) :
        fiberflat[fiber,ok] /= mean[ok]  

    log.info("3rd pass : interpolating over masked pixels")


    for fiber in range(nfibers) :

        if np.sum(ivar[fiber]>0)==0 :
            continue
        # replace bad by smooth fiber flat
        bad=np.where((mask[fiber]>0)|(fiberflat_ivar[fiber]==0)|(fiberflat[fiber]<minval)|(fiberflat[fiber]>maxval))[0]
        
        if bad.size>0 :

            fiberflat_ivar[fiber,bad] = 0

            # find max length of segment with bad pix
            length=0
            for i in range(bad.size) :
                ib=bad[i]
                ilength=1
                tmp=ib
                for jb in bad[i+1:] :
                    if jb==tmp+1 :
                        ilength +=1
                        tmp=jb
                    else :
                        break
                length=max(length,ilength)
            if length>10 :
                log.info("3rd pass : fiber #%d has a max length of bad pixels=%d"%(fiber,length))
            smoothing_res=float(max(100,length))
            x=np.arange(wave.size)

            ok=fiberflat_ivar[fiber]>0
            if ok.sum()==0:
                continue
            try:
                smooth_fiberflat=spline_fit(x,x[ok],fiberflat[fiber,ok],smoothing_res,fiberflat_ivar[fiber,ok])
                fiberflat[fiber,bad] = smooth_fiberflat[bad]
            except:
                fiberflat[fiber,bad] = 1
                fiberflat_ivar[fiber,bad]=0

        if nbad_tot>0 :
            log.info("3rd pass : fiber #%d masked pixels = %d (%d iterations)"%(fiber,nbad_tot,iteration))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean=np.ones((flux.shape[1]))
    for i in range(flux.shape[1]) :
        ok=np.where((mask[:,i]==0)&(ivar[:,i]>0))[0]
        if ok.size > 0 :
            mean[i] = np.median(fiberflat[ok,i])
    ok=np.where(mean!=0)[0]
    for fiber in range(nfibers) :
        fiberflat[fiber,ok] /= mean[ok]

    log.info("done fiberflat")

    log.info("add a systematic error of 0.0035 to fiberflat variance (calibrated on sims)")
    fiberflat_ivar = (fiberflat_ivar>0)/( 1./ (fiberflat_ivar+(fiberflat_ivar==0) ) + 0.0035**2)
    
    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)
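A hedged end-to-end usage sketch: read a flat-field frame, compute the fiber flat with the routine above (or the simpler variant of Example #5), and write the result as in Example #1. The file names are placeholders, and desispec.io.read_frame is assumed to be available alongside the write_fiberflat call used in Example #1.

# Hedged usage sketch: file names are placeholders; desispec.io.read_frame is
# assumed to exist alongside the write_fiberflat used in Example #1.
import desispec.io
import desispec.io.fiberflat as ffIO
from desispec.fiberflat import compute_fiberflat

frame = desispec.io.read_frame("frame-b0-00001234.fits")   # placeholder path
fiberflat = compute_fiberflat(frame, nsig_clipping=10., accuracy=5.e-4)
ffIO.write_fiberflat("fiberflat-b0-00001234.fits", fiberflat, header=frame.meta)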
Example #7
def polynomial_fit(z, ez, xx, yy, degx, degy):
    """
    Computes a 2D polynomial fit of z as a function of (x, y), with degrees degx and degy

    Args:
        z : ND array
        ez : ND array of same shape as z, uncertainties on z
        xx : ND array of same shape as z
        yy : ND array of same shape as z
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : ND array of same shape as z with values of pol(x,y)
        mask : ND array of same shape as z indicating the masked data points in the fit

    """
    M = monomials(x=xx, y=yy, degx=degx, degy=degy)

    error_floor = 0.

    npar = M.shape[0]
    A = np.zeros((npar, npar))
    B = np.zeros((npar))

    mask = np.ones(z.shape).astype(int)
    for loop in range(100):  # loop to increase errors

        w = 1. / (ez**2 + error_floor**2)
        w[mask == 0] = 0.

        A *= 0.
        B *= 0.
        for k in range(npar):
            B[k] = np.sum(w * z * M[k])
            for l in range(k + 1):
                A[k, l] = np.sum(w * M[k] * M[l])
                if l != k: A[l, k] = A[k, l]
        coeff = cholesky_solve(A, B)
        polval = M.T.dot(coeff)

        # compute rchi2 with median
        ndata = np.sum(w > 0)
        rchi2 = 1.4826 * np.median(
            np.sqrt(w) * np.abs(z - polval)) * ndata / float(ndata - npar)
        # std chi2
        rchi2_std = np.sum(w * (z - polval)**2) / (ndata - npar)
        #print("#%d rchi2=%f rchi2_std=%f ngood=%d nbad=%d error floor=%f"%(loop,rchi2,rchi2_std,ndata,np.sum(w==0),error_floor))

        # reject huge outliers
        nbad = 0
        rvar = w * (z - polval)**2
        worst = np.argmax(rvar)
        if rvar[worst] > 25 * max(
                rchi2, 1.2):  # cap rchi2 if starting point is very bad
            #print("remove one bad measurement at %2.1f sigmas"%np.sqrt(rvar[worst]))
            mask[worst] = 0
            nbad = 1

        if rchi2 > 1:
            if nbad == 0 or loop > 5:
                error_floor += 0.002

        if rchi2 <= 1. and nbad == 0:
            break

    # rerun chol. solve to get covariance
    coeff, covariance = cholesky_solve_and_invert(A, B)

    return coeff, covariance, error_floor, polval, mask
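Example #7 inflates a per-point error floor until the reduced chi2 of the weighted fit drops to about one, rejecting at most one gross outlier per loop. The sketch below isolates just the error-floor idea on a 1D straight-line fit with made-up data; it is an illustration, not the desispec routine (which fits 2D monomials and also returns the covariance).

# Sketch of the error-floor inflation used in Example #7: grow error_floor
# until the reduced chi2 of a weighted fit reaches ~1.  A 1D straight-line
# fit with made-up data stands in for the 2D monomial fit.
import numpy as np

rng = np.random.default_rng(3)
x = np.linspace(-1., 1., 50)
ez = np.full(x.size, 0.05)                              # quoted errors (too small)
z = 2.0 + 3.0 * x + 0.1 * rng.standard_normal(x.size)   # true scatter is 0.1

npar = 2
error_floor = 0.
for loop in range(100):
    w = 1. / (ez**2 + error_floor**2)
    # weighted linear least squares for z = a + b*x
    A = np.vstack([np.ones_like(x), x]).T * np.sqrt(w)[:, None]
    coeff, *_ = np.linalg.lstsq(A, z * np.sqrt(w), rcond=None)
    polval = coeff[0] + coeff[1] * x
    rchi2 = np.sum(w * (z - polval)**2) / (x.size - npar)
    if rchi2 <= 1.:
        break
    error_floor += 0.002        # same step size as Example #7

print("coeff={} error_floor={:.3f} rchi2={:.2f}".format(coeff, error_floor, rchi2))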
Example #9
def compute_polynomial_times_sky(frame,
                                 nsig_clipping=4.,
                                 max_iterations=30,
                                 model_ivar=False,
                                 add_variance=True,
                                 angular_variation_deg=1,
                                 chromatic_variation_deg=1):
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]
    
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        
    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]):
            threshold = 0.01
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates
    x = frame.fibermap["FIBERASSIGN_X"]
    y = frame.fibermap["FIBERASSIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs == 0: xs = 1
    if ys == 0: ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys
    w = (frame.wave - frame.wave[0]) / (frame.wave[-1] -
                                        frame.wave[0]) * 2. - 1

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={} and {}".format(
        angular_variation_deg, chromatic_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            xypol = (x**dx) * (y**dy)
            for dw in range(chromatic_variation_deg + 1):
                wpol = w**dw
                monomials.append(np.outer(xypol, wpol))

    ncoef = len(monomials)
    coef = np.zeros((ncoef))

    allfibers_monomials = np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(
        allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:, skyfibers, :]
    log.debug("shape of skyfibers_monomials = {}".format(
        skyfibers_monomials.shape))

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    Pol = np.ones(flux.shape, dtype=float)
    coef[0] = 1.

    nout_tot = 0
    previous_chi2 = -10.
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients

        A = np.zeros((nwave, nwave), dtype=float)
        B = np.zeros((nwave), dtype=float)
        D = scipy.sparse.lil_matrix((nwave, nwave))
        D2 = scipy.sparse.lil_matrix((nwave, nwave))

        Pol /= coef[0]  # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (1st fit) %d/%d" %
                         (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(
                D2)  # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwRP.T * sqrtwRP).todense()
            B += sqrtwRP.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]
        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients
        Ap = np.zeros((ncoef, ncoef), dtype=float)
        Bp = np.zeros((ncoef), dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber  (2nd fit) %d/%d" %
                         (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(
                skyfibers_monomials[:, fiber, :].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight = 1e24
        Ap[0, 0] += weight
        Bp[0] += weight  # force 0th term to 1
        for i in range(1, chromatic_variation_deg + 1):
            Ap[i, i] += weight  # force other wavelength terms to 0

        coef = cholesky_solve(Ap, Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2" % iteration)
        for fiber in range(nfibers):
            chi2[fiber] = current_ivar[fiber] * (
                flux[fiber] - Rsky[fiber].dot(Pol[fiber] * parameters))**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1

        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf

        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf,
                  abs(sum_chi2 - previous_chi2), nout_iter))

        if nout_iter == 0 and abs(sum_chi2 - previous_chi2) < 0.2:
            break
        previous_chi2 = sum_chi2 + 0.

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit an angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.linalg.LinAlgError:
        log.warning(
            "cholesky_invert failed, switching to np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var +
                                                    (convolved_sky_var == 0))

    # and simply assume it is the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar,
                       frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)

    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber] * parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(
        frame.wave.copy(),
        cskyflux,
        modified_cskyivar,
        mask,
        nrej=nout_tot,
        stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
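# A minimal, self-contained sketch (not from the original source) of the
# linear-solve pattern used in the examples above: keep only the rows and
# columns of the normal matrix whose diagonal is positive, attempt a Cholesky
# solve, and fall back to a least-squares solve if the sub-matrix is not
# positive definite. scipy.linalg.cho_factor/cho_solve stand in here for the
# desispec cholesky_solve helper.
import numpy as np
import scipy.linalg


def solve_masked_normal_equations(A, B):
    """Solve A x = B restricted to the subspace where A has a positive diagonal."""
    w = A.diagonal() > 0                 # parameters actually constrained by the data
    x = np.zeros_like(B)
    A_sub = A[np.ix_(w, w)]
    try:
        x[w] = scipy.linalg.cho_solve(scipy.linalg.cho_factor(A_sub), B[w])
    except np.linalg.LinAlgError:
        x[w] = np.linalg.lstsq(A_sub, B[w], rcond=None)[0]
    return x


# toy check: a positive definite 5x5 block plus one unconstrained parameter
rng = np.random.default_rng(0)
H = rng.normal(size=(5, 5))
A = np.zeros((6, 6))
A[:5, :5] = H.dot(H.T)
x_true = np.zeros(6)
x_true[:5] = rng.normal(size=5)
B = A.dot(x_true)
assert np.allclose(solve_masked_normal_equations(A, B), x_true)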
Example #10
0
def polynomial_fit(z,ez,xx,yy,degx,degy) :
    """
    Computes a 2D polynomial fit of z as a function of (x,y) with degrees degx and degy

    Args:
        z : ND array
        ez : ND array of same shape as z, uncertainties on z
        xx : ND array of same shape as z
        yy : ND array of same shape as z
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : ND array of same shape as z with values of pol(x,y)
        mask : ND array of same shape as z indicating the masked data points in the fit

    """
    M=monomials(x=xx,y=yy,degx=degx,degy=degy)

    error_floor = 0.

    npar=M.shape[0]
    A=np.zeros((npar,npar))
    B=np.zeros((npar))

    mask=np.ones(z.shape).astype(int)
    for loop in range(100) : # loop to increase errors

        w=1./(ez**2+error_floor**2)
        w[mask==0]=0.

        A *= 0.
        B *= 0.
        for k in range(npar) :
            B[k]=np.sum(w*z*M[k])
            for l in range(k+1) :
                A[k,l]=np.sum(w*M[k]*M[l])
                if l!=k : A[l,k]=A[k,l]
        coeff=cholesky_solve(A,B)
        polval = M.T.dot(coeff)

        # compute rchi2 with median
        ndata=np.sum(w>0)
        rchi2=1.4826*np.median(np.sqrt(w)*np.abs(z-polval))*ndata/float(ndata-npar)
        # std chi2
        rchi2_std = np.sum(w*(z-polval)**2)/(ndata-npar)
        #print("#%d rchi2=%f rchi2_std=%f ngood=%d nbad=%d error floor=%f"%(loop,rchi2,rchi2_std,ndata,np.sum(w==0),error_floor))

        # reject huge outliers
        nbad=0
        rvar=w*(z-polval)**2
        worst=np.argmax(rvar)
        if rvar[worst] > 25*max(rchi2,1.2) : # cap rchi2 if starting point is very bad
            #print("remove one bad measurement at %2.1f sigmas"%np.sqrt(rvar[worst]))
            mask[worst]=0
            nbad=1

        if rchi2>1 :
            if nbad==0 or loop>5 :
                error_floor+=0.002

        if rchi2<=1. and nbad==0 :
            break

    # rerun chol. solve to get covariance
    coeff,covariance=cholesky_solve_and_invert(A,B)


    return coeff,covariance,error_floor,polval,mask
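# A minimal sketch (not from the original source) of the "error floor" idea in
# polynomial_fit above: when chi2/ndf stays above 1, an extra error term is
# grown and added in quadrature to the reported errors until the reduced chi2
# of the residuals drops to ~1. polynomial_fit uses the robust estimate
# 1.4826*median(|sqrt(w)*residual|) and also rejects outliers; this sketch
# keeps only the error-floor loop.
import numpy as np


def fit_error_floor(residuals, errors, step=0.002, max_iter=1000):
    """Return the extra error, added in quadrature, that brings chi2/ndf to <= 1."""
    error_floor = 0.0
    for _ in range(max_iter):
        w = 1.0 / (errors**2 + error_floor**2)
        rchi2 = np.sum(w * residuals**2) / residuals.size
        if rchi2 <= 1.0:
            break
        error_floor += step
    return error_floor


rng = np.random.default_rng(1)
true_scatter = 0.05
residuals = rng.normal(scale=true_scatter, size=2000)
reported_err = np.full(residuals.size, 0.03)      # deliberately under-estimated errors
floor = fit_error_floor(residuals, reported_err)
# floor should come out close to sqrt(0.05**2 - 0.03**2) = 0.04
print(floor)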
Example #11
0
def compute_non_uniform_sky(frame, nsig_clipping=4.,max_iterations=10,model_ivar=False,add_variance=True,angular_variation_deg=1) :
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )
    
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg  : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1). One set of coefficients per wavelength
    
    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()*(frame.mask[skyfibers]==0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]
    
    
    # need focal plane coordinates of fibers
    x = frame.fibermap["DESIGN_X"][skyfibers]
    y = frame.fibermap["DESIGN_Y"][skyfibers]
    # normalize for numerical stability
    xm = np.mean(frame.fibermap["DESIGN_X"])
    ym = np.mean(frame.fibermap["DESIGN_Y"])
    xs = np.std(frame.fibermap["DESIGN_X"])
    ys = np.std(frame.fibermap["DESIGN_Y"])
    if xs==0 : xs = 1
    if ys==0 : ys = 1
    x = (x-xm)/xs
    y = (y-ym)/ys

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials=[]
    for dx in range(angular_variation_deg+1) :
        for dy in range(angular_variation_deg+1-dx) :
            monomials.append((x**dx)*(y**dy))
    ncoef=len(monomials)
    monomials=np.array(monomials)
        
    
    input_ivar=None 
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave  = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar   = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))                      
            current_ivar[f][ii] = input_ivar[f][ii]
    

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    
    
    
    nout_tot=0
    for iteration in range(max_iterations) :

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        
        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) =  sum_p monom[fiber,p] *  a_ip
        # the convolved sky flux at wavelength w is 
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] *  a_ip
        # model[fiber,w] = sum_p monom[fiber,p] sum_i R[fiber][w,i] a_ip
        # 
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  =  sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]
        
        A=np.zeros((nwave*ncoef,nwave*ncoef))
        B=np.zeros((nwave*ncoef))
        
        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))
        
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            #wRtR=(sqrtwR.T*sqrtwR).tocsr()
            wRtR=(sqrtwR.T*sqrtwR).todense()
            wRtF=sqrtwR.T*sqrtwflux[fiber]
            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef) :
                for k in range(ncoef) :
                    A[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave] += monomials[p,fiber]*monomials[k,fiber]*wRtR
                B[p*nwave:(p+1)*nwave] += monomials[p,fiber]*wRtF
                
        log.info("iter %d solving"%iteration)
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]
        
        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef) :
                unconvolved_fiber_sky_flux += monomials[p,fiber]*parameters[p*nwave:(p+1)*nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(unconvolved_fiber_sky_flux)
            
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2
            
        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1

        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)


    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar
    
    # is there a different method to compute this ?
    log.info("compute covariance")
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)
    
    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)
    
    log.info("compute convolved sky and ivar")
        
    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution.
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_covar[p,k] = np.diagonal(Rmean.dot(parameter_covar[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave]).dot(Rmean.T.todense()))
    
    '''
    import astropy.io.fits as pyfits
    pyfits.writeto("convolved_parameter_covar.fits",convolved_parameter_covar,overwrite=True)
    
    # other approach
    log.info("dense Rmean...")
    Rmean=Rmean.todense()
    log.info("invert Rinv...")
    Rinv=np.linalg.inv(Rmean)
    # check this
    print("0?",np.max(np.abs(Rinv.dot(Rmean)-np.eye(Rmean.shape[0])))/np.max(np.abs(Rmean)))
    convolved_parameter_ivar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_ivar[p,k] = np.diagonal(Rinv.T.dot(A[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave]).dot(Rinv))
    # solve for each wave separately
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for i in range(nwave) :
        print("inverting ivar of wave %d/%d"%(i,nwave))
        convolved_parameter_covar[:,:,i] = cholesky_invert(convolved_parameter_ivar[:,:,i])
    pyfits.writeto("convolved_parameter_covar_bis.fits",convolved_parameter_covar,overwrite=True)
    import sys
    sys.exit(12)
    '''
    
    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant from a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")        
    for i in range(frame.nspec):
        # compute monomials
        M = []
        xi=(frame.fibermap["DESIGN_X"][i]-xm)/xs
        yi=(frame.fibermap["DESIGN_Y"][i]-ym)/ys
        for dx in range(angular_variation_deg+1) :
            for dy in range(angular_variation_deg+1-dx) :
                M.append((xi**dx)*(yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux=np.zeros(nwave)
        convolved_fiber_skyvar=np.zeros(nwave)
        for p in range(ncoef) :
            unconvolved_fiber_sky_flux += M[p]*parameters[p*nwave:(p+1)*nwave]
            for k in range(ncoef) :
                convolved_fiber_skyvar += M[p]*M[k]*convolved_parameter_covar[p,k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance
        cskyivar[i] = (convolved_fiber_skyvar>0)/(convolved_fiber_skyvar+(convolved_fiber_skyvar==0))

    
    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()
    
    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)
    
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
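# A compact sketch (not the original implementation) of the block structure of
# the normal equations assembled in compute_non_uniform_sky above: each fiber
# contributes the outer product of its monomial vector with itself, expanded
# by np.kron against the per-fiber (nwave x nwave) matrix wRtR, and the vector
# B gets the monomial vector expanded against wRtF. Block (p,k) of A is then
# sum_fiber monom[p]*monom[k]*wRtR, exactly as filled by the double loop above.
import numpy as np

nwave, ncoef, nfibers = 4, 3, 5
rng = np.random.default_rng(2)

A = np.zeros((nwave * ncoef, nwave * ncoef))
B = np.zeros(nwave * ncoef)
for fiber in range(nfibers):
    monom = rng.normal(size=ncoef)               # monomial values for this fiber
    sqrtwR = rng.normal(size=(nwave, nwave))     # stands in for diag(sqrt(ivar)).dot(R)
    wRtR = sqrtwR.T.dot(sqrtwR)
    wRtF = sqrtwR.T.dot(rng.normal(size=nwave))  # stands in for sqrtwR.T.dot(sqrtw*flux)
    A += np.kron(np.outer(monom, monom), wRtR)
    B += np.kron(monom, wRtF)

print(A.shape, B.shape)   # (nwave*ncoef, nwave*ncoef) and (nwave*ncoef,)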
Example #12
0
def main() :

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i','--infile', type = str, default = None, required=True,
                        help = 'path to zzbest.fits file')
    parser.add_argument('-o','--outfile', type = str, default = None, required=True,
                        help = 'path to output json file')
    args = parser.parse_args()
    log  = get_logger()
    
    hdulist=fits.open(args.infile)

    # find list of lines from table keywords
    keys=hdulist[1].columns.names
    table=hdulist[1].data
    ok=np.where(table["ZWARN"]==0)[0]
    table=table[ok]
    
    lines=[]
    for k in keys :
        if k.find("BEST_FLUX_")==0 and  k.find("BEST_FLUX_ERR")<0 :
            numbers=re.findall(r'\d+',k)
            if len(numbers)==1 :
                lines.append(int(numbers[0]))
    lines=np.unique(np.array(lines))
    log.info("lines in file: %s"%str(lines))

    


    
    oIIline1=3727
    oIIline2=3729
    try :
        oIIflux=table["BEST_FLUX_%dA"%oIIline1]+table["BEST_FLUX_%dA"%oIIline2]
        oIIerr=np.sqrt(table["BEST_FLUX_%dA"%oIIline1]**2+table["BEST_FLUX_%dA"%oIIline2]**2)
    except KeyError :
        log.error("cannot compute oII flux")
        log.error(sys.exc_info())
        sys.exit(12)
    
    # first step : compute an average set of line ratios
    # by scaling fluxes wrt to oII
    # we will then use this average set of line ratios to normalize 
    # all entries and then start the pca
    selection=np.where((oIIflux>0)&(oIIerr>0))[0]
    if selection.size == 0 :
        log.error("no entry with valid oII flux")
        sys.exit(12)
    
     
    flux=np.zeros((selection.size,lines.size))
    ivar=np.zeros((selection.size,lines.size))
    for i in range(lines.size) :
        flux[:,i]=table["BEST_FLUX_%dA"%lines[i]][selection]/oIIflux[selection]
        var=(table["BEST_FLUX_ERR_%dA"%lines[i]][selection]/oIIflux[selection])**2
        # account for error on oIIflux
        var += (flux[:,i]*oIIerr[selection]/oIIflux[selection])**2        
        mask=np.where(var>0)[0]
        ivar[mask,i]=1./var[mask]

    # test : do not weight with ivar because redshift dependence blurs the picture
    no_weight = True

    if no_weight :
        ivar=(ivar>0)/(0.001)**2
    

    
    # this is the mean line ratios
    sivar=np.sum(ivar,axis=0)
    ok=np.where(sivar>0)[0]
    lines=lines[ok]
    mean_flux_wrt_oII=np.sum(ivar*flux,axis=0)[ok]/sivar[ok]
    err_flux_wrt_oII=1./np.sqrt(sivar[ok])
    
    # refit the amp of each galaxy wrt to mean_flux_wrt_oII
    ngal=table.size
    log.info("number of galaxies = %d"%ngal)
    
    
    # fill array
    flux=np.zeros((ngal,lines.size))
    ivar=np.zeros((ngal,lines.size))
    for i in range(lines.size) :
        flux[:,i]=table["BEST_FLUX_%dA"%lines[i]]
        var=(table["BEST_FLUX_ERR_%dA"%lines[i]])**2
        ok=np.where(var>0)[0]
        ivar[ok,i]=1./var[ok]
    
    if no_weight :
        ivar=(ivar>0)/(0.001)**2
        
    # for each gal, fit scale and apply it
    a=np.sum(ivar*mean_flux_wrt_oII**2,axis=1)
    b=np.sum(ivar*mean_flux_wrt_oII*flux,axis=1)
    scale=b/(a+(a==0))
    
    for i in range(ngal) :
        if scale[i] > 0 :
            flux[i] /= scale[i]
            ivar[i] *= scale[i]**2
        else :
            flux[i]=0.
            ivar[i]=0.

    dchi2min=1.
    if no_weight :
        ivar=(ivar>0)/(0.001)**2
    
    a    = np.sum(ivar,axis=0)
    mean = np.sum(ivar*flux,axis=0)/a
    
    residuals=flux-mean
    tmpres=residuals.copy()
    
    # now we can try to do some sort of pca
    eigenvectors=np.zeros((lines.size,lines.size))
    coefs=np.zeros((ngal,lines.size))
    
    bb=np.zeros((lines.size))
    aa=np.zeros((lines.size))
    chi2=1e20
    for e in range(lines.size) :
        
        eigenvectors[e]=np.ones(lines.size) # 
        eigenvectors[e] /= np.sqrt(np.sum(eigenvectors[e]**2))

        # orthogonalize         
        for i in range(e) :
            prod=np.inner(eigenvectors[e],eigenvectors[i])
            eigenvectors[e] -= prod*eigenvectors[i]
        # normalize
        eigenvectors[e]  /= np.sqrt(np.sum(eigenvectors[e]**2))
        
        A=np.zeros((e+1,e+1)).astype(float)
        B=np.zeros((e+1)).astype(float)
        
        for loop in range(500) :
            # refit coordinates, including previous ones
            for g in range(ngal) :
                #log.debug("%d/%d"%(g,ngal))
                A *= 0.
                B *= 0.
                for i in range(e+1) :
                    B[i]=np.sum(ivar[g]*eigenvectors[i]*residuals[g])
                    for j in range(e+1) :
                        A[i,j]=np.sum(ivar[g]*eigenvectors[i]*eigenvectors[j])
                    A[i,i] += 0.00001 # weak prior
                try :
                    coefs[g,:e+1]=cholesky_solve(A,B)
                except :
                    log.warning("cholesky_solve error")
                    print "A=",A
                    print "B=",B
                    print "ivar=",ivar[g]
                    print "eigenvectors[e]=",eigenvectors[e]
                    sys.exit(12)
                    log.warning(sys.exc_info())
                    coefs[g]=0.
                    pass
                # update residuals
                tmpres[g] = residuals[g]
                for i in range(e) :
                    tmpres[g] -= coefs[g,i]*eigenvectors[i]
            
            old=eigenvectors[e].copy()
            
            # refit this eigen vectors
            #tmpres = residuals.copy()
            for i in [e] : #range(e+1) :
                aa *= 0.
                bb *= 0.            
                for l in range(lines.size) : 
                    bb[l]=np.sum(ivar[:,l]*coefs[:,i]*tmpres[:,l])
                    aa[l]=np.sum(ivar[:,l]*coefs[:,i]**2)
                newvect=(aa>0)*bb/(aa+(aa==0))
                        
                
                # orthogonalize         
                for j in range(i) :
                    prod=np.inner(newvect,eigenvectors[j])
                    newvect -= prod*eigenvectors[j]
                    coefs[:,j] += prod*coefs[:,i]
                    for g in range(ngal) :
                        tmpres[g] -= prod*coefs[g,i]*eigenvectors[j]
                # normalize
                norme = np.sqrt(np.sum(newvect**2))
                newvect /= norme
                coefs[:,i] *= norme
                
                eigenvectors[i]=newvect
                
                # update tmpres
                for g in range(ngal) :
                    tmpres[g] -= coefs[g,i]*eigenvectors[i]
            
            oldchi2=chi2
            chi2=np.sum(ivar*tmpres**2)
            ndf=np.sum(ivar>0)-(e+1)
            dchi2=oldchi2-chi2
            dist=np.max(np.abs(old-eigenvectors[e]))
            if dist<1e-4 or dchi2<dchi2min :
                break
            for i in [e] : #range(e+1) :
                log.info("#%d-%d chi2=%f chi2/ndf=%f dchi2=%f %s"%(i,loop,chi2,chi2/ndf,dchi2,str(eigenvectors[i])))
    
    fits.writeto("coefs.fits",coefs,clobber=True)
    log.info("wrote coefs.fits")
    file=open(args.outfile,"w")
    file.write('"pca":{\n')
    file.write('"lines": [')
    for l in lines :
        if l != lines[0] :
            file.write(",")
        file.write("%d"%l)
    file.write('],\n')
    file.write('"mean_flux": [')
    for  e in range(eigenvectors.shape[0]) :
        if e>0 :
            file.write(",")
        file.write("%f"%mean[e])
    file.write('],\n')
    
    file.write('"components": [\n')
    for e in range(eigenvectors.shape[0]) :
        file.write('[')
        for i in range(eigenvectors.shape[1]) :
            if i>0 :
                file.write(",")
            file.write("%f"%eigenvectors[e,i])
        if e<eigenvectors.shape[0]-1 :
            file.write('],\n')
        else :
            file.write(']\n')
    file.write('],\n')
    file.write('"mean_coef": [')
    for e in range(eigenvectors.shape[0]) :
        if e>0 :
            file.write(",")
        file.write("%f"%np.mean(coefs[:,e]))
    file.write('],\n')
    file.write('"rms_coef": [')
    for e in range(eigenvectors.shape[0]) :
        if e>0 :
            file.write(",")
        file.write("%f"%np.std(coefs[:,e]))
    file.write('],\n')
    file.write('"min_coef": [')
    for e in range(eigenvectors.shape[0]) :
        if e>0 :
            file.write(",")
        file.write("%f"%np.min(coefs[:,e]))
    file.write('],\n')
    file.write('"max_coef": [')
    for e in range(eigenvectors.shape[0]) :
        if e>0 :
            file.write(",")
        file.write("%f"%np.max(coefs[:,e]))
    file.write(']\n')
    file.write('}\n')
    
    file.close()
    log.info("wrote %s"%args.outfile)
Example #13
0
def compute_polynomial_times_sky(frame, nsig_clipping=4.,max_iterations=30,model_ivar=False,add_variance=True,angular_variation_deg=1,chromatic_variation_deg=1) :
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]
    
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1)
        chromatic_variation_deg : degree of the polynomial correction along wavelength for the focal plane variation (default=1)

    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")
    
    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers
    
    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()*(frame.mask[skyfibers]==0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]
    

    input_ivar=None 
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave  = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar   = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]
    
    # need focal plane coordinates
    x = frame.fibermap["DESIGN_X"]
    y = frame.fibermap["DESIGN_Y"]
    
    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs==0 : xs = 1
    if ys==0 : ys = 1
    x = (x-xm)/xs
    y = (y-ym)/ys
    w = (frame.wave-frame.wave[0])/(frame.wave[-1]-frame.wave[0])*2.-1
    
    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={} and {}".format(angular_variation_deg,chromatic_variation_deg))
    monomials=[]
    for dx in range(angular_variation_deg+1) :
        for dy in range(angular_variation_deg+1-dx) :
            xypol = (x**dx)*(y**dy)
            for dw in range(chromatic_variation_deg+1) :
                wpol=w**dw
                monomials.append(np.outer(xypol,wpol))
                
    ncoef=len(monomials)
    coef=np.zeros((ncoef))
    
    allfibers_monomials=np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(allfibers_monomials.shape))
    
    skyfibers_monomials = allfibers_monomials[:,skyfibers,:]
    log.debug("shape of skyfibers_monomials = {}".format(skyfibers_monomials.shape))
    
    
    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    Pol     = np.ones(flux.shape,dtype=float)
    coef[0] = 1.
    
    nout_tot=0
    previous_chi2=-10.
    for iteration in range(max_iterations) :
        
        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        
        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients
        
        A=np.zeros((nwave,nwave),dtype=float)
        B=np.zeros((nwave),dtype=float)
        D=scipy.sparse.lil_matrix((nwave,nwave))
        D2=scipy.sparse.lil_matrix((nwave,nwave))
        
        Pol /= coef[0] # force constant term to 1.
        
        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber (1st fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2) # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwRP.T*sqrtwRP).todense()
            B += sqrtwRP.T*sqrtwflux[fiber]
        
        log.info("iter %d solving"%iteration)
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]
        # parameters = the deconvolved mean sky spectrum
        
        # now evaluate the polynomial coefficients
        Ap=np.zeros((ncoef,ncoef),dtype=float)
        Bp=np.zeros((ncoef),dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber  (2nd fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(skyfibers_monomials[:,fiber,:].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])
        
        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)    
        weight=1e24
        Ap[0,0] += weight
        Bp[0]   += weight # force 0th term to 1
        for i in range(1,chromatic_variation_deg+1) :
            Ap[i,i] += weight # force other wavelength terms to 0
        

        coef=cholesky_solve(Ap,Bp)
        log.info("pol coef = {}".format(coef))
        
        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T
        
        # chi2 and outlier rejection
        log.info("iter %d compute chi2"%iteration)
        for fiber in range(nfibers) :
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-Rsky[fiber].dot(Pol[fiber]*parameters))**2
        
        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1

        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,abs(sum_chi2-previous_chi2),nout_iter))

        if nout_iter == 0 and abs(sum_chi2-previous_chi2)<0.2 :
            break
        previous_chi2 = sum_chi2+0.
        

    log.info("nout tot=%d"%nout_tot)
    
    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar
    
    # we ignore here the fact that we have fit an angular variation,
    # so the sky model uncertainties are inaccurate
    
    log.info("compute the parameter covariance")
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)
    
    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)
    
    log.info("compute convolved sky and ivar")
    
    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar=Rmean.dot(parameter_covar).dot(Rmean.T.todense())
        
    # and keep only the diagonal
    convolved_sky_var=np.diagonal(convolved_sky_covar)
        
    # inverse
    convolved_sky_ivar=(convolved_sky_var>0)/(convolved_sky_var+(convolved_sky_var==0))
    
    # and simply assume it is the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    
    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber]*parameters)
        
    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()
    
    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)
    
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
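# A small sketch (not from the original source) of the "huge prior" trick used
# above to pin parameters of a least-squares normal system: adding a very
# large weight to Ap[i,i] (and weight*target to Bp[i]) effectively fixes
# parameter i at the target value while the remaining parameters stay free.
# Here coefficient 0 is pinned to 1 and coefficient 1 to 0, as in the loop
# over the chromatic terms above.
import numpy as np

rng = np.random.default_rng(4)
n = 4
H = rng.normal(size=(n, n))
Ap = H.dot(H.T)                      # some positive definite normal matrix
Bp = Ap.dot(rng.normal(size=n))      # right-hand side built from arbitrary "data"

weight = 1e12
Ap[0, 0] += weight
Bp[0] += weight * 1.0                # pin coefficient 0 to 1
Ap[1, 1] += weight                   # pin coefficient 1 to 0 (target value is 0)

coef = np.linalg.solve(Ap, Bp)
print(coef)                          # coef[0] ~ 1, coef[1] ~ 0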
Example #14
0
def compute_uniform_sky(frame, nsig_clipping=4.,max_iterations=100,model_ivar=False,add_variance=True) :
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] Flux[j]
    
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        
    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()*(frame.mask[skyfibers]==0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]
    
    input_ivar=None 
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave  = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar   = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]
    

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    
    
    
    nout_tot=0
    for iteration in range(max_iterations) :
        
        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        
        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i
        
        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber  sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and 
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]
        
        #A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        A=np.zeros((nwave,nwave))
        B=np.zeros((nwave))
        
        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))
        
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwR.T*sqrtwR).todense()
            B += sqrtwR.T*sqrtwflux[fiber]
                        
        log.info("iter %d solving"%iteration)
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]
        
        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2
            
        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1

        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)


    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar
    
    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance
   
    try :
        parameter_covar=cholesky_invert(A)
        # the above is too slow
        # maybe invert per block, sandwich by R 
    except np.linalg.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)
    
    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)
    
    log.info("compute convolved sky and ivar")
    
    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar=Rmean.dot(parameter_covar).dot(Rmean.T.todense())
        
    # and keep only the diagonal
    convolved_sky_var=np.diagonal(convolved_sky_covar)
        
    # inverse
    convolved_sky_ivar=(convolved_sky_var>0)/(convolved_sky_var+(convolved_sky_var==0))
    
    # and simply assume it is the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)
        
    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()
    
    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)
    
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
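# A self-contained numerical check (not from the original source) of the
# normal-equation identities written out in the comments of
# compute_uniform_sky: for a model flux = R.dot(sky) with weights ivar,
# A = sum_fiber (D R)^T (D R) with D = diag(sqrt(ivar)) and
# B = sum_fiber (D R)^T (sqrt(ivar)*flux); solving A sky = B recovers the
# deconvolved sky exactly for noiseless toy data.
import numpy as np

rng = np.random.default_rng(5)
nwave, nfibers = 30, 8
sky_true = 1.0 + rng.random(nwave)

A = np.zeros((nwave, nwave))
B = np.zeros(nwave)
for fiber in range(nfibers):
    R = np.eye(nwave) + 0.1 * rng.normal(size=(nwave, nwave))   # stand-in resolution
    ivar = rng.uniform(0.5, 2.0, size=nwave)
    flux = R.dot(sky_true)                       # noiseless toy observation
    sqrtwR = np.sqrt(ivar)[:, None] * R          # each row r of R multiplied by sqrt(ivar)[r]
    A += sqrtwR.T.dot(sqrtwR)
    B += sqrtwR.T.dot(np.sqrt(ivar) * flux)

sky_fit = np.linalg.solve(A, B)
print(np.allclose(sky_fit, sky_true))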
Example #15
0
def compute_sky(fframe,fibermap=None,nsig_clipping=4., apply_resolution=False):
    """
    Adding in the offline algorithm here to be able to apply resolution for sky compute.
    We will update this here as needed for quicklook.
    The original weighted sky compute still is the default.

    Args:
        fframe: fiberflat fielded frame object
        fibermap: fibermap object
        nsig_clipping: [optional] sigma clipping value for outlier rejection
        apply_resolution: if True, uses the resolution in the frame object to evaluate
            the sky, allowing fiber-to-fiber variation of resolution.
    """
    nspec=fframe.nspec
    nwave=fframe.nwave

    #- Check with fibermap. exit if None
    #- use fibermap from frame itself if exists

    if fframe.fibermap is not None:
        fibermap=fframe.fibermap

    if fibermap is None:
        print("Must have fibermap for Sky compute")
        sys.exit(0)

    #- get the sky
    skyfibers = np.where(fibermap['OBJTYPE'] == 'SKY')[0]
    skyfluxes=fframe.flux[skyfibers]
    skyivars=fframe.ivar[skyfibers]
    
   
    nfibers=len(skyfibers)

    if apply_resolution:
        max_iterations=100
        current_ivar=skyivars.copy()
        Rsky = fframe.R[skyfibers] 
        sqrtw=np.sqrt(skyivars)
        sqrtwflux=sqrtw*skyfluxes

        chi2=np.zeros(skyfluxes.shape)

        nout_tot=0
        for iteration in range(max_iterations) :

            A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
            B=np.zeros((nwave))
            # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
            SD=scipy.sparse.lil_matrix((nwave,nwave))
            # loop on fiber to handle resolution
            for fiber in range(nfibers) :
                if fiber%10==0 :
                    print("iter %d fiber %d"%(iteration,fiber))
                R = Rsky[fiber]

                # diagonal sparse matrix with content = sqrt(ivar)
                SD.setdiag(sqrtw[fiber])

                sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

                A = A+(sqrtwR.T*sqrtwR).tocsr()
                B += sqrtwR.T*sqrtwflux[fiber]

            print("iter %d solving"%iteration)
    
            w = A.diagonal()>0
            A_pos_def = A.todense()[w,:]
            A_pos_def = A_pos_def[:,w]
            skyflux = B*0
            try:
                skyflux[w]=cholesky_solve(A_pos_def,B[w])
            except:
                print("cholesky failed, trying svd in iteration {}".format(iteration))
                skyflux[w]=np.linalg.lstsq(A_pos_def,B[w],rcond=None)[0]

            print("iter %d compute chi2"%iteration)

            for fiber in range(nfibers) :

                S = Rsky[fiber].dot(skyflux)
                chi2[fiber]=current_ivar[fiber]*(skyfluxes[fiber]-S)**2

            print("rejecting")

            nout_iter=0
            if iteration<1 :
                # only remove worst outlier per wave
                # apply rejection iteratively, only one entry per wave among fibers
                # find waves with outlier (fastest way)
                nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
                selection=np.where(nout_per_wave>0)[0]
                for i in selection :
                    worst_entry=np.argmax(chi2[:,i])
                    current_ivar[worst_entry,i]=0
                    sqrtw[worst_entry,i]=0
                    sqrtwflux[worst_entry,i]=0
                    nout_iter += 1
            else :
                # remove all of them at once
                bad=(chi2>nsig_clipping**2)
                current_ivar *= (bad==0)
                sqrtw *= (bad==0)
                sqrtwflux *= (bad==0)
                nout_iter += np.sum(bad)

            nout_tot += nout_iter

            sum_chi2=float(np.sum(chi2))
            ndf=int(np.sum(chi2>0)-nwave)
            chi2pdf=0.
            if ndf>0 :
                chi2pdf=sum_chi2/ndf
            print("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

            if nout_iter == 0 :
                break

        print("nout tot=%d"%nout_tot)
        # solve once again to get deconvolved sky variance
        #skyflux,skycovar=cholesky_solve_and_invert(A.todense(),B)
        skyflux = np.linalg.lstsq(A.todense(),B,rcond=None)[0]
        skycovar = np.linalg.pinv(A.todense())
        #- sky inverse variance, but incomplete and not needed anyway
        # skyvar=np.diagonal(skycovar)
        # skyivar=(skyvar>0)/(skyvar+(skyvar==0))

        # Use diagonal of skycovar convolved with mean resolution of all fibers
        # first compute average resolution
        #- computing mean from matrix itself
        R= (fframe.R.sum()/fframe.nspec).todia()
        #mean_res_data=np.mean(fframe.resolution_data,axis=0)
        #R = Resolution(mean_res_data)
        # compute convolved sky and ivar
        cskycovar=R.dot(skycovar).dot(R.T.todense())
        cskyvar=np.diagonal(cskycovar)
        cskyivar=(cskyvar>0)/(cskyvar+(cskyvar==0))

        # convert cskyivar to 2D; today it is the same for all spectra,
        # but that may not be the case in the future
        finalskyivar = np.tile(cskyivar, nspec).reshape(nspec, nwave)

        # Convolved sky
        finalskyflux = np.zeros(fframe.flux.shape)
        for i in range(nspec):
            finalskyflux[i] = fframe.R[i].dot(skyflux)

        # need to do better here
        mask = (finalskyivar==0).astype(np.uint32)

    else: #- compute weighted average sky ignoring the fiber/wavelength resolution 
        if skyfibers.shape[0] > 1:

            weights=skyivars
            #- now get weighted meansky and ivar
            meanskyflux=np.average(skyfluxes,axis=0,weights=weights)
            wtot=weights.sum(axis=0)
            werr2=(weights**2*(skyfluxes-meanskyflux)**2).sum(axis=0)
            werr=np.sqrt(werr2)/wtot
            meanskyivar=1./werr**2
        else:
            meanskyflux=skyfluxes
            meanskyivar=skyivars

        #- Create a 2d- sky model replicating this  
        finalskyflux=np.tile(meanskyflux,nspec).reshape(nspec,nwave)
        finalskyivar=np.tile(meanskyivar,nspec).reshape(nspec,nwave)
        mask=fframe.mask
        
    skymodel=SkyModel(fframe.wave,finalskyflux,finalskyivar,mask)
    return skymodel
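# A small sketch (not from the original source) of the non-resolution branch
# above: an inverse-variance weighted mean of the sky fibers at each
# wavelength. The quicklook code estimates the error from the weighted scatter
# of the fibers; the formal error 1/sqrt(sum of weights) is shown alongside
# for comparison, and the two agree when the reported ivars are realistic.
import numpy as np

rng = np.random.default_rng(6)
nskyfibers, nwave = 20, 50
truth = 10.0 + np.sin(np.linspace(0.0, 3.0, nwave))
sigma = rng.uniform(0.5, 1.5, size=(nskyfibers, 1))       # per-fiber noise level
skyfluxes = truth + sigma * rng.normal(size=(nskyfibers, nwave))
skyivars = np.broadcast_to(1.0 / sigma**2, (nskyfibers, nwave))

weights = skyivars
meanskyflux = np.average(skyfluxes, axis=0, weights=weights)
wtot = weights.sum(axis=0)

# scatter-based error, as in the quicklook compute_sky above
werr = np.sqrt((weights**2 * (skyfluxes - meanskyflux)**2).sum(axis=0)) / wtot
# formal error from the weights alone
formal_err = 1.0 / np.sqrt(wtot)

print(np.median(werr), np.median(formal_err))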
Example #16
0
def compute_sky(frame, nsig_clipping=4.):
    """Compute a sky model.

    Input has to correspond to sky fibers only.
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    #debug
    #nfibers=min(nfibers,2)

    nout_tot = 0
    for iteration in range(20):

        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))
        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):

            S = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - S)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1

        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    #- sky inverse variance, but incomplete and not needed anyway
    # skyvar=np.diagonal(skycovar)
    # skyivar=(skyvar>0)/(skyvar+(skyvar==0))

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)
    # compute convolved sky and ivar
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask, nrej=nout_tot)
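
Example #16 accumulates the normal equations A = sum_f (W_f^(1/2) R_f)^T (W_f^(1/2) R_f) and B = sum_f (W_f^(1/2) R_f)^T (W_f^(1/2) flux_f) and then solves A * sky = B for the deconvolved sky. Below is a self-contained sketch of that accumulation with a toy tridiagonal "resolution" per fiber; numpy's Cholesky factorization stands in for desispec.linalg.cholesky_solve, and all numbers are made up:

import numpy as np
import scipy.sparse

rng = np.random.default_rng(1)
nfibers, nwave = 4, 60

# toy "resolution" matrices: a simple 3-diagonal smoothing kernel per fiber
def toy_resolution(n):
    return scipy.sparse.diags([0.25, 0.5, 0.25], [-1, 0, 1], shape=(n, n)).tocsr()

Rsky = [toy_resolution(nwave) for _ in range(nfibers)]
true_sky = 5.0 + np.cos(np.linspace(0, 4, nwave))
ivar = np.full((nfibers, nwave), 25.0)            # sigma = 0.2 per pixel
flux = np.array([R.dot(true_sky) for R in Rsky]) + rng.normal(0, 0.2, (nfibers, nwave))

sqrtw = np.sqrt(ivar)
A = np.zeros((nwave, nwave))
B = np.zeros(nwave)
for fiber in range(nfibers):
    SD = scipy.sparse.diags(sqrtw[fiber])          # diag(sqrt(ivar)) of this fiber
    sqrtwR = SD.dot(Rsky[fiber])                   # weight each row of R
    A += (sqrtwR.T.dot(sqrtwR)).toarray()
    B += sqrtwR.T.dot(sqrtw[fiber] * flux[fiber])

# cholesky_solve(A, B) amounts to solving A x = B with A positive definite
L = np.linalg.cholesky(A)
skyflux = np.linalg.solve(L.T, np.linalg.solve(L, B))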
Example #17
0
def compute_fiberflat(frame,
                      nsig_clipping=4.,
                      accuracy=5.e-4,
                      minval=0.1,
                      maxval=10.):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.
    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )**2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum

    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than 'accuracy'
    # with respect to the previous iteration at all wavelengths
    # we also have a max. number of iterations for this code
    max_iterations = 100

    nout_tot = 0
    chi2pdf = 0.

    smooth_fiberflat = np.ones((frame.flux.shape))
    previous_smooth_fiberflat = smooth_fiberflat.copy()

    chi2 = np.zeros((flux.shape))

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection

    for iteration in range(max_iterations):

        # use median for spectrum
        mean_spectrum = np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]):
            ok = np.where(ivar[:, i] > 0)[0]
            if ok.size > 0:
                mean_spectrum[i] = np.median(flux[ok, i])

        # max pixels far from mean spectrum.
        #log.info("mask pixels with difference smaller than %f or larger than %f of mean")
        nout_iter = 0
        for fiber in range(nfibers):
            bad = np.where((ivar[fiber] > 0)
                           & ((flux[fiber] > maxval * mean_spectrum)
                              | (flux[fiber] < minval * mean_spectrum)))[0]
            if bad.size > 100:
                log.warning(
                    "masking fiber %d because of bad flat field with %d bad pixels"
                    % (fiber, bad.size))
                ivar[fiber] = 0.
            if bad.size > 0:
                log.warning("masking %d bad pixels for fiber %d" %
                            (bad.size, fiber))
                ivar[fiber, bad] = 0.
            nout_iter += bad.size

        # fit smooth fiberflat and compute chi2
        smoothing_res = 100.  #A

        for fiber in range(nfibers):

            if np.sum(ivar[fiber] > 0) == 0:
                continue

            F = np.ones((flux.shape[1]))
            ok = np.where((mean_spectrum != 0) & (ivar[fiber] > 0))[0]
            F[ok] = flux[fiber, ok] / mean_spectrum[ok]
            smooth_fiberflat[fiber] = spline_fit(wave, wave[ok], F[ok],
                                                 smoothing_res, ivar[fiber,
                                                                     ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        for fiber in range(nfibers):
            smooth_fiberflat[fiber,
                             ok] = smooth_fiberflat[fiber, ok] / mean[ok]
        mean_spectrum *= mean

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        # we don't start the rejection tests until we have converged on this
        if max_diff > 0.01:
            log.info(
                "1st pass, max diff. = %g > 0.01 , continue iterating before outlier rejection"
                % (max_diff))
            continue

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        if True:
            nsig_clipping_for_this_pass = nsig_clipping

            # not more than 5 pixels per fiber at a time
            for fiber in range(nfibers):
                for loop in range(max_iterations):
                    bad = np.where(
                        chi2[fiber] > nsig_clipping_for_this_pass**2)[0]
                    if bad.size > 0:
                        if bad.size > 5:  # not more than 5 pixels at a time
                            ii = np.argsort(chi2[fiber, bad])
                            bad = bad[ii[-5:]]
                        ivar[fiber, bad] = 0
                        nout_iter += bad.size
                        ok = np.where((mean_spectrum != 0)
                                      & (ivar[fiber] > 0))[0]
                        F[ok] = flux[fiber, ok] / mean_spectrum[ok]
                        smooth_fiberflat[fiber] = spline_fit(
                            wave, wave[ok], F[ok], smoothing_res, ivar[fiber,
                                                                       ok])
                        chi2[fiber] = ivar[fiber] * (
                            flux[fiber] -
                            smooth_fiberflat[fiber] * mean_spectrum)**2
                    else:
                        break

            nout_tot += nout_iter

            sum_chi2 = float(np.sum(chi2))
            ndf = int(
                np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
            chi2pdf = 0.
            if ndf > 0:
                chi2pdf = sum_chi2 / ndf
            log.info(
                "1st pass iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d (nsig=%f)"
                % (iteration, sum_chi2, ndf, chi2pdf, nout_iter,
                   nsig_clipping_for_this_pass))

        if max_diff > accuracy:
            log.info(
                "1st pass iter #%d max diff. = %g > requirement = %g , continue iterating"
                % (iteration, max_diff, accuracy))
            continue

        if nout_iter == 0:
            break

    log.info("after 1st pass : nout = %d/%d" %
             (np.sum(ivar == 0), np.size(ivar.flatten())))

    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):

        log.info("2nd pass, iter %d : mean deconvolved spectrum" % iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d" %
                         (iteration, fiber))

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])

        mean_spectrum = cholesky_solve(A.todense(), B)

        # fit smooth fiberflat
        smoothing_res = 100.  #A

        for fiber in range(nfibers):

            if np.sum(ivar[fiber] > 0) == 0:
                continue

            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]

            M = R.dot(mean_spectrum)
            ok = np.where(M != 0)[0]
            smooth_fiberflat[fiber] = spline_fit(wave, wave[ok],
                                                 flux[fiber, ok] / M[ok],
                                                 smoothing_res, ivar[fiber,
                                                                     ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]
        mean_spectrum *= mean

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f" %
                 (iteration, sum_chi2, ndf, chi2pdf))

        if max_diff < accuracy:
            break

        log.info(
            "2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"
            % (iteration, max_diff, accuracy))

    log.info("Total number of masked pixels=%d" % nout_tot)

    log.info("3rd pass, final computation of fiber flat")

    # now use the mean spectrum to compute the flat field correction without any smoothing,
    # because sharp features can arise where there are dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    mask = np.zeros((flux.shape), dtype='uint32')

    # reset ivar
    ivar = frame.ivar

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        ### R = Resolution(resolution_data[fiber])
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            smooth_fiberflat = spline_fit(wave, wave, fiberflat[fiber],
                                          smoothing_res, fiberflat_ivar[fiber])
            chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] -
                                            smooth_fiberflat)**2
            bad = np.where(chi2 > nsig_for_mask**2)[0]
            if bad.size > 0:

                if bad.size > 5:  # not more than 5 pixels at a time
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-5:]]

                mask[fiber, bad] += fiberflat_mask
                fiberflat_ivar[fiber, bad] = 0.
                nbad_tot += bad.size
            else:
                break
            iteration += 1
        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0)
                       | (fiberflat[fiber] < minval)
                       | (fiberflat[fiber] > maxval))[0]
        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # find max length of segment with bad pix
            length = 0
            for i in range(bad.size):
                ib = bad[i]
                ilength = 1
                tmp = ib
                for jb in bad[i + 1:]:
                    if jb == tmp + 1:
                        ilength += 1
                        tmp = jb
                    else:
                        break
                length = max(length, ilength)
            if length > 10:
                log.info(
                    "3rd pass : fiber #%d has a max length of bad pixels=%d" %
                    (fiber, length))
            smoothing_res = float(max(100, 2 * length))
            x = np.arange(wave.size)

            ok = np.where(fiberflat_ivar[fiber] > 0)[0]
            smooth_fiberflat = spline_fit(x, x[ok], fiberflat[fiber, ok],
                                          smoothing_res, fiberflat_ivar[fiber,
                                                                        ok])
            fiberflat[fiber, bad] = smooth_fiberflat[bad]

        if nbad_tot > 0:
            log.info(
                "3rd pass : fiber #%d masked pixels = %d (%d iterations)" %
                (fiber, nbad_tot, iteration))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave,
                     fiberflat,
                     fiberflat_ivar,
                     mask,
                     mean_spectrum,
                     chi2pdf=chi2pdf)
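
The 3rd pass above uses a guarded division, fiberflat = (M!=0)*flux/(M+(M==0)) + (M==0), so that wavelengths where the convolved mean spectrum M is zero get a flat of exactly 1 and zero weight instead of a division error. A tiny standalone illustration with toy numbers (not desispec data):

import numpy as np

# M = resolution-convolved mean spectrum for one fiber; zeros mark wavelengths
# where the model is undefined (e.g. dead columns)
M = np.array([0.0, 2.0, 4.0, 0.0, 5.0])
flux = np.array([1.0, 2.2, 3.6, 0.5, 5.5])
ivar = np.array([4.0, 4.0, 4.0, 4.0, 4.0])

# guarded ratio: flux/M where M != 0, exactly 1 (with zero ivar) where M == 0
fiberflat = (M != 0) * flux / (M + (M == 0)) + (M == 0)
fiberflat_ivar = ivar * M**2

print(fiberflat)       # [1.   1.1  0.9  1.   1.1]
print(fiberflat_ivar)  # [  0.  16.  64.   0. 100.]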
Example #18
0
def compute_uniform_sky(frame,
                        nsig_clipping=4.,
                        max_iterations=100,
                        model_ivar=False,
                        add_variance=True):
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] Flux[j]
    
    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        
    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]):
            threshold = 0.01
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i

        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber  sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        #A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        A = np.zeros((nwave, nwave))
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" %
                         (iteration, fiber, nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwR.T * sqrtwR).todense()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] -
                                                 fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1

        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance

    try:
        parameter_covar = cholesky_invert(A)
        # the above is too slow
        # maybe invert per block, sandwich by R
    except np.linalg.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var +
                                                    (convolved_sky_var == 0))

    # and simply assume it is the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar,
                       frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(
        frame.wave.copy(),
        cskyflux,
        modified_cskyivar,
        mask,
        nrej=nout_tot,
        stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
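
The rejection block above is shared by all of these sky routines: on the first iteration only the single worst outlier per wavelength is removed, and from the second iteration on everything above the nsig_clipping threshold is dropped at once. A small standalone sketch of the first-iteration behaviour, using toy chi2 values:

import numpy as np

nsig_clipping = 4.
nfibers, nwave = 6, 10

chi2 = np.ones((nfibers, nwave))
chi2[2, 3] = 30.   # worst outlier at wavelength 3
chi2[4, 3] = 20.   # a second, milder outlier at the same wavelength
ivar = np.ones((nfibers, nwave))

# first iteration: reject only the worst fiber at each offending wavelength,
# so one bad fiber cannot drag several others over the threshold
nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
for i in np.where(nout_per_wave > 0)[0]:
    worst_entry = np.argmax(chi2[:, i])
    ivar[worst_entry, i] = 0

print(np.argwhere(ivar == 0))   # [[2 3]] : only the worst entry is clipped

# later iterations would clip everything above the threshold at once:
# bad = chi2 > nsig_clipping**2 ; ivar *= (bad == 0)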
Example #19
0
def compute_sky(frame, nsig_clipping=4.) :
    """Compute a sky model.

    Input has to correspond to sky fibers only.
    Input fluxes are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    #debug
    #nfibers=min(nfibers,2)

    nout_tot=0
    for iteration in range(20) :

        A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        B=np.zeros((nwave))
        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d fiber %d"%(iteration,fiber))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            A = A+(sqrtwR.T*sqrtwR).tocsr()
            B += sqrtwR.T*sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)

        skyflux=cholesky_solve(A.todense(),B)

        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :

            S = Rsky[fiber].dot(skyflux)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-S)**2

        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1

        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)


    # solve once again to get deconvolved sky variance
    skyflux,skycovar=cholesky_solve_and_invert(A.todense(),B)

    #- sky inverse variance, but incomplete and not needed anyway
    # skyvar=np.diagonal(skycovar)
    # skyivar=(skyvar>0)/(skyvar+(skyvar==0))

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    R = Resolution(mean_res_data)
    # compute convolved sky and ivar
    cskycovar=R.dot(skycovar).dot(R.T.todense())
    cskyvar=np.diagonal(cskycovar)
    cskyivar=(cskyvar>0)/(cskyvar+(cskyvar==0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask,
                    nrej=nout_tot)
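
The final block above propagates the deconvolved sky covariance through the mean resolution, cskycovar = R . skycovar . R^T, keeps only its diagonal, and inverts it with the guarded expression (var>0)/(var+(var==0)). A standalone sketch with toy matrices (not desispec objects):

import numpy as np

# toy deconvolved sky covariance and a toy mean resolution matrix
skycovar = np.diag([0.04, 0.09, 0.16])
Rmean = np.array([[0.8, 0.2, 0.0],
                  [0.2, 0.6, 0.2],
                  [0.0, 0.2, 0.8]])

# convolve the covariance with the resolution and keep only the diagonal
cskycovar = Rmean.dot(skycovar).dot(Rmean.T)
cskyvar = np.diagonal(cskycovar)

# guarded inverse used throughout these routines:
# ivar = 1/var where var > 0, and exactly 0 where var == 0
var = np.append(cskyvar, 0.0)      # append one unconstrained wavelength
ivar = (var > 0) / (var + (var == 0))
print(ivar)                        # last entry is 0, not inf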
Example #20
0
def compute_non_uniform_sky(frame,
                            nsig_clipping=4.,
                            max_iterations=10,
                            model_ivar=False,
                            add_variance=True,
                            angular_variation_deg=1):
    """Compute a sky model.
    
    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )
    
    Input fluxes are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D mask of flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg  : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1). One set of coefficients per wavelength
    
    returns SkyModel object with attributes wave, flux, ivar, mask
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    x = frame.fibermap["FIBERASSIGN_X"][skyfibers]
    y = frame.fibermap["FIBERASSIGN_Y"][skyfibers]
    # normalize for numerical stability
    xm = np.mean(frame.fibermap["FIBERASSIGN_X"])
    ym = np.mean(frame.fibermap["FIBERASSIGN_Y"])
    xs = np.std(frame.fibermap["FIBERASSIGN_X"])
    ys = np.std(frame.fibermap["FIBERASSIGN_Y"])
    if xs == 0: xs = 1
    if ys == 0: ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            monomials.append((x**dx) * (y**dy))
    ncoef = len(monomials)
    monomials = np.array(monomials)

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]):
            threshold = 0.01
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) =  sum_p monom[fiber,p] *  a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] *  a_ip
        # model[fiber,w] = sum_p monom[fiber,p] sum_i R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  =  sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]

        A = np.zeros((nwave * ncoef, nwave * ncoef))
        B = np.zeros((nwave * ncoef))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" %
                         (iteration, fiber, nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            #wRtR=(sqrtwR.T*sqrtwR).tocsr()
            wRtR = (sqrtwR.T * sqrtwR).todense()
            wRtF = sqrtwR.T * sqrtwflux[fiber]
            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef):
                for k in range(ncoef):
                    A[p * nwave:(p + 1) * nwave, k * nwave:(k + 1) *
                      nwave] += monomials[p, fiber] * monomials[k,
                                                                fiber] * wRtR
                B[p * nwave:(p + 1) * nwave] += monomials[p, fiber] * wRtF

        log.info("iter %d solving" % iteration)
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = B * 0
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef):
                unconvolved_fiber_sky_flux += monomials[
                    p, fiber] * parameters[p * nwave:(p + 1) * nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(
                unconvolved_fiber_sky_flux)

            chi2[fiber] = current_ivar[fiber] * (flux[fiber] -
                                                 fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1

        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution.
    convolved_parameter_covar = np.zeros((ncoef, ncoef, nwave))
    for p in range(ncoef):
        for k in range(ncoef):
            convolved_parameter_covar[p, k] = np.diagonal(
                Rmean.dot(parameter_covar[p * nwave:(p + 1) * nwave,
                                          k * nwave:(k + 1) * nwave]).dot(
                                              Rmean.T.todense()))
    '''
    import astropy.io.fits as pyfits
    pyfits.writeto("convolved_parameter_covar.fits",convolved_parameter_covar,overwrite=True)
    
    # other approach
    log.info("dense Rmean...")
    Rmean=Rmean.todense()
    log.info("invert Rinv...")
    Rinv=np.linalg.inv(Rmean)
    # check this
    print("0?",np.max(np.abs(Rinv.dot(Rmean)-np.eye(Rmean.shape[0])))/np.max(np.abs(Rmean)))
    convolved_parameter_ivar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_ivar[p,k] = np.diagonal(Rinv.T.dot(A[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave]).dot(Rinv))
    # solve for each wave separately
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for i in range(nwave) :
        print("inverting ivar of wave %d/%d"%(i,nwave))
        convolved_parameter_covar[:,:,i] = cholesky_invert(convolved_parameter_ivar[:,:,i])
    pyfits.writeto("convolved_parameter_covar_bis.fits",convolved_parameter_covar,overwrite=True)
    import sys
    sys.exit(12)
    '''

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant for a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")
    for i in range(frame.nspec):
        # compute monomials
        M = []
        xi = (frame.fibermap["FIBERASSIGN_X"][i] - xm) / xs
        yi = (frame.fibermap["FIBERASSIGN_Y"][i] - ym) / ys
        for dx in range(angular_variation_deg + 1):
            for dy in range(angular_variation_deg + 1 - dx):
                M.append((xi**dx) * (yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux = np.zeros(nwave)
        convolved_fiber_skyvar = np.zeros(nwave)
        for p in range(ncoef):
            unconvolved_fiber_sky_flux += M[p] * parameters[p * nwave:(p + 1) *
                                                            nwave]
            for k in range(ncoef):
                convolved_fiber_skyvar += M[p] * M[
                    k] * convolved_parameter_covar[p, k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance
        cskyivar[i] = (convolved_fiber_skyvar > 0) / (
            convolved_fiber_skyvar + (convolved_fiber_skyvar == 0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(
        frame.wave.copy(),
        cskyflux,
        modified_cskyivar,
        mask,
        nrej=nout_tot,
        stat_ivar=cskyivar)  # keep a record of the statistical ivar for QA
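
Example #20 parameterizes the unconvolved sky at each wavelength as a 2D polynomial of the normalized focal-plane coordinates, sky(fiber, lambda) = sum_p monom[fiber,p] * a_p(lambda), with one block of nwave coefficients per monomial. A standalone sketch of building the monomials and evaluating a fiber's unconvolved sky from toy coefficients (all numbers made up):

import numpy as np

angular_variation_deg = 1
nwave = 5

# toy focal-plane coordinates for 3 fibers, already normalized as in the routine
x = np.array([-0.5, 0.0, 0.8])
y = np.array([0.3, -0.2, 0.1])

# monomials x**dx * y**dy with dx+dy <= deg, in the same loop order as above:
# deg=1 gives [1, y, x], i.e. ncoef = 3
monomials = []
for dx in range(angular_variation_deg + 1):
    for dy in range(angular_variation_deg + 1 - dx):
        monomials.append((x**dx) * (y**dy))
monomials = np.array(monomials)            # shape (ncoef, nfibers)
ncoef = monomials.shape[0]

# toy solved parameters: ncoef blocks of nwave coefficients a_p(lambda)
parameters = np.concatenate([np.full(nwave, 100.0),   # constant term
                             np.full(nwave, 2.0),     # y gradient
                             np.full(nwave, 5.0)])    # x gradient

# unconvolved sky of each fiber = sum_p monom[p, fiber] * a_p(lambda);
# the real routine then convolves this with frame.R[fiber]
for fiber in range(x.size):
    sky = np.zeros(nwave)
    for p in range(ncoef):
        sky += monomials[p, fiber] * parameters[p * nwave:(p + 1) * nwave]
    print(fiber, sky[0])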
Example #21
0
def compute_fiberflat(frame, nsig_clipping=4., accuracy=5.e-4, minval=0.1, maxval=10.) :
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.
    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log=get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )**2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave=frame.nwave
    nfibers=frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar*(frame.mask==0)
    
    
    
    # iterative fitting and clipping to get precise mean spectrum


   

    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than 'accuracy'
    # with respect to the previous iteration at all wavelengths
    # we also have a max. number of iterations for this code
    max_iterations = 100
    
    nout_tot=0
    chi2pdf = 0.
    
    smooth_fiberflat=np.ones((frame.flux.shape))
    previous_smooth_fiberflat=smooth_fiberflat.copy()
    
    chi2=np.zeros((flux.shape))


    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    
    for iteration in range(max_iterations) :
        
        # use median for spectrum
        mean_spectrum=np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]) :
            ok=np.where(ivar[:,i]>0)[0]
            if ok.size > 0 :
                mean_spectrum[i]=np.median(flux[ok,i])
                
        # max pixels far from mean spectrum.
        #log.info("mask pixels with difference smaller than %f or larger than %f of mean")
        nout_iter=0
        for fiber in range(nfibers) :
            bad=np.where((ivar[fiber]>0)&((flux[fiber]>maxval*mean_spectrum)|(flux[fiber]<minval*mean_spectrum)))[0]
            if bad.size>100 :
                log.warning("masking fiber %d because of bad flat field with %d bad pixels"%(fiber,bad.size))
                ivar[fiber]=0.
            if bad.size>0 :
                log.warning("masking %d bad pixels for fiber %d"%(bad.size,fiber))
                ivar[fiber,bad]=0.
            nout_iter += bad.size
        
        # fit smooth fiberflat and compute chi2
        smoothing_res=100. #A
        
        for fiber in range(nfibers) :
            
            if np.sum(ivar[fiber]>0)==0 :
                continue

            F = np.ones((flux.shape[1]))
            ok=np.where((mean_spectrum!=0)&(ivar[fiber]>0))[0]
            F[ok] = flux[fiber,ok]/mean_spectrum[ok]
            smooth_fiberflat[fiber]=spline_fit(wave,wave[ok],F[ok],smoothing_res,ivar[fiber,ok])
            
        
        # normalize to get a mean fiberflat=1
        mean=np.mean(smooth_fiberflat,axis=0)
        ok=np.where(mean!=0)[0]
        for fiber in range(nfibers) :
            smooth_fiberflat[fiber,ok] = smooth_fiberflat[fiber,ok]/mean[ok]
        mean_spectrum *= mean
                
        
        
        # this is the max difference between two iterations
        max_diff=np.max(np.abs(smooth_fiberflat-previous_smooth_fiberflat)*(ivar>0.)) 
        previous_smooth_fiberflat=smooth_fiberflat.copy()
        
        # we don't start the rejection tests until we have converged on this
        if max_diff>0.01 :
            log.info("1st pass, max diff. = %g > 0.01 , continue iterating before outlier rejection"%(max_diff))
            continue
                    

        chi2=ivar*(flux-smooth_fiberflat*mean_spectrum)**2
        
        if True :  
            nsig_clipping_for_this_pass = nsig_clipping
            
            # not more than 5 pixels per fiber at a time
            for fiber in range(nfibers) :
                for loop in range(max_iterations) :
                    bad=np.where(chi2[fiber]>nsig_clipping_for_this_pass**2)[0]
                    if bad.size>0 :                
                        if bad.size>5 : # not more than 5 pixels at a time
                            ii=np.argsort(chi2[fiber,bad])
                            bad=bad[ii[-5:]]
                        ivar[fiber,bad] = 0
                        nout_iter += bad.size
                        ok=np.where((mean_spectrum!=0)&(ivar[fiber]>0))[0]
                        F[ok] = flux[fiber,ok]/mean_spectrum[ok]
                        smooth_fiberflat[fiber]=spline_fit(wave,wave[ok],F[ok],smoothing_res,ivar[fiber,ok])
                        chi2[fiber]=ivar[fiber]*(flux[fiber]-smooth_fiberflat[fiber]*mean_spectrum)**2
                    else :
                        break
        
            nout_tot += nout_iter

            sum_chi2=float(np.sum(chi2))
            ndf=int(np.sum(chi2>0)-nwave-nfibers*(nwave/smoothing_res))
            chi2pdf=0.
            if ndf>0 :
                chi2pdf=sum_chi2/ndf
            log.info("1st pass iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d (nsig=%f)"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter,nsig_clipping_for_this_pass))

        
        if max_diff>accuracy :
            log.info("1st pass iter #%d max diff. = %g > requirement = %g , continue iterating"%(iteration,max_diff,accuracy))
            continue
    
        if nout_iter == 0 :
            break

    log.info("after 1st pass : nout = %d/%d"%(np.sum(ivar==0),np.size(ivar.flatten())))
    
    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations) : 
        
        log.info("2nd pass, iter %d : mean deconvolved spectrum"%iteration)
        
        # fit mean spectrum
        A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        B=np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # this is to go a bit faster
        sqrtwflat=np.sqrt(ivar)*smooth_fiberflat
        
        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("2nd pass, filling matrix, iter %d fiber %d"%(iteration,fiber))
                
            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]                
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD*R # each row r of R is multiplied by sqrtwflat[r]
                
            A = A+(sqrtwflatR.T*sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber])*flux[fiber])
            
        mean_spectrum=cholesky_solve(A.todense(),B)
            
            
        # fit smooth fiberflat
        smoothing_res=100. #A

        for fiber in range(nfibers) :

            if np.sum(ivar[fiber]>0)==0 :
                continue
            
            ### R = Resolution(resolution_data[fiber])
            R = frame.R[fiber]
            
            M = R.dot(mean_spectrum)            
            ok=np.where(M!=0)[0]
            smooth_fiberflat[fiber]=spline_fit(wave,wave[ok],flux[fiber,ok]/M[ok],smoothing_res,ivar[fiber,ok])
        
        # normalize to get a mean fiberflat=1
        mean=np.mean(smooth_fiberflat,axis=0)
        ok=np.where(mean!=0)[0]
        smooth_fiberflat[:,ok] /= mean[ok]
        mean_spectrum *= mean
        
        chi2=ivar*(flux-smooth_fiberflat*mean_spectrum)**2
        
        # this is the max difference between two iterations
        max_diff=np.max(np.abs(smooth_fiberflat-previous_smooth_fiberflat)*(ivar>0.))
        previous_smooth_fiberflat=smooth_fiberflat.copy()
        
        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave-nfibers*(nwave/smoothing_res))
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f"%(iteration,sum_chi2,ndf,chi2pdf))
        
        if max_diff<accuracy :
            break
        
        log.info("2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"%(iteration,max_diff,accuracy))
        

    log.info("Total number of masked pixels=%d"%nout_tot)

    log.info("3rd pass, final computation of fiber flat")

    # now use the mean spectrum to compute the flat field correction without any smoothing,
    # because sharp features can arise where there are dead columns

    fiberflat=np.ones((flux.shape))
    fiberflat_ivar=np.zeros((flux.shape))
    mask=np.zeros(flux.shape,dtype='uint32')  # bit mask; the Python 2 'long' no longer exists
    
    # reset ivar
    ivar=frame.ivar
    
    fiberflat_mask=12 # place holder for actual mask bit when defined
    
    nsig_for_mask=nsig_clipping # only mask out N sigma outliers

    for fiber in range(nfibers) :
        
        if np.sum(ivar[fiber]>0)==0 :
            continue

        ### R = Resolution(resolution_data[fiber])
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(),mean_spectrum)).flatten()
        fiberflat[fiber] = (M!=0)*flux[fiber]/(M+(M==0)) + (M==0)
        fiberflat_ivar[fiber] = ivar[fiber]*M**2
        nbad_tot=0
        iteration=0
        while iteration<500 :
            smooth_fiberflat=spline_fit(wave,wave,fiberflat[fiber],smoothing_res,fiberflat_ivar[fiber])
            chi2=fiberflat_ivar[fiber]*(fiberflat[fiber]-smooth_fiberflat)**2
            bad=np.where(chi2>nsig_for_mask**2)[0]
            if bad.size>0 :
                
                if bad.size>5 : # not more than 5 pixels at a time
                    ii=np.argsort(chi2[bad])
                    bad=bad[ii[-5:]]
                
                mask[fiber,bad] += fiberflat_mask
                fiberflat_ivar[fiber,bad] = 0.
                nbad_tot += bad.size
            else :
                break
            iteration += 1
        # replace bad by smooth fiber flat
        bad=np.where((mask[fiber]>0)|(fiberflat_ivar[fiber]==0)|(fiberflat[fiber]<minval)|(fiberflat[fiber]>maxval))[0]
        if bad.size>0 :

            fiberflat_ivar[fiber,bad] = 0

            # find max length of segment with bad pix
            length=0
            for i in range(bad.size) :
                ib=bad[i]
                ilength=1
                tmp=ib
                for jb in bad[i+1:] :
                    if jb==tmp+1 :
                        ilength +=1
                        tmp=jb
                    else :
                        break
                length=max(length,ilength)
            if length>10 :
                log.info("3rd pass : fiber #%d has a max length of bad pixels=%d"%(fiber,length))
            smoothing_res=float(max(100,2*length))
            x=np.arange(wave.size)
            
            ok=np.where(fiberflat_ivar[fiber]>0)[0]
            smooth_fiberflat=spline_fit(x,x[ok],fiberflat[fiber,ok],smoothing_res,fiberflat_ivar[fiber,ok])
            fiberflat[fiber,bad] = smooth_fiberflat[bad]
                    
        if nbad_tot>0 :
            log.info("3rd pass : fiber #%d masked pixels = %d (%d iterations)"%(fiber,nbad_tot,iteration))
    
    # set median flat to 1
    log.info("set median fiberflat to 1")
    
    mean=np.ones((flux.shape[1]))
    for i in range(flux.shape[1]) :
        ok=np.where((mask[:,i]==0)&(ivar[:,i]>0))[0]
        if ok.size > 0 :
            mean[i] = np.median(fiberflat[ok,i])
    ok=np.where(mean!=0)[0]
    for fiber in range(nfibers) :
        fiberflat[fiber,ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)
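
For reference, the "set median fiberflat to 1" step at the end of compute_fiberflat normalizes each wavelength by the median flat over the unmasked fibers with positive ivar. A small standalone sketch with toy arrays (not desispec data):

import numpy as np

rng = np.random.default_rng(3)
nfibers, nwave = 4, 6

# toy fiberflat that sits around 1.1 instead of 1
fiberflat = 1.1 + 0.02 * rng.normal(size=(nfibers, nwave))
ivar = np.ones((nfibers, nwave))
mask = np.zeros((nfibers, nwave), dtype='uint32')
mask[1, 2] = 1                      # one masked pixel

# per-wavelength median over fibers that are unmasked and have ivar > 0
mean = np.ones(nwave)
for i in range(nwave):
    ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
    if ok.size > 0:
        mean[i] = np.median(fiberflat[ok, i])

# divide so the median (over good fibers) becomes 1 at every wavelength
ok = np.where(mean != 0)[0]
for fiber in range(nfibers):
    fiberflat[fiber, ok] /= mean[ok]

print(np.median(fiberflat, axis=0))   # close to 1 at each wavelength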