def run_pa(self, frame, outputfile):
    """Build a simple fiber flat from ``frame`` and write it to ``outputfile``.

    Each fiber's flux is deconvolved with its resolution matrix, the
    deconvolved spectra are averaged over fibers, and the flat of a fiber is
    its flux divided by that average re-convolved with the fiber's own
    resolution matrix.

    Args:
        frame: object providing ``nwave``, ``nspec``, ``wave``, ``flux``,
            ``ivar``, ``mask``, ``R`` (one resolution matrix per fiber) and
            ``meta``.
        outputfile: destination passed to ``write_fiberflat``.
    """
    from desispec.fiberflat import FiberFlat
    import desispec.io.fiberflat as ffIO
    from desispec.linalg import cholesky_solve

    n_wave = frame.nwave
    n_fibers = frame.nspec
    wavelength = frame.wave  #- this will become part of output too
    flux = frame.flux

    flux_total = np.zeros((n_wave))
    deconvolved = np.zeros(flux.shape)
    # Pixels flagged in the mask carry no weight.
    ivar = frame.ivar * (frame.mask == 0)

    # Deconvolve every fiber and accumulate the sum of deconvolved spectra.
    for idx in range(n_fibers):
        dense_resolution = frame.R[idx].todense()
        deconvolved[idx] = cholesky_solve(dense_resolution, flux[idx])
        flux_total += deconvolved[idx]

    flat = np.zeros(flux.shape)
    flat_ivar = np.zeros(ivar.shape)
    mean_spectrum = flux_total / n_fibers

    for idx in range(n_fibers):
        # Re-convolve the average spectrum to the resolution of this fiber.
        model = frame.R[idx].dot(mean_spectrum)
        is_zero = (model == 0)
        # Where the model is zero the ratio is undefined: the flat is set to 1
        # there and the denominator is padded to avoid dividing by zero.
        flat[idx] = (~is_zero) * flux[idx] / (model + is_zero) + is_zero
        flat_ivar[idx] = ivar[idx] * model**2

    fibflat = FiberFlat(wavelength.copy(), flat, flat_ivar,
                        frame.mask.copy(), mean_spectrum)

    ffIO.write_fiberflat(outputfile, fibflat, header=frame.meta)
    log.info("Wrote fiberflat file {}".format(outputfile))
def test_cholesky_solve(self):
    """Check that cholesky_solve(A, B) recovers X when B = A.X.

    A is built as a sum of outer products of random vectors, which makes it
    symmetric and positive semi-definite by construction.
    """
    # Local seeded generator: the test is reproducible and does not disturb
    # the state of the global numpy random generator.
    rng = np.random.RandomState(0)

    # create a random positive definite matrix A
    n = 12
    A = np.zeros((n, n))
    for _ in range(n):
        H = rng.random_sample(n)
        A += np.outer(H, H)

    # random X
    X = rng.random_sample(n)

    # compute B
    B = A.dot(X)

    # solve for X given A and B
    Xs = cholesky_solve(A, B)

    # squared norm of the difference must vanish
    delta = Xs - X
    d = np.inner(delta, delta)
    self.assertAlmostEqual(d, 0.)
def test_cholesky_solve(self):
    """Solve A.X = B with cholesky_solve and compare with the known X.

    The matrix A is a sum of n outer products H.H^T of random vectors, hence
    symmetric and positive semi-definite by construction.
    """
    # A seeded, private RandomState keeps the test deterministic and leaves
    # the global numpy random state untouched.
    rng = np.random.RandomState(0)

    n = 12

    # create a random positive definite matrix A
    A = np.zeros((n, n))
    for _ in range(n):
        H = rng.random_sample(n)
        A += np.outer(H, H)

    # random solution X and the matching right-hand side B
    X = rng.random_sample(n)
    B = A.dot(X)

    # solve for X given A and B
    Xs = cholesky_solve(A, B)

    # the squared distance between solution and truth must be ~0
    delta = Xs - X
    d = np.inner(delta, delta)
    self.assertAlmostEqual(d, 0.)
def compute_fiberflat(frame, nsig_clipping=10., accuracy=5.e-4, minval=0.1,
                      maxval=10., max_iterations=100, smoothing_res=5.,
                      max_bad=100, max_rej_it=5, min_sn=0, diag_epsilon=1e-3):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with
    uncorrelated noise. They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, mask, R (per-fiber resolution matrices)
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
        minval: [optional] mask pixels with flux < minval * median fiberflat.
        maxval: [optional] mask pixels with flux > maxval * median fiberflat.
        max_iterations: [optional] maximum number of iterations
        smoothing_res: [optional] spacing between spline fit nodes for smoothing the fiberflat
        max_bad: [optional] mask entire fiber if more than max_bad-1 initially
            unmasked pixels are masked during the iterations
        max_rej_it: [optional] reject at most the max_rej_it worst pixels in each iteration
        min_sn: [optional] mask portions with signal to noise less than min_sn
        diag_epsilon: [optional] size of the regularization term in the
            deconvolution (accepted for interface compatibility; not used by
            this implementation)

    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the
      deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by
      this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION
    VARIATIONS, OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f^T R'_f
    # B = sum_(fiber f) R'_f^T D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux.copy()
    # The product creates a new array, so the in-place masking below never
    # touches frame.ivar.
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum

    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # of previous iteration for all wavelength
    # we also have a max. number of iterations for this code

    chi2pdf = 0.

    smooth_fiberflat = np.ones((flux.shape))

    ## mask low sn portions
    w = flux * np.sqrt(ivar) < min_sn
    ivar[w] = 0

    ## 0th pass: reject pixels according to minval and maxval
    mean_spectrum = np.zeros(flux.shape[1])
    nbad = np.zeros(nfibers, dtype=int)
    for iteration in range(max_iterations):
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])
        nbad_it = 0
        for fib in range(nfibers):
            w = ((flux[fib, :] < minval * mean_spectrum)
                 | (flux[fib, :] > maxval * mean_spectrum)) & (ivar[fib, :] > 0)
            nbad_it += w.sum()
            nbad[fib] += w.sum()

            if w.sum() > 0:
                ivar[fib, w] = 0
                log.warning("0th pass: masking {} pixels in fiber {}".format(
                    w.sum(), fib))
            if nbad[fib] >= max_bad:
                ivar[fib, :] = 0
                log.warning(
                    "0th pass: masking entire fiber {} (nbad={})".format(
                        fib, nbad[fib]))
        if nbad_it == 0:
            break

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations):

        # use median for spectrum
        mean_spectrum = np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])

        nbad_it = 0
        sum_chi2 = 0
        # not more than max_rej_it pixels per fiber at a time
        for fib in range(nfibers):
            w = ivar[fib, :] > 0
            if w.sum() == 0:
                continue
            F = flux[fib, :] * 0
            w = (mean_spectrum != 0) & (ivar[fib, :] > 0)
            F[w] = flux[fib, w] / mean_spectrum[w]
            smooth_fiberflat[fib, :] = spline_fit(
                wave, wave[w], F[w], smoothing_res,
                ivar[fib, w] * mean_spectrum[w]**2)
            chi2 = ivar[fib, :] * (flux[fib, :] - mean_spectrum
                                   * smooth_fiberflat[fib, :])**2
            bad = np.where(chi2 > nsig_clipping**2)[0]
            if bad.size > 0:
                if bad.size > max_rej_it:
                    # keep only the max_rej_it worst pixels for this iteration
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-max_rej_it:]]
                ivar[fib, bad] = 0
                log.warning(
                    "1st pass: rejecting {} pixels from fiber {}".format(
                        len(bad), fib))
                nbad[fib] += len(bad)
                if nbad[fib] >= max_bad:
                    ivar[fib, :] = 0
                    log.warning(
                        "1st pass: rejecting fiber {} due to too many (new) bad pixels"
                        .format(fib))
                nbad_it += len(bad)

            sum_chi2 += chi2.sum()
        ndf = int((ivar > 0).sum() - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info(
            "1st pass iter #{} chi2={}/{} chi2pdf={} nout={} (nsig={})".format(
                iteration, sum_chi2, ndf, chi2pdf, nbad_it, nsig_clipping))

        if nbad_it == 0:
            break

    ## flatten fiberflat
    ## normalize smooth_fiberflat:
    mean = np.ones(smooth_fiberflat.shape[1])
    for i in range(smooth_fiberflat.shape[1]):
        w = ivar[:, i] > 0
        if w.sum() > 0:
            mean[i] = np.median(smooth_fiberflat[w, i])
    smooth_fiberflat = smooth_fiberflat / mean

    previous_smooth_fiberflat = smooth_fiberflat * 0
    log.info("after 1st pass : nout = %d/%d" %
             (np.sum(ivar == 0), np.size(ivar.flatten())))

    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):
        ## reset sum_chi2
        sum_chi2 = 0
        log.info("2nd pass, iter %d : mean deconvolved spectrum" % iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d" %
                         (iteration, fiber))

            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]
            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])
        A_pos_def = A.todense()
        log.info("deconvolving")
        # keep only the wavelengths constrained by at least one fiber
        w = A.diagonal() > 0

        A_pos_def = A_pos_def[w, :]
        A_pos_def = A_pos_def[:, w]
        mean_spectrum = np.zeros(nwave)
        try:
            mean_spectrum[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # Fall back to a least-squares solve. `except Exception` (rather
            # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
            mean_spectrum[w] = np.linalg.lstsq(A_pos_def, B[w])[0]
            log.info("cholesky failes, trying svd inverse in iter {}".format(
                iteration))

        for fiber in range(nfibers):

            if np.sum(ivar[fiber] > 0) == 0:
                continue

            R = frame.R[fiber]

            M = R.dot(mean_spectrum)
            ok = (M != 0) & (ivar[fiber, :] > 0)
            if ok.sum() == 0:
                continue
            smooth_fiberflat[fiber] = spline_fit(
                wave, wave[ok], flux[fiber, ok] / M[ok], smoothing_res,
                ivar[fiber, ok] * M[ok]**2) * (ivar[fiber, :] * M**2 > 0)
            chi2 = ivar[fiber] * (flux[fiber] - smooth_fiberflat[fiber] * M)**2
            sum_chi2 += chi2.sum()
            w = np.isnan(smooth_fiberflat[fiber])
            if w.sum() > 0:
                # a NaN in the smooth flat invalidates the whole fiber
                ivar[fiber] = 0
                smooth_fiberflat[fiber] = 1

        # normalize to get a mean fiberflat=1
        mean = np.ones(smooth_fiberflat.shape[1])
        for i in range(nwave):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean[i] = np.median(smooth_fiberflat[w, i])
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        ndf = int(np.sum(ivar > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f" %
                 (iteration, sum_chi2, ndf, chi2pdf))

        if max_diff < accuracy:
            break

        log.info(
            "2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"
            % (iteration, max_diff, accuracy))

    # Number of pixels with zero weight in the working ivar at this point
    # (includes pixels that were already masked in the input frame).
    nout_tot = int(np.sum(ivar == 0))
    log.info("Total number of masked pixels=%d" % nout_tot)
    log.info("3rd pass, final computation of fiber flat")

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    mask = np.zeros((flux.shape), dtype='uint32')

    # reset ivar
    # NOTE(review): this drops the (frame.mask == 0) factor applied at the
    # top of the function -- confirm this is intended.
    ivar = frame.ivar

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    # Per-fiber bookkeeping of the 3rd pass clipping, so that the summary
    # logged in the interpolation loop refers to the right fiber.
    nbad_per_fiber = np.zeros(nfibers, dtype=int)
    niter_per_fiber = np.zeros(nfibers, dtype=int)

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        # flat = flux / model, set to 1 where the model is zero
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            w = fiberflat_ivar[fiber, :] > 0
            if w.sum() < 100:
                break
            smooth_flat = spline_fit(wave, wave[w], fiberflat[fiber, w],
                                     smoothing_res, fiberflat_ivar[fiber, w])
            chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] - smooth_flat)**2
            bad = np.where(chi2 > nsig_for_mask**2)[0]
            if bad.size > 0:

                nbadmax = 1
                if bad.size > nbadmax:
                    # not more than nbadmax pixels at a time
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-nbadmax:]]

                mask[fiber, bad] += fiberflat_mask
                fiberflat_ivar[fiber, bad] = 0.
                nbad_tot += bad.size
            else:
                break
            iteration += 1

        nbad_per_fiber[fiber] = nbad_tot
        niter_per_fiber[fiber] = iteration
        log.info("3rd pass : fiber #%d , number of iterations %d" %
                 (fiber, iteration))

    # set median flat to 1
    log.info("3rd pass : set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("3rd pass : interpolating over masked pixels")

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue
        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0)
                       | (fiberflat[fiber] < minval)
                       | (fiberflat[fiber] > maxval))[0]

        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # find max length of segment with bad pix: `bad` is sorted, so
            # runs of consecutive indices are delimited by jumps != 1
            runs = np.split(bad, np.where(np.diff(bad) != 1)[0] + 1)
            length = max(run.size for run in runs)
            if length > 10:
                log.info(
                    "3rd pass : fiber #%d has a max length of bad pixels=%d" %
                    (fiber, length))
            # node spacing (in pixel index units) wide enough to bridge the
            # longest bad segment
            interp_res = float(max(100, length))
            x = np.arange(wave.size)

            ok = fiberflat_ivar[fiber] > 0
            if ok.sum() == 0:
                continue
            try:
                smooth_flat = spline_fit(x, x[ok], fiberflat[fiber, ok],
                                         interp_res, fiberflat_ivar[fiber, ok])
                fiberflat[fiber, bad] = smooth_flat[bad]
            except Exception:
                # best effort: if the spline fit fails, flag the bad pixels
                # with a unit flat and zero weight
                fiberflat[fiber, bad] = 1
                fiberflat_ivar[fiber, bad] = 0

        if nbad_per_fiber[fiber] > 0:
            log.info(
                "3rd pass : fiber #%d masked pixels = %d (%d iterations)" %
                (fiber, nbad_per_fiber[fiber], niter_per_fiber[fiber]))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)
def compute_fiberflat(frame, nsig_clipping=4.):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with
    uncorrelated noise. They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, R (per-fiber resolution matrices)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Returns:
        FiberFlat object built from (wave, fiberflat, ivar, mask, meanspec):
            fiberflat : 2D fiberflat with the shape of frame.flux (data have
                to be divided by this to be flatfielded)
            ivar : inverse variance of that fiberflat
            mask : 0=ok >0 if problems
            meanspec : deconvolved mean spectrum

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the fiberflat is the ratio data/mean, so the data should be divided by
      this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION
    VARIATIONS, OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f^T R'_f
    # B = sum_(fiber f) R'_f^T D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar

    # Fixed settings of this implementation.
    max_iterations = 20
    smoothing_res = 100.  # spline smoothing scale passed to spline_fit
    fiberflat_mask = 12  # place holder for actual mask bit when defined
    nsig_for_mask = 4  # only mask out 4 sigma outliers

    # iterative fitting and clipping to get precise mean spectrum
    current_ivar = ivar.copy()

    smooth_fiberflat = np.ones((frame.flux.shape))
    chi2 = np.zeros((flux.shape))

    # NOTE(review): sqrtwflat is computed once with a unit fiber flat and is
    # only zeroed at rejected pixels afterwards; the smooth fiber flat fitted
    # in the loop is never fed back into it -- confirm this is intended.
    sqrtwflat = np.sqrt(current_ivar) * smooth_fiberflat
    sqrtwflux = np.sqrt(current_ivar) * flux

    nout_tot = 0
    for iteration in range(max_iterations):

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))

            R = frame.R[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)*flat
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        mean_spectrum = cholesky_solve(A.todense(), B)

        log.info("iter %d smoothing" % iteration)

        # fit smooth fiberflat and compute chi2
        for fiber in range(nfibers):
            R = frame.R[fiber]
            M = R.dot(mean_spectrum)

            # pad the denominator where the model is zero; those pixels get
            # zero weight in the spline fit
            F = flux[fiber] / (M + (M == 0))
            smooth_fiberflat[fiber] = spline_fit(
                wave, wave, F, smoothing_res, current_ivar[fiber] * (M != 0))
            chi2[fiber] = current_ivar[fiber] * (
                flux[fiber] - smooth_fiberflat[fiber] * M)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtwflat[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtwflat *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        smooth_fiberflat = smooth_fiberflat / mean
        mean_spectrum = mean_spectrum * mean

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    # The original used .astype(long); `long` does not exist on Python 3.
    # np.int64 keeps a 64-bit signed integer mask on every Python version.
    mask = np.zeros((flux.shape), dtype=np.int64)

    for fiber in range(nfibers):
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        # flat = flux / model, set to 1 where the model is zero
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2

        # flag pixels that deviate from a smooth version of the flat
        smooth_flat = spline_fit(wave, wave, fiberflat[fiber], smoothing_res,
                                 current_ivar[fiber] * M**2 * (M != 0))
        bad = np.where(fiberflat_ivar[fiber]
                       * (fiberflat[fiber] - smooth_flat)**2
                       > nsig_for_mask**2)[0]
        if bad.size > 0:
            mask[fiber, bad] += fiberflat_mask

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum)
def compute_fiberflat(frame, nsig_clipping=10., accuracy=5.e-4, minval=0.1, maxval=10.,max_iterations=100,smoothing_res=5.,max_bad=100,max_rej_it=5,min_sn=0,diag_epsilon=1e-3) :
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with
    uncorrelated noise. They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, mask, R (per-fiber resolution matrices)
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
        minval: [optional] mask pixels with flux < minval * median fiberflat.
        maxval: [optional] mask pixels with flux > maxval * median fiberflat.
        max_iterations: [optional] maximum number of iterations
        smoothing_res: [optional] spacing between spline fit nodes for smoothing the fiberflat
        max_bad: [optional] mask entire fiber if more than max_bad-1 initially
            unmasked pixels are masked during the iterations
        max_rej_it: [optional] reject at most the max_rej_it worst pixels in each iteration
        min_sn: [optional] mask portions with signal to noise less than min_sn
        diag_epsilon: [optional] size of the regularization term in the
            deconvolution (accepted for interface compatibility; not used by
            this implementation)

    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance, mask, and the
      deconvolved mean spectrum

    - the fiberflat is the ratio data/mean, so the data should be divided by
      this flat

    - a systematic error of 0.0035 is added in quadrature to the fiberflat
      uncertainty before returning

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION
    VARIATIONS, OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f^T R'_f
    # B = sum_(fiber f) R'_f^T D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux.copy()
    # The product creates a new array, so the in-place masking below never
    # touches frame.ivar.
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum

    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # of previous iteration for all wavelength
    # we also have a max. number of iterations for this code

    chi2pdf = 0.

    smooth_fiberflat = np.ones((flux.shape))

    ## mask low sn portions
    w = flux * np.sqrt(ivar) < min_sn
    ivar[w] = 0

    ## 0th pass: reject pixels according to minval and maxval
    mean_spectrum = np.zeros(flux.shape[1])
    nbad = np.zeros(nfibers, dtype=int)
    for iteration in range(max_iterations):
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])
        nbad_it = 0
        for fib in range(nfibers):
            w = ((flux[fib, :] < minval * mean_spectrum)
                 | (flux[fib, :] > maxval * mean_spectrum)) & (ivar[fib, :] > 0)
            nbad_it += w.sum()
            nbad[fib] += w.sum()

            if w.sum() > 0:
                ivar[fib, w] = 0
                log.warning("0th pass: masking {} pixels in fiber {}".format(w.sum(), fib))
            if nbad[fib] >= max_bad:
                ivar[fib, :] = 0
                log.warning("0th pass: masking entire fiber {} (nbad={})".format(fib, nbad[fib]))
        if nbad_it == 0:
            break

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations):

        # use median for spectrum
        mean_spectrum = np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean_spectrum[i] = np.median(flux[w, i])

        nbad_it = 0
        sum_chi2 = 0
        # not more than max_rej_it pixels per fiber at a time
        for fib in range(nfibers):
            w = ivar[fib, :] > 0
            if w.sum() == 0:
                continue
            F = flux[fib, :] * 0
            w = (mean_spectrum != 0) & (ivar[fib, :] > 0)
            F[w] = flux[fib, w] / mean_spectrum[w]
            try:
                smooth_fiberflat[fib, :] = spline_fit(
                    wave, wave[w], F[w], smoothing_res,
                    ivar[fib, w] * mean_spectrum[w]**2,
                    max_resolution=1.5 * smoothing_res)
            except ValueError:
                log.error("Error when smoothing the flat")
                log.error("Setting ivar=0 for fiber {} because spline fit failed".format(fib))
                ivar[fib, :] *= 0
            # with ivar zeroed after a failed fit, chi2 is identically zero
            chi2 = ivar[fib, :] * (flux[fib, :] - mean_spectrum * smooth_fiberflat[fib, :])**2
            bad = np.where(chi2 > nsig_clipping**2)[0]
            if bad.size > 0:
                if bad.size > max_rej_it:
                    # keep only the max_rej_it worst pixels for this iteration
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-max_rej_it:]]
                ivar[fib, bad] = 0
                log.warning("1st pass: rejecting {} pixels from fiber {}".format(len(bad), fib))
                nbad[fib] += len(bad)
                if nbad[fib] >= max_bad:
                    ivar[fib, :] = 0
                    log.warning("1st pass: rejecting fiber {} due to too many (new) bad pixels".format(fib))
                nbad_it += len(bad)

            sum_chi2 += chi2.sum()
        ndf = int((ivar > 0).sum() - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("1st pass iter #{} chi2={}/{} chi2pdf={} nout={} (nsig={})".format(iteration, sum_chi2, ndf, chi2pdf, nbad_it, nsig_clipping))

        if nbad_it == 0:
            break

    ## flatten fiberflat
    ## normalize smooth_fiberflat:
    mean = np.ones(smooth_fiberflat.shape[1])
    for i in range(smooth_fiberflat.shape[1]):
        w = ivar[:, i] > 0
        if w.sum() > 0:
            mean[i] = np.median(smooth_fiberflat[w, i])
    smooth_fiberflat = smooth_fiberflat / mean

    previous_smooth_fiberflat = smooth_fiberflat * 0
    previous_max_diff = 0.
    log.info("after 1st pass : nout = %d/%d" % (np.sum(ivar == 0), np.size(ivar.flatten())))

    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):
        ## reset sum_chi2
        sum_chi2 = 0
        log.info("2nd pass, iter %d : mean deconvolved spectrum" % iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d" % (iteration, fiber))

            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]
            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])
        A_pos_def = A.todense()
        log.info("deconvolving")
        # keep only the wavelengths constrained by at least one fiber
        w = A.diagonal() > 0

        A_pos_def = A_pos_def[w, :]
        A_pos_def = A_pos_def[:, w]
        mean_spectrum = np.zeros(nwave)
        try:
            mean_spectrum[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # Fall back to a least-squares solve. `except Exception` (rather
            # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
            mean_spectrum[w] = np.linalg.lstsq(A_pos_def, B[w])[0]
            log.info("cholesky failes, trying svd inverse in iter {}".format(iteration))

        for fiber in range(nfibers):

            if np.sum(ivar[fiber] > 0) == 0:
                continue

            R = frame.R[fiber]

            M = R.dot(mean_spectrum)
            ok = (M != 0) & (ivar[fiber, :] > 0)
            if ok.sum() == 0:
                continue
            try:
                smooth_fiberflat[fiber] = spline_fit(
                    wave, wave[ok], flux[fiber, ok] / M[ok], smoothing_res,
                    ivar[fiber, ok] * M[ok]**2,
                    max_resolution=1.5 * smoothing_res) * (ivar[fiber, :] * M**2 > 0)
            except ValueError:
                log.error("Error when smoothing the flat")
                log.error("Setting ivar=0 for fiber {} because spline fit failed".format(fiber))
                ivar[fiber, :] *= 0
            chi2 = ivar[fiber] * (flux[fiber] - smooth_fiberflat[fiber] * M)**2
            sum_chi2 += chi2.sum()
            w = np.isnan(smooth_fiberflat[fiber])
            if w.sum() > 0:
                # a NaN in the smooth flat invalidates the whole fiber
                ivar[fiber] = 0
                smooth_fiberflat[fiber] = 1

        # normalize to get a mean fiberflat=1
        mean = np.ones(smooth_fiberflat.shape[1])
        for i in range(nwave):
            w = ivar[:, i] > 0
            if w.sum() > 0:
                mean[i] = np.median(smooth_fiberflat[w, i])
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]

        # this is the max difference between two iterations
        max_diff = np.max(np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        ndf = int(np.sum(ivar > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f" % (iteration, sum_chi2, ndf, chi2pdf))

        if max_diff < accuracy:
            break

        # stop as well when the iterations have stalled
        if np.abs(max_diff - previous_max_diff) < accuracy * 0.1:
            log.warning("no significant improvement on max diff, quit loop")
            break

        previous_max_diff = max_diff

        log.info("2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating" % (iteration, max_diff, accuracy))

    # Number of pixels with zero weight in the working ivar at this point
    # (includes pixels that were already masked in the input frame).
    nout_tot = int(np.sum(ivar == 0))
    log.info("Total number of masked pixels=%d" % nout_tot)
    log.info("3rd pass, final computation of fiber flat")

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    mask = np.zeros((flux.shape), dtype='uint32')

    # reset ivar
    # NOTE(review): this drops the (frame.mask == 0) factor applied at the
    # top of the function -- confirm this is intended.
    ivar = frame.ivar

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    # Per-fiber bookkeeping of the 3rd pass clipping, so that the summary
    # logged in the interpolation loop refers to the right fiber.
    nbad_per_fiber = np.zeros(nfibers, dtype=int)
    niter_per_fiber = np.zeros(nfibers, dtype=int)

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        # flat = flux / model, set to 1 where the model is zero
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            w = fiberflat_ivar[fiber, :] > 0
            if w.sum() < 100:
                break
            smooth_flat = spline_fit(wave, wave[w], fiberflat[fiber, w], smoothing_res, fiberflat_ivar[fiber, w])
            chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] - smooth_flat)**2
            bad = np.where(chi2 > nsig_for_mask**2)[0]
            if bad.size > 0:

                nbadmax = 1
                if bad.size > nbadmax:
                    # not more than nbadmax pixels at a time
                    ii = np.argsort(chi2[bad])
                    bad = bad[ii[-nbadmax:]]

                mask[fiber, bad] += fiberflat_mask
                fiberflat_ivar[fiber, bad] = 0.
                nbad_tot += bad.size
            else:
                break
            iteration += 1

        nbad_per_fiber[fiber] = nbad_tot
        niter_per_fiber[fiber] = iteration
        log.info("3rd pass : fiber #%d , number of iterations %d" % (fiber, iteration))

    # set median flat to 1
    log.info("3rd pass : set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("3rd pass : interpolating over masked pixels")

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue
        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0)
                       | (fiberflat[fiber] < minval)
                       | (fiberflat[fiber] > maxval))[0]

        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # find max length of segment with bad pix: `bad` is sorted, so
            # runs of consecutive indices are delimited by jumps != 1
            runs = np.split(bad, np.where(np.diff(bad) != 1)[0] + 1)
            length = max(run.size for run in runs)
            if length > 10:
                log.info("3rd pass : fiber #%d has a max length of bad pixels=%d" % (fiber, length))
            # node spacing (in pixel index units) wide enough to bridge the
            # longest bad segment
            interp_res = float(max(100, length))
            x = np.arange(wave.size)

            ok = fiberflat_ivar[fiber] > 0
            if ok.sum() == 0:
                continue
            try:
                smooth_flat = spline_fit(x, x[ok], fiberflat[fiber, ok], interp_res, fiberflat_ivar[fiber, ok])
                fiberflat[fiber, bad] = smooth_flat[bad]
            except Exception:
                # best effort: if the spline fit fails, flag the bad pixels
                # with a unit flat and zero weight
                fiberflat[fiber, bad] = 1
                fiberflat_ivar[fiber, bad] = 0

        if nbad_per_fiber[fiber] > 0:
            log.info("3rd pass : fiber #%d masked pixels = %d (%d iterations)" % (fiber, nbad_per_fiber[fiber], niter_per_fiber[fiber]))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    for fiber in range(nfibers):
        fiberflat[fiber, ok] /= mean[ok]

    log.info("done fiberflat")

    log.info("add a systematic error of 0.0035 to fiberflat variance (calibrated on sims)")
    # variance -> variance + 0.0035**2 for pixels with ivar > 0; pixels with
    # ivar == 0 stay at 0 (the (ivar == 0) term only avoids a division by zero)
    fiberflat_ivar = (fiberflat_ivar > 0) / (1. / (fiberflat_ivar + (fiberflat_ivar == 0)) + 0.0035**2)

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)
def polynomial_fit(z, ez, xx, yy, degx, degy):
    """
    Computes a 2D polynomial fit of z as a function of (xx,yy) of degrees degx and degy

    The weighted normal equations are rebuilt and solved on every pass.
    After each solve, at most one outlier (the point with the largest
    weighted squared residual) is masked, and/or ``error_floor`` is raised
    by 0.002 and added in quadrature to ``ez``.  The loop ends when the
    median-based reduced chi2 is <= 1 and no point was rejected during that
    pass, or after 100 passes.

    Args:
        z : array of values to fit
        ez : array of same shape as z, uncertainties on z
        xx : array of same shape as z, forwarded to ``monomials`` as ``x``
        yy : array of same shape as z, forwarded to ``monomials`` as ``y``
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : array with the values of pol(x,y) from the last pass
        mask : int array of same shape as z, 1 for points kept in the fit, 0 for rejected points
    """
    # NOTE(review): np.argmax below returns a flat index and polval is
    # computed with M.T.dot(coeff), so this looks like it expects 1D
    # z/ez/xx/yy in practice -- confirm against the callers and `monomials`.
    M = monomials(x=xx, y=yy, degx=degx, degy=degy)

    error_floor = 0.

    npar = M.shape[0]
    A = np.zeros((npar, npar))
    B = np.zeros((npar))

    ez2 = ez**2  # loop invariant

    mask = np.ones(z.shape).astype(int)
    for loop in range(100):  # loop to increase errors

        w = 1. / (ez2 + error_floor**2)
        w[mask == 0] = 0.

        # rebuild the normal equations; every entry of A and B is
        # overwritten here, so no explicit reset is needed
        wz = w * z
        for k in range(npar):
            B[k] = np.sum(wz * M[k])
            wMk = w * M[k]
            for l in range(k + 1):
                A[k, l] = np.sum(wMk * M[l])
                A[l, k] = A[k, l]  # keep A symmetric

        coeff = cholesky_solve(A, B)
        polval = M.T.dot(coeff)
        res = z - polval

        # compute rchi2 with median
        ndata = np.sum(w > 0)
        rchi2 = 1.4826 * np.median(
            np.sqrt(w) * np.abs(res)) * ndata / float(ndata - npar)

        # reject huge outliers (at most one per pass)
        nbad = 0
        rvar = w * res**2
        worst = np.argmax(rvar)
        if rvar[worst] > 25 * max(rchi2, 1.2):  # cap rchi2 if starting point is very bad
            mask[worst] = 0
            nbad = 1

        # grow the error floor when the fit is still too poor; during the
        # first passes this is only done if nothing was rejected
        if rchi2 > 1:
            if nbad == 0 or loop > 5:
                error_floor += 0.002

        if rchi2 <= 1. and nbad == 0:
            break

    # rerun chol. solve to get covariance (uses A and B of the last pass)
    coeff, covariance = cholesky_solve_and_invert(A, B)

    return coeff, covariance, error_floor, polval, mask
def compute_fiberflat(frame, nsig_clipping=4.):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object; this routine reads its
            attributes nwave, nspec, wave, flux, ivar and R
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Returns:
        FiberFlat object built from (wave, fiberflat, ivar, mask, meanspec):
            wave : copy of frame.wave
            fiberflat : array of same shape as frame.flux (data have to be divided by this to be flatfielded)
            ivar : inverse variance of that fiberflat
            mask : integer array, 0=ok >0 if problems
            meanspec : deconvolved mean spectrum

    Notes:
    - we first iteratively :
       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers
    - then we compute a fiberflat at the native fiber resolution (not smoothed)
    - the fiberflat is the ratio data/mean , so this flat should be divided to the data

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar

    # iterative fitting and clipping to get precise mean spectrum
    current_ivar = ivar.copy()

    smooth_fiberflat = np.ones((frame.flux.shape))
    chi2 = np.zeros((flux.shape))

    # NOTE(review): sqrtwflat is built once from the initial all-ones smooth
    # flat; below it is only zeroed for rejected pixels and never refreshed
    # with the fitted smooth_fiberflat -- confirm this is intended.
    sqrtwflat = np.sqrt(current_ivar) * smooth_fiberflat
    sqrtwflux = np.sqrt(current_ivar) * flux

    nout_tot = 0
    for iteration in range(20):

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))

            R = frame.R[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)*flat
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        mean_spectrum = cholesky_solve(A.todense(), B)

        log.info("iter %d smoothing" % iteration)

        # fit smooth fiberflat and compute chi2
        smoothing_res = 100.  #A

        for fiber in range(nfibers):
            R = frame.R[fiber]
            M = R.dot(mean_spectrum)

            # (M == 0) in the denominator avoids dividing by zero; those
            # pixels get zero weight in the spline fit
            F = flux[fiber] / (M + (M == 0))
            smooth_fiberflat[fiber] = spline_fit(
                wave, wave, F, smoothing_res, current_ivar[fiber] * (M != 0))
            chi2[fiber] = current_ivar[fiber] * (
                flux[fiber] - smooth_fiberflat[fiber] * M)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtwflat[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtwflat *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        smooth_fiberflat = smooth_fiberflat / mean
        mean_spectrum = mean_spectrum * mean

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns

    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    # explicit 64-bit integer dtype: the Python 2 builtin `long` used here
    # previously does not exist on Python 3
    mask = np.zeros(flux.shape, dtype=np.int64)

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = 4  # only mask out 4 sigma outliers

    for fiber in range(nfibers):
        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()

        # flat = flux / convolved mean spectrum, set to 1 where the model is 0
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2

        # smooth version of this fiber's flat, only used to flag outliers
        smooth_flat = spline_fit(wave, wave, fiberflat[fiber], smoothing_res,
                                 current_ivar[fiber] * M**2 * (M != 0))
        bad = np.where(
            fiberflat_ivar[fiber] * (fiberflat[fiber] - smooth_flat)**2 >
            nsig_for_mask**2)[0]
        if bad.size > 0:
            mask[fiber, bad] += fiberflat_mask

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum)
def compute_polynomial_times_sky(frame,
                                 nsig_clipping=4.,
                                 max_iterations=30,
                                 model_ivar=False,
                                 add_variance=True,
                                 angular_variation_deg=1,
                                 chromatic_variation_deg=1):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : maximum total degree in the focal plane
            coordinates (x,y) of the polynomial
        chromatic_variation_deg : maximum degree in (normalized) wavelength
            of the polynomial

    returns SkyModel object with attributes wave, flux, ivar, mask
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates
    x = frame.fibermap["FIBERASSIGN_X"]
    y = frame.fibermap["FIBERASSIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys
    # wavelength rescaled linearly so the first/last grid points map to -1/+1
    wnorm = (frame.wave - frame.wave[0]) / (frame.wave[-1] -
                                            frame.wave[0]) * 2. - 1

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={} and {}".format(
        angular_variation_deg, chromatic_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            xypol = (x**dx) * (y**dy)
            for dw in range(chromatic_variation_deg + 1):
                wpol = wnorm**dw
                monomials.append(np.outer(xypol, wpol))

    ncoef = len(monomials)
    coef = np.zeros((ncoef))

    allfibers_monomials = np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(
        allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:, skyfibers, :]
    log.debug("shape of skyfibers_monomials = {}".format(
        skyfibers_monomials.shape))

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    # start from a polynomial identically equal to 1
    Pol = np.ones(flux.shape, dtype=float)
    coef[0] = 1.

    nout_tot = 0
    previous_chi2 = -10.
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients

        A = np.zeros((nwave, nwave), dtype=float)
        B = np.zeros((nwave), dtype=float)
        D = scipy.sparse.lil_matrix((nwave, nwave))
        D2 = scipy.sparse.lil_matrix((nwave, nwave))

        Pol /= coef[0]  # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (1st fit) %d/%d" %
                         (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            # each row r of R is multiplied by sqrtw[r]
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2)
            A += (sqrtwRP.T * sqrtwRP).todense()
            B += sqrtwRP.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        # only solve for the wavelengths that are constrained by the data
        valid = A.diagonal() > 0
        A_pos_def = A[valid, :]
        A_pos_def = A_pos_def[:, valid]
        parameters = B * 0
        try:
            parameters[valid] = cholesky_solve(A_pos_def, B[valid])
        except Exception:
            # best-effort fallback; KeyboardInterrupt/SystemExit are no
            # longer swallowed here
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[valid] = np.linalg.lstsq(A_pos_def, B[valid])[0]
        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients
        Ap = np.zeros((ncoef, ncoef), dtype=float)
        Bp = np.zeros((ncoef), dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (2nd fit) %d/%d" %
                         (iteration, fiber, nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(
                skyfibers_monomials[:, fiber, :].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight = 1e24
        Ap[0, 0] += weight
        Bp[0] += weight  # force 0th term to 1
        for i in range(1, chromatic_variation_deg + 1):
            Ap[i, i] += weight  # force other wavelength terms to 0

        coef = cholesky_solve(Ap, Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2" % iteration)
        for fiber in range(nfibers):
            chi2[fiber] = current_ivar[fiber] * (
                flux[fiber] - Rsky[fiber].dot(Pol[fiber] * parameters))**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf,
                  abs(sum_chi2 - previous_chi2), nout_iter))

        # stop once nothing is rejected and the chi2 has stabilized
        if nout_iter == 0 and abs(sum_chi2 - previous_chi2) < 0.2:
            break
        previous_chi2 = sum_chi2

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit a angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning(
            "cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv"
        )
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var +
                                                    (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar,
                       frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)

    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber] * parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(),
                    cskyflux,
                    modified_cskyivar,
                    mask,
                    nrej=nout_tot,
                    stat_ivar=cskyivar)
def polynomial_fit(z,ez,xx,yy,degx,degy) :
    """
    Computes a 2D polynomial fit of z as a function of (xx,yy) of degrees degx and degy

    On each pass the weighted normal equations are rebuilt and solved; then
    at most one outlier (largest weighted squared residual) is masked and/or
    error_floor is increased by 0.002 (it is added in quadrature to ez).
    Iterations stop when the median-based reduced chi2 is <= 1 with no
    rejection in that pass, or after 100 passes.

    Args:
        z : array of values to fit
        ez : array of same shape as z, uncertainties on z
        xx : array of same shape as z, forwarded to monomials() as x
        yy : array of same shape as z, forwarded to monomials() as y
        degx : int (>=0), polynomial degree along x
        degy : int (>=0), polynomial degree along y

    Returns:
        coeff : 1D array of size (degx+1)*(degy+1) with polynomial coefficients (as defined by routine monomials)
        covariance : 2D array of covariance of coeff
        error_floor : float , extra uncertainty needed to get chi2/ndf=1
        polval : array with the values of pol(x,y) from the last pass
        mask : int array of same shape as z, 1 for points kept in the fit, 0 for rejected points
    """
    # NOTE(review): the flat index returned by np.argmax and the use of
    # M.T.dot(coeff) suggest 1D inputs are expected in practice -- confirm.
    M=monomials(x=xx,y=yy,degx=degx,degy=degy)

    error_floor = 0.

    npar=M.shape[0]
    A=np.zeros((npar,npar))
    B=np.zeros((npar))

    var0=ez**2 # does not change from one pass to the next

    mask=np.ones(z.shape).astype(int)
    for loop in range(100) : # loop to increase errors

        w=1./(var0+error_floor**2)
        w[mask==0]=0.

        # fill the normal equations (all entries of A and B are overwritten)
        wz=w*z
        for k in range(npar) :
            B[k]=np.sum(wz*M[k])
            wMk=w*M[k]
            for l in range(k+1) :
                A[k,l]=np.sum(wMk*M[l])
                A[l,k]=A[k,l] # symmetric matrix

        coeff=cholesky_solve(A,B)
        polval = M.T.dot(coeff)
        delta = z-polval

        # compute rchi2 with median
        ndata=np.sum(w>0)
        rchi2=1.4826*np.median(np.sqrt(w)*np.abs(delta))*ndata/float(ndata-npar)

        # reject huge outliers, one at a time
        nbad=0
        rvar=w*delta**2
        worst=np.argmax(rvar)
        if rvar[worst] > 25*max(rchi2,1.2) : # cap rchi2 if starting point is very bad
            mask[worst]=0
            nbad=1

        # increase the error floor if the fit is still poor; in the first
        # passes only when no point was rejected
        if rchi2>1 :
            if nbad==0 or loop>5 :
                error_floor+=0.002

        if rchi2<=1. and nbad==0 :
            break

    # rerun chol. solve to get covariance (with A and B of the last pass)
    coeff,covariance=cholesky_solve_and_invert(A,B)

    return coeff,covariance,error_floor,polval,mask
def compute_non_uniform_sky(frame, nsig_clipping=4.,max_iterations=10,model_ivar=False,add_variance=True,angular_variation_deg=1) :
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]  (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1). One set of coefficients per wavelength

    returns SkyModel object with attributes wave, flux, ivar, mask
    """
    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    current_ivar=frame.ivar[skyfibers].copy()*(frame.mask[skyfibers]==0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    design_x = frame.fibermap["DESIGN_X"]
    design_y = frame.fibermap["DESIGN_Y"]
    x = design_x[skyfibers]
    y = design_y[skyfibers]

    # normalize for numerical stability (statistics taken over all fibers,
    # not only the sky fibers)
    xm = np.mean(design_x)
    ym = np.mean(design_y)
    xs = np.std(design_x)
    ys = np.std(design_y)
    if xs==0 : xs = 1
    if ys==0 : ys = 1
    x = (x-xm)/xs
    y = (y-ym)/ys

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials=[]
    for dx in range(angular_variation_deg+1) :
        for dy in range(angular_variation_deg+1-dx) :
            monomials.append((x**dx)*(y**dy))
    ncoef=len(monomials)
    monomials=np.array(monomials)

    input_ivar=None
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave  = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar   = np.median(median_ivar_vs_fiber)
        threshold=0.01
        for f in range(current_ivar.shape[0]) :
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    # slice of the parameter vector (and of A, B) holding the nwave
    # coefficients of each monomial
    blocks=[slice(p*nwave,(p+1)*nwave) for p in range(ncoef)]

    nout_tot=0
    for iteration in range(max_iterations) :

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) = sum_p monom[fiber,p] * a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] * a_ip
        # model[fiber,w] = sum_p monom[fiber,p] R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  =  sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]

        A=np.zeros((nwave*ncoef,nwave*ncoef))
        B=np.zeros((nwave*ncoef))

        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            wRtR=(sqrtwR.T*sqrtwR).todense()
            wRtF=sqrtwR.T*sqrtwflux[fiber]

            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef) :
                for k in range(ncoef) :
                    A[blocks[p],blocks[k]] += monomials[p,fiber]*monomials[k,fiber]*wRtR
                B[blocks[p]] += monomials[p,fiber]*wRtF

        log.info("iter %d solving"%iteration)
        # only solve for the parameters that are constrained by the data
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except Exception:
            # best-effort fallback; KeyboardInterrupt/SystemExit are no
            # longer swallowed here
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers) :
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef) :
                unconvolved_fiber_sky_flux += monomials[p,fiber]*parameters[blocks[p]]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(unconvolved_fiber_sky_flux)

            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        # NOTE(review): only nwave is subtracted here although nwave*ncoef
        # parameters are fitted; this only affects the logged chi2pdf -- confirm.
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution and keep
    # only the diagonal of the result.
    RmeanT = Rmean.T.todense()
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_covar[p,k] = np.diagonal(Rmean.dot(parameter_covar[blocks[p],blocks[k]]).dot(RmeanT))

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant for a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")

    for i in range(frame.nspec):
        # compute monomials for this fiber, same ordering as above
        M = []
        xi=(design_x[i]-xm)/xs
        yi=(design_y[i]-ym)/ys
        for dx in range(angular_variation_deg+1) :
            for dy in range(angular_variation_deg+1-dx) :
                M.append((xi**dx)*(yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux=np.zeros(nwave)
        convolved_fiber_skyvar=np.zeros(nwave)
        for p in range(ncoef) :
            unconvolved_fiber_sky_flux += M[p]*parameters[blocks[p]]
            for k in range(ncoef) :
                convolved_fiber_skyvar += M[p]*M[k]*convolved_parameter_covar[p,k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance (0 where the variance is not positive)
        cskyivar[i] = (convolved_fiber_skyvar>0)/(convolved_fiber_skyvar+(convolved_fiber_skyvar==0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar==0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar)
def main():
    """Derive a PCA-like basis of emission line fluxes from a redshift fit table.

    Command line arguments:
        -i / --infile  : path to the input FITS file (table in HDU 1)
        -o / --outfile : path of the text file receiving the result

    Steps, as implemented below:
      1. keep rows with ZWARN == 0 and list the lines that have a
         ``BEST_FLUX_<n>A`` column;
      2. compute mean line ratios with respect to the summed 3727+3729 flux;
      3. rescale every galaxy onto this mean set of ratios;
      4. iteratively fit one component at a time on the residuals to the mean,
         keeping the components orthonormal;
      5. write the coefficients to ``coefs.fits`` and the lines, mean fluxes,
         components and coefficient statistics to the output file.

    The process exits with status 12 if the oII columns are missing, if no
    entry has a valid oII flux, or if a per-galaxy linear solve fails.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i', '--infile', type=str, default=None, required=True,
                        help='path to zzbest.fits file')
    parser.add_argument('-o', '--outfile', type=str, default=None, required=True,
                        help='path to output json file')
    args = parser.parse_args()
    log = get_logger()

    hdulist = fits.open(args.infile)

    # find list of lines from table keywords
    keys = hdulist[1].columns.names
    table = hdulist[1].data
    ok = np.where(table["ZWARN"] == 0)[0]
    table = table[ok]

    lines = []
    for k in keys:
        if k.find("BEST_FLUX_") == 0 and k.find("BEST_FLUX_ERR") < 0:
            numbers = re.findall(r'\d+', k)
            if len(numbers) == 1:
                # int() replaces string.atoi, which does not exist in Python 3
                lines.append(int(numbers[0]))
    lines = np.unique(np.array(lines))
    log.info("lines in file: %s"%str(lines))

    oIIline1 = 3727
    oIIline2 = 3729
    try:
        oIIflux = table["BEST_FLUX_%dA"%oIIline1] + table["BEST_FLUX_%dA"%oIIline2]
        # quadratic sum of the two flux *errors* (the flux columns themselves
        # were used here before, which made oIIerr a function of the flux)
        oIIerr = np.sqrt(table["BEST_FLUX_ERR_%dA"%oIIline1]**2
                         + table["BEST_FLUX_ERR_%dA"%oIIline2]**2)
    except KeyError:
        log.error("cannot compute oII flux")
        log.error(sys.exc_info())
        sys.exit(12)

    # first step : compute an average set of line ratios
    # by scaling fluxes wrt to oII
    # we will then use this average set of line ratios to normalize
    # all entries and then start the pca
    selection = np.where((oIIflux > 0) & (oIIerr > 0))[0]
    if selection.size == 0:
        log.error("no entry with valid oII flux")
        sys.exit(12)

    flux = np.zeros((selection.size, lines.size))
    ivar = np.zeros((selection.size, lines.size))
    for i in range(lines.size):
        flux[:, i] = table["BEST_FLUX_%dA"%lines[i]][selection] / oIIflux[selection]
        var = (table["BEST_FLUX_ERR_%dA"%lines[i]][selection] / oIIflux[selection])**2
        # account for error on oIIflux
        var += (flux[:, i] * oIIerr[selection] / oIIflux[selection])**2
        good = np.where(var > 0)[0]
        ivar[good, i] = 1. / var[good]

    # test : do not weight with ivar because redshift dependence blurs the picture
    no_weight = True
    if no_weight:
        ivar = (ivar > 0) / (0.001)**2

    # this is the mean line ratios
    sivar = np.sum(ivar, axis=0)
    ok = np.where(sivar > 0)[0]
    lines = lines[ok]
    mean_flux_wrt_oII = np.sum(ivar * flux, axis=0)[ok] / sivar[ok]

    # refit the amp of each galaxy wrt to mean_flux_wrt_oII
    ngal = table.size
    log.info("number of galaxies = %d"%ngal)

    # fill array
    flux = np.zeros((ngal, lines.size))
    ivar = np.zeros((ngal, lines.size))
    for i in range(lines.size):
        flux[:, i] = table["BEST_FLUX_%dA"%lines[i]]
        var = (table["BEST_FLUX_ERR_%dA"%lines[i]])**2
        ok = np.where(var > 0)[0]
        ivar[ok, i] = 1. / var[ok]
    if no_weight:
        ivar = (ivar > 0) / (0.001)**2

    # for each gal, fit scale and apply it
    a = np.sum(ivar * mean_flux_wrt_oII**2, axis=1)
    b = np.sum(ivar * mean_flux_wrt_oII * flux, axis=1)
    scale = b / (a + (a == 0))
    for i in range(ngal):
        if scale[i] > 0:
            flux[i] /= scale[i]
            ivar[i] *= scale[i]**2
        else:
            # galaxies without a positive scale are dropped from the fit
            flux[i] = 0.
            ivar[i] = 0.

    dchi2min = 1.
    if no_weight:
        ivar = (ivar > 0) / (0.001)**2

    a = np.sum(ivar, axis=0)
    mean = np.sum(ivar * flux, axis=0) / a
    residuals = flux - mean
    tmpres = residuals.copy()

    # now we can try to do some sort of pca
    eigenvectors = np.zeros((lines.size, lines.size))
    coefs = np.zeros((ngal, lines.size))
    bb = np.zeros((lines.size))
    aa = np.zeros((lines.size))
    chi2 = 1e20

    for e in range(lines.size):
        # initial guess: a flat vector made orthogonal to the previous components
        eigenvectors[e] = np.ones(lines.size)
        # orthogonalize (Gram-Schmidt: remove the projection on each previous
        # vector; the projection factor used to be computed but not applied)
        for i in range(e):
            prod = np.inner(eigenvectors[e], eigenvectors[i])
            eigenvectors[e] -= prod * eigenvectors[i]
        # normalize
        eigenvectors[e] /= np.sqrt(np.sum(eigenvectors[e]**2))

        A = np.zeros((e+1, e+1)).astype(float)
        B = np.zeros((e+1)).astype(float)

        for loop in range(500):

            # refit coordinates, including previous ones
            for g in range(ngal):
                A *= 0.
                B *= 0.
                for i in range(e+1):
                    B[i] = np.sum(ivar[g] * eigenvectors[i] * residuals[g])
                    for j in range(e+1):
                        A[i, j] = np.sum(ivar[g] * eigenvectors[i] * eigenvectors[j])
                    A[i, i] += 0.00001  # weak prior
                try:
                    coefs[g, :e+1] = cholesky_solve(A, B)
                except Exception:
                    # a failed solve is fatal: dump the system and stop
                    log.warning("cholesky_solve error")
                    print("A=", A)
                    print("B=", B)
                    print("ivar=", ivar[g])
                    print("eigenvectors[e]=", eigenvectors[e])
                    sys.exit(12)

                # update residuals
                tmpres[g] = residuals[g]
                for i in range(e):
                    tmpres[g] -= coefs[g, i] * eigenvectors[i]

            old = eigenvectors[e].copy()

            # refit this eigen vector (only the current one)
            for i in [e]:
                aa *= 0.
                bb *= 0.
                for l in range(lines.size):
                    bb[l] = np.sum(ivar[:, l] * coefs[:, i] * tmpres[:, l])
                    aa[l] = np.sum(ivar[:, l] * coefs[:, i]**2)
                newvect = (aa > 0) * bb / (aa + (aa == 0))

                # orthogonalize, moving the removed projection into the
                # coefficients of the previous components
                for j in range(i):
                    prod = np.inner(newvect, eigenvectors[j])
                    newvect -= prod * eigenvectors[j]
                    coefs[:, j] += prod * coefs[:, i]
                    for g in range(ngal):
                        tmpres[g] -= prod * coefs[g, i] * eigenvectors[j]

                # normalize
                norme = np.sqrt(np.sum(newvect**2))
                newvect /= norme
                coefs[:, i] *= norme
                eigenvectors[i] = newvect

                # update tmpres
                for g in range(ngal):
                    tmpres[g] -= coefs[g, i] * eigenvectors[i]

            oldchi2 = chi2
            chi2 = np.sum(ivar * tmpres**2)
            ndf = np.sum(ivar > 0) - (e+1)
            dchi2 = oldchi2 - chi2
            dist = np.max(np.abs(old - eigenvectors[e]))
            # stop when the vector is stable or chi2 no longer improves
            if dist < 1e-4 or dchi2 < dchi2min:
                break
            for i in [e]:
                log.info("#%d-%d chi2=%f chi2/ndf=%f dchi2=%f %s"%(i,loop,chi2,chi2/ndf,dchi2,str(eigenvectors[i])))

    # 'overwrite' is the keyword used elsewhere in this file ('clobber' is the old name)
    fits.writeto("coefs.fits", coefs, overwrite=True)
    log.info("wrote coefs.fits")

    # NOTE(review): the text written below starts with '"pca":{' and has no
    # enclosing braces, so it is a fragment rather than a standalone JSON
    # document; the layout is kept exactly as it was.
    ncomp = eigenvectors.shape[0]
    with open(args.outfile, "w") as out:
        out.write('"pca":{\n')
        out.write('"lines": [' + ",".join("%d"%l for l in lines) + '],\n')
        out.write('"mean_flux": [' + ",".join("%f"%mean[e] for e in range(ncomp)) + '],\n')
        out.write('"components": [\n')
        for e in range(ncomp):
            row = ",".join("%f"%eigenvectors[e, i] for i in range(eigenvectors.shape[1]))
            if e < ncomp - 1:
                out.write('[' + row + '],\n')
            else:
                out.write('[' + row + ']\n')
        out.write('],\n')
        out.write('"mean_coef": [' + ",".join("%f"%np.mean(coefs[:, e]) for e in range(ncomp)) + '],\n')
        out.write('"rms_coef": [' + ",".join("%f"%np.std(coefs[:, e]) for e in range(ncomp)) + '],\n')
        out.write('"min_coef": [' + ",".join("%f"%np.min(coefs[:, e]) for e in range(ncomp)) + '],\n')
        out.write('"max_coef": [' + ",".join("%f"%np.max(coefs[:, e]) for e in range(ncomp)) + ']\n')
        out.write('}\n')
    log.info("wrote %s"%args.outfile)
def compute_polynomial_times_sky(frame, nsig_clipping=4., max_iterations=30, model_ivar=False,
                                 add_variance=True, angular_variation_deg=1,
                                 chromatic_variation_deg=1):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : int, maximum total degree of the polynomial in the
            normalized focal plane coordinates (DESIGN_X, DESIGN_Y)
        chromatic_variation_deg : int, maximum degree of the polynomial in the
            normalized wavelength

    returns SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError : if the fibermap contains no fiber with OBJTYPE == 'SKY'
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() on an empty array would fail below with an unhelpful message
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]):
            threshold = 0.01
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates
    x = frame.fibermap["DESIGN_X"]
    y = frame.fibermap["DESIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys
    # wavelength mapped linearly onto [-1, 1]
    wnorm = (frame.wave - frame.wave[0]) / (frame.wave[-1] - frame.wave[0]) * 2. - 1

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={} and {}".format(angular_variation_deg,chromatic_variation_deg))
    monomials = []
    for dx in range(angular_variation_deg + 1):
        for dy in range(angular_variation_deg + 1 - dx):
            xypol = (x**dx) * (y**dy)
            for dw in range(chromatic_variation_deg + 1):
                wpol = wnorm**dw
                monomials.append(np.outer(xypol, wpol))

    ncoef = len(monomials)
    coef = np.zeros((ncoef))

    allfibers_monomials = np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:, skyfibers, :]
    log.debug("shape of skyfibers_monomials = {}".format(skyfibers_monomials.shape))

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    Pol = np.ones(flux.shape, dtype=float)
    coef[0] = 1.

    nout_tot = 0
    previous_chi2 = -10.
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i = 1/2 d(chi2)/di
        # B_i = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients

        A = np.zeros((nwave, nwave), dtype=float)
        B = np.zeros((nwave), dtype=float)
        D = scipy.sparse.lil_matrix((nwave, nwave))
        D2 = scipy.sparse.lil_matrix((nwave, nwave))

        Pol /= coef[0]  # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (1st fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2)  # each row r of R is multiplied by sqrtw[r]
            A += (sqrtwRP.T * sqrtwRP).todense()
            B += sqrtwRP.T * sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)
        # only solve for the wavelengths that are constrained by the data
        valid = A.diagonal() > 0
        A_pos_def = A[valid, :]
        A_pos_def = A_pos_def[:, valid]
        parameters = B * 0
        try:
            parameters[valid] = cholesky_solve(A_pos_def, B[valid])
        except Exception:
            # any failure of the Cholesky solver falls back to least squares;
            # KeyboardInterrupt/SystemExit are no longer swallowed
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[valid] = np.linalg.lstsq(A_pos_def, B[valid], rcond=None)[0]
        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients
        Ap = np.zeros((ncoef, ncoef), dtype=float)
        Bp = np.zeros((ncoef), dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber (2nd fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(skyfibers_monomials[:, fiber, :].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight = 1e24
        Ap[0, 0] += weight
        Bp[0] += weight  # force 0th term to 1
        for i in range(1, chromatic_variation_deg + 1):
            Ap[i, i] += weight  # force other wavelength terms to 0

        coef = cholesky_solve(Ap, Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2"%iteration)
        for fiber in range(nfibers):
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - Rsky[fiber].dot(Pol[fiber] * parameters))**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,abs(sum_chi2-previous_chi2),nout_iter))

        # converged: nothing rejected and chi2 stable
        if nout_iter == 0 and abs(sum_chi2 - previous_chi2) < 0.2:
            break
        previous_chi2 = sum_chi2 + 0.

    log.info("nout tot=%d"%nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit a angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var + (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)

    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber] * parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)
def compute_uniform_sky(frame, nsig_clipping=4., max_iterations=100, model_ivar=False, add_variance=True):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance

    returns SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError : if the fibermap contains no fiber with OBJTYPE == 'SKY'
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() on an empty array would fail below with an unhelpful message
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar = None
    if model_ivar:
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]):
            threshold = 0.01
            current_ivar[f] = median_ivar_vs_fiber[f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i = 1/2 d(chi2)/di
        # B_i = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i

        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        A = np.zeros((nwave, nwave))
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A += (sqrtwR.T * sqrtwR).todense()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving"%iteration)
        # only solve for the wavelengths that are constrained by the data
        valid = A.diagonal() > 0
        A_pos_def = A[valid, :]
        A_pos_def = A_pos_def[:, valid]
        parameters = B * 0
        try:
            parameters[valid] = cholesky_solve(A_pos_def, B[valid])
        except Exception:
            # any failure of the Cholesky solver falls back to least squares;
            # KeyboardInterrupt/SystemExit are no longer swallowed
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[valid] = np.linalg.lstsq(A_pos_def, B[valid], rcond=None)[0]

        log.info("iter %d compute chi2"%iteration)

        for fiber in range(nfibers):
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - fiber_convolved_sky_flux)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        # stop as soon as an iteration rejects nothing
        if nout_iter == 0:
            break

    log.info("nout tot=%d"%nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance
    try:
        parameter_covar = cholesky_invert(A)
        # the above is too slow
        # maybe invert per block, sandwich by R
    except np.linalg.LinAlgError:
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var + (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar, skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)
def compute_sky(fframe, fibermap=None, nsig_clipping=4., apply_resolution=False):
    """Compute a sky model for quicklook.

    Adding in the offline algorithm here to be able to apply resolution for sky compute.
    We will update this here as needed for quicklook.
    The original weighted sky compute still is the default.

    Args:
        fframe: fiberflat fielded frame object
        fibermap: fibermap object, only used when fframe.fibermap is None
        nsig_clipping: sigma clipping value for the outlier rejection
            (only used when apply_resolution is True)
        apply_resolution: if True, uses the resolution in the frame object to evaluate
            sky allowing fiber to fiber variation of resolution.

    Returns:
        SkyModel built from fframe.wave, the sky flux, its inverse variance and a mask,
        all replicated for the nspec fibers of the frame.

    Raises:
        SystemExit: if neither fframe.fibermap nor fibermap is available.
    """
    nspec = fframe.nspec
    nwave = fframe.nwave

    #- Check with fibermap. exit if None
    #- use fibermap from frame itself if exists
    if fframe.fibermap is not None:
        fibermap = fframe.fibermap

    if fibermap is None:
        # fatal: report on stderr with a non-zero exit status
        # (this used to print to stdout and exit with status 0)
        sys.exit("Must have fibermap for Sky compute")

    #- get the sky
    skyfibers = np.where(fibermap['OBJTYPE'] == 'SKY')[0]
    skyfluxes = fframe.flux[skyfibers]
    skyivars = fframe.ivar[skyfibers]

    nfibers = len(skyfibers)

    if apply_resolution:
        max_iterations = 100
        current_ivar = skyivars.copy()
        Rsky = fframe.R[skyfibers]
        sqrtw = np.sqrt(skyivars)
        sqrtwflux = sqrtw * skyfluxes

        chi2 = np.zeros(skyfluxes.shape)

        nout_tot = 0
        for iteration in range(max_iterations):

            A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
            B = np.zeros((nwave))
            # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
            SD = scipy.sparse.lil_matrix((nwave, nwave))
            # loop on fiber to handle resolution
            for fiber in range(nfibers):
                if fiber % 10 == 0:
                    print("iter %d fiber %d"%(iteration,fiber))
                R = Rsky[fiber]

                # diagonal sparse matrix with content = sqrt(ivar)
                SD.setdiag(sqrtw[fiber])

                sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

                A = A + (sqrtwR.T * sqrtwR).tocsr()
                B += sqrtwR.T * sqrtwflux[fiber]

            print("iter %d solving"%iteration)

            # only solve for the wavelengths that are constrained by the data
            valid = A.diagonal() > 0
            A_pos_def = A.todense()[valid, :]
            A_pos_def = A_pos_def[:, valid]
            skyflux = B * 0
            try:
                # called like every other cholesky_solve call in this file; the
                # former rcond=None keyword is a np.linalg.lstsq option
                # NOTE(review): presumably cholesky_solve rejected that keyword,
                # so this branch always fell through to lstsq -- confirm
                skyflux[valid] = cholesky_solve(A_pos_def, B[valid])
            except Exception:
                print("cholesky failed, trying svd in iteration {}".format(iteration))
                skyflux[valid] = np.linalg.lstsq(A_pos_def, B[valid], rcond=None)[0]

            print("iter %d compute chi2"%iteration)

            for fiber in range(nfibers):
                S = Rsky[fiber].dot(skyflux)
                chi2[fiber] = current_ivar[fiber] * (skyfluxes[fiber] - S)**2

            print("rejecting")

            nout_iter = 0
            if iteration < 1:
                # only remove worst outlier per wave
                # apply rejection iteratively, only one entry per wave among fibers
                # find waves with outlier (fastest way)
                nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
                selection = np.where(nout_per_wave > 0)[0]
                for i in selection:
                    worst_entry = np.argmax(chi2[:, i])
                    current_ivar[worst_entry, i] = 0
                    sqrtw[worst_entry, i] = 0
                    sqrtwflux[worst_entry, i] = 0
                    nout_iter += 1
            else:
                # remove all of them at once
                bad = (chi2 > nsig_clipping**2)
                current_ivar *= (bad == 0)
                sqrtw *= (bad == 0)
                sqrtwflux *= (bad == 0)
                nout_iter += np.sum(bad)

            nout_tot += nout_iter

            sum_chi2 = float(np.sum(chi2))
            ndf = int(np.sum(chi2 > 0) - nwave)
            chi2pdf = 0.
            if ndf > 0:
                chi2pdf = sum_chi2 / ndf
            print("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

            # stop as soon as an iteration rejects nothing
            if nout_iter == 0:
                break

        print("nout tot=%d"%nout_tot)

        # solve once again to get deconvolved sky variance
        A_dense = A.todense()  # densify once for both calls below
        skyflux = np.linalg.lstsq(A_dense, B, rcond=None)[0]
        skycovar = np.linalg.pinv(A_dense)

        # Use diagonal of skycovar convolved with mean resolution of all fibers
        # first compute average resolution
        #- computing mean from matrix itself
        R = (fframe.R.sum() / fframe.nspec).todia()

        # compute convolved sky and ivar
        cskycovar = R.dot(skycovar).dot(R.T.todense())
        cskyvar = np.diagonal(cskycovar)
        cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

        # convert cskyivar to 2D; today it is the same for all spectra,
        # but that may not be the case in the future
        finalskyivar = np.tile(cskyivar, nspec).reshape(nspec, nwave)

        # Convolved sky
        finalskyflux = np.zeros(fframe.flux.shape)
        for i in range(nspec):
            finalskyflux[i] = fframe.R[i].dot(skyflux)

        # need to do better here
        mask = (finalskyivar == 0).astype(np.uint32)

    else:
        #- compute weighted average sky ignoring the fiber/wavelength resolution
        if skyfibers.shape[0] > 1:

            weights = skyivars

            #- now get weighted meansky and ivar
            meanskyflux = np.average(skyfluxes, axis=0, weights=weights)
            wtot = weights.sum(axis=0)
            werr2 = (weights**2 * (skyfluxes - meanskyflux)**2).sum(axis=0)
            werr = np.sqrt(werr2) / wtot
            meanskyivar = 1. / werr**2
        else:
            # a single sky fiber: use it as is
            meanskyflux = skyfluxes
            meanskyivar = skyivars

        #- Create a 2d- sky model replicating this
        finalskyflux = np.tile(meanskyflux, nspec).reshape(nspec, nwave)
        finalskyivar = np.tile(meanskyivar, nspec).reshape(nspec, nwave)
        mask = fframe.mask

    skymodel = SkyModel(fframe.wave, finalskyflux, finalskyivar, mask)
    return skymodel
def compute_sky(frame, nsig_clipping=4., max_iterations=20):
    """Compute a sky model.

    The sky fibers are selected from frame.fibermap (OBJTYPE == 'SKY'); a
    deconvolved sky spectrum is fit to them with iterative outlier rejection
    and then convolved with the resolution of every fiber of the frame.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        max_iterations : [optional] maximum number of fit/rejection iterations

    returns SkyModel object with attributes wave, flux, ivar, mask

    Raises:
        ValueError : if the fibermap contains no fiber with OBJTYPE == 'SKY'
    """

    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        # np.max() on an empty array would fail below with an unhelpful message
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    current_ivar = frame.ivar[skyfibers].copy()
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)

    nout_tot = 0
    for iteration in range(max_iterations):

        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))
        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))
        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)

        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            S = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - S)**2

        log.info("rejecting")

        nout_iter = 0
        if iteration < 1:
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave = np.sum(chi2 > nsig_clipping**2, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            for i in selection:
                worst_entry = np.argmax(chi2[:, i])
                current_ivar[worst_entry, i] = 0
                sqrtw[worst_entry, i] = 0
                sqrtwflux[worst_entry, i] = 0
                nout_iter += 1
        else:
            # remove all of them at once
            bad = (chi2 > nsig_clipping**2)
            current_ivar *= (bad == 0)
            sqrtw *= (bad == 0)
            sqrtwflux *= (bad == 0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" % (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        # stop as soon as an iteration rejects nothing
        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)
    # compute convolved sky and ivar
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    # inverse (0 where the variance is not positive)
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Convolved sky
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask, nrej=nout_tot)
def compute_fiberflat(frame, nsig_clipping=4., accuracy=5.e-4, minval=0.1,
                      maxval=10.):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with
    uncorrelated noise. They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object with attributes
            wave, flux, ivar, mask, resolution_data
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loop)
        minval : [optional] 1st pass masks pixels with flux < minval * median
            spectrum; 3rd pass replaces fiberflat values < minval
        maxval : [optional] 1st pass masks pixels with flux > maxval * median
            spectrum; 3rd pass replaces fiberflat values > maxval

    Returns:
        desispec.FiberFlat object with attributes
            wave, fiberflat, ivar, mask, meanspec

    Notes:
    - we first iteratively :

       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers

    - then we compute a fiberflat at the native fiber resolution (not smoothed)

    - the routine returns the fiberflat, its inverse variance , mask, and the deconvolved mean spectrum

    - the fiberflat is the ratio data/mean , so this flat should be divided to the data

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum
    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # of previous iteration for all wavelength
    # we also have a max. number of iterations for this code
    max_iterations = 100

    # spacing of the spline nodes used to smooth the fiber flat
    # (passed as-is to spline_fit in all three passes)
    smoothing_res = 100.

    nout_tot = 0
    chi2pdf = 0.

    smooth_fiberflat = np.ones((frame.flux.shape))
    previous_smooth_fiberflat = smooth_fiberflat.copy()

    chi2 = np.zeros((flux.shape))

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations):

        # use median of unmasked fibers for spectrum
        mean_spectrum = np.zeros((flux.shape[1]))
        for i in range(flux.shape[1]):
            ok = np.where(ivar[:, i] > 0)[0]
            if ok.size > 0:
                mean_spectrum[i] = np.median(flux[ok, i])

        # mask pixels far from mean spectrum
        nout_iter = 0
        for fiber in range(nfibers):
            bad = np.where((ivar[fiber] > 0) &
                           ((flux[fiber] > maxval * mean_spectrum) |
                            (flux[fiber] < minval * mean_spectrum)))[0]
            if bad.size > 100:
                log.warning(
                    "masking fiber %d because of bad flat field with %d bad pixels"
                    % (fiber, bad.size))
                ivar[fiber] = 0.
            if bad.size > 0:
                log.warning("masking %d bad pixels for fiber %d" %
                            (bad.size, fiber))
                ivar[fiber, bad] = 0.
                nout_iter += bad.size

        # fit smooth fiberflat
        for fiber in range(nfibers):
            if np.sum(ivar[fiber] > 0) == 0:
                continue
            ok = np.where((mean_spectrum != 0) & (ivar[fiber] > 0))[0]
            smooth_fiberflat[fiber] = spline_fit(
                wave, wave[ok], flux[fiber, ok] / mean_spectrum[ok],
                smoothing_res, ivar[fiber, ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] = smooth_fiberflat[:, ok] / mean[ok]
        mean_spectrum *= mean

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) *
            (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        # we don't start the rejection tests until we have converged on this
        if max_diff > 0.01:
            log.info(
                "1st pass, max diff. = %g > 0.01 , continue iterating before outlier rejection"
                % (max_diff))
            continue

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        # sigma clipping, not more than 5 pixels per fiber at a time,
        # refitting the smooth flat of the fiber after each rejection
        for fiber in range(nfibers):
            for loop in range(max_iterations):
                bad = np.where(chi2[fiber] > nsig_clipping**2)[0]
                if bad.size == 0:
                    break
                if bad.size > 5:  # not more than 5 pixels at a time
                    ii = np.argsort(chi2[fiber, bad])
                    bad = bad[ii[-5:]]
                ivar[fiber, bad] = 0
                nout_iter += bad.size
                ok = np.where((mean_spectrum != 0) & (ivar[fiber] > 0))[0]
                # ratio computed locally: no dependence on a scratch array
                # left over from the fit loop above
                smooth_fiberflat[fiber] = spline_fit(
                    wave, wave[ok], flux[fiber, ok] / mean_spectrum[ok],
                    smoothing_res, ivar[fiber, ok])
                chi2[fiber] = ivar[fiber] * (
                    flux[fiber] - smooth_fiberflat[fiber] * mean_spectrum)**2

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(
            np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info(
            "1st pass iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d (nsig=%f)" %
            (iteration, sum_chi2, ndf, chi2pdf, nout_iter, nsig_clipping))

        if max_diff > accuracy:
            log.info(
                "1st pass iter #%d max diff. = %g > requirement = %g , continue iterating"
                % (iteration, max_diff, accuracy))
            continue

        if nout_iter == 0:
            break

    log.info("after 1st pass : nout = %d/%d" % (np.sum(ivar == 0), ivar.size))

    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):

        log.info("2nd pass, iter %d : mean deconvolved spectrum" % iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d" %
                         (iteration, fiber))

            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])

        mean_spectrum = cholesky_solve(A.todense(), B)

        # fit smooth fiberflat against the resolution-convolved mean spectrum
        for fiber in range(nfibers):
            if np.sum(ivar[fiber] > 0) == 0:
                continue
            R = frame.R[fiber]
            M = R.dot(mean_spectrum)
            ok = np.where(M != 0)[0]
            smooth_fiberflat[fiber] = spline_fit(wave, wave[ok],
                                                 flux[fiber, ok] / M[ok],
                                                 smoothing_res,
                                                 ivar[fiber, ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]
        mean_spectrum *= mean

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        # this is the max difference between two iterations
        max_diff = np.max(
            np.abs(smooth_fiberflat - previous_smooth_fiberflat) *
            (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        sum_chi2 = float(np.sum(chi2))
        ndf = int(
            np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f" %
                 (iteration, sum_chi2, ndf, chi2pdf))

        if max_diff < accuracy:
            break

        log.info(
            "2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"
            % (iteration, max_diff, accuracy))

    log.info("Total number of masked pixels=%d" % nout_tot)
    log.info("3rd pass, final computation of fiber flat")

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns
    fiberflat = np.ones((flux.shape))
    fiberflat_ivar = np.zeros((flux.shape))
    mask = np.zeros((flux.shape), dtype='uint32')

    # reset ivar: pixels rejected in the previous passes are reconsidered here
    ivar = frame.ivar

    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        # ratio data/model, set to 1 (with zero ivar) where the model is 0
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2

        # flag pixels deviating from a smooth version of this fiber's flat,
        # at most 5 pixels per iteration
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            fiber_smooth = spline_fit(wave, wave, fiberflat[fiber],
                                      smoothing_res, fiberflat_ivar[fiber])
            fiber_chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] -
                                                  fiber_smooth)**2
            bad = np.where(fiber_chi2 > nsig_for_mask**2)[0]
            if bad.size == 0:
                break
            if bad.size > 5:  # not more than 5 pixels at a time
                ii = np.argsort(fiber_chi2[bad])
                bad = bad[ii[-5:]]
            mask[fiber, bad] += fiberflat_mask
            fiberflat_ivar[fiber, bad] = 0.
            nbad_tot += bad.size
            iteration += 1

        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0) |
                       (fiberflat[fiber] < minval) |
                       (fiberflat[fiber] > maxval))[0]

        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # max length of a segment of consecutive bad pixels;
            # bad is sorted, so runs are delimited where the step is not 1
            breaks = np.where(np.diff(bad) != 1)[0]
            run_edges = np.concatenate(([-1], breaks, [bad.size - 1]))
            length = int(np.max(np.diff(run_edges)))
            if length > 10:
                log.info(
                    "3rd pass : fiber #%d has a max length of bad pixels=%d" %
                    (fiber, length))

            # widen the smoothing for this fiber only, so that the spline
            # bridges its largest gap; kept local so it does not leak into
            # the outlier detection of the following fibers
            gap_smoothing_res = float(max(100, 2 * length))
            x = np.arange(wave.size)

            ok = np.where(fiberflat_ivar[fiber] > 0)[0]
            fiber_smooth = spline_fit(x, x[ok], fiberflat[fiber, ok],
                                      gap_smoothing_res,
                                      fiberflat_ivar[fiber, ok])
            fiberflat[fiber, bad] = fiber_smooth[bad]

        if nbad_tot > 0:
            log.info(
                "3rd pass : fiber #%d masked pixels = %d (%d iterations)" %
                (fiber, nbad_tot, iteration))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones((flux.shape[1]))
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    fiberflat[:, ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)
def compute_uniform_sky(frame, nsig_clipping=4., max_iterations=100,
                        model_ivar=False, add_variance=True):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] Flux[j]

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance

    Returns:
        SkyModel object built from (wave, flux, ivar, mask), with ``nrej``
        the total number of rejected pixels and ``stat_ivar`` the purely
        statistical inverse variance.

    Raises:
        ValueError: if the fibermap contains no sky fiber.
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get zero weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            # separable model: per-fiber scale times per-wavelength profile
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)
    clip_threshold = nsig_clipping**2

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i

        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        A = np.zeros((nwave, nwave))
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" %
                         (iteration, fiber, nfibers))
            R = Rsky[fiber]

            SD.setdiag(sqrtw[fiber])
            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A += (sqrtwR.T * sqrtwR).todense()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        # only solve for parameters that are constrained (non-zero diagonal);
        # the others are left at 0
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = np.zeros_like(B)
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # best-effort fallback on any solver failure, but without
            # swallowing KeyboardInterrupt / SystemExit
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # the parameters are directly the unconvolved sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(parameters)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] -
                                                 fiber_convolved_sky_flux)**2

        log.info("rejecting")

        if iteration < 1:
            # first iteration: only remove the worst outlier per wavelength
            nout_per_wave = np.sum(chi2 > clip_threshold, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            worst_entry = np.argmax(chi2[:, selection], axis=0)
            current_ivar[worst_entry, selection] = 0
            sqrtw[worst_entry, selection] = 0
            sqrtwflux[worst_entry, selection] = 0
            nout_iter = int(selection.size)
        else:
            # later iterations: remove all outliers at once
            keep = (chi2 <= clip_threshold)
            current_ivar *= keep
            sqrtw *= keep
            sqrtwflux *= keep
            nout_iter = int(chi2.size - np.sum(keep))

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance (this is slow; maybe invert per block, sandwich by R)
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning("cholesky_invert failed, switching to np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar = Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var = np.diagonal(convolved_sky_covar)

    # inverse (0 where the variance is not positive)
    convolved_sky_ivar = (convolved_sky_var > 0) / (convolved_sky_var +
                                                    (convolved_sky_var == 0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar,
                       frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)
def compute_sky(frame, nsig_clipping=4., max_iterations=20):
    """Compute a sky model from the sky fibers of a frame.

    A single deconvolved sky spectrum is fitted to all fibers flagged
    ``OBJTYPE == 'SKY'`` in ``frame.fibermap``, with iterative outlier
    rejection, and is then re-convolved with the resolution of every fiber
    of the frame.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        max_iterations : [optional] maximum number of fit/rejection iterations

    Returns:
        SkyModel object built from (wave, flux, ivar, mask), with
        ``nrej`` set to the total number of rejected pixels.

    Raises:
        ValueError: if the fibermap contains no sky fiber.
    """
    log = get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels (mask != 0) get zero weight, as in compute_uniform_sky;
    # the product is a new array so frame.ivar itself is never modified
    current_ivar = frame.ivar[skyfibers] * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)
    clip_threshold = nsig_clipping**2

    nout_tot = 0
    for iteration in range(max_iterations):

        # normal equations A . sky = B of the weighted least-square fit
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d fiber %d" % (iteration, fiber))
            R = Rsky[fiber]

            SD.setdiag(sqrtw[fiber])
            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            A = A + (sqrtwR.T * sqrtwR).tocsr()
            B += sqrtwR.T * sqrtwflux[fiber]

        log.info("iter %d solving" % iteration)
        skyflux = cholesky_solve(A.todense(), B)

        log.info("iter %d compute chi2" % iteration)
        for fiber in range(nfibers):
            S = Rsky[fiber].dot(skyflux)
            chi2[fiber] = current_ivar[fiber] * (flux[fiber] - S)**2

        log.info("rejecting")

        if iteration < 1:
            # first iteration: only remove the worst outlier per wavelength,
            # so that one bad fiber cannot drag down good ones
            nout_per_wave = np.sum(chi2 > clip_threshold, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            worst_entry = np.argmax(chi2[:, selection], axis=0)
            current_ivar[worst_entry, selection] = 0
            sqrtw[worst_entry, selection] = 0
            sqrtwflux[worst_entry, selection] = 0
            nout_iter = int(selection.size)
        else:
            # later iterations: remove all outliers at once
            keep = (chi2 <= clip_threshold)
            current_ivar *= keep
            sqrtw *= keep
            sqrtwflux *= keep
            nout_iter = int(chi2.size - np.sum(keep))

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # solve once again to get deconvolved sky variance
    skyflux, skycovar = cholesky_solve_and_invert(A.todense(), B)

    # Use diagonal of skycovar convolved with mean resolution of all fibers
    # first compute average resolution
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    R = Resolution(mean_res_data)

    # compute convolved sky variance and its inverse (0 where variance is 0)
    cskycovar = R.dot(skycovar).dot(R.T.todense())
    cskyvar = np.diagonal(cskycovar)
    cskyivar = (cskyvar > 0) / (cskyvar + (cskyvar == 0))

    # convert cskyivar to 2D; today it is the same for all spectra,
    # but that may not be the case in the future
    cskyivar = np.tile(cskyivar, frame.nspec).reshape(frame.nspec, nwave)

    # Sky model of each fiber = deconvolved sky convolved with its resolution
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(skyflux)

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, cskyivar, mask,
                    nrej=nout_tot)
def compute_non_uniform_sky(frame, nsig_clipping=4., max_iterations=10,
                            model_ivar=False, add_variance=True,
                            angular_variation_deg=1):
    """Compute a sky model.

    Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
          - wave : 1D wavelength grid in Angstroms
          - flux : 2D flux[nspec, nwave] density
          - ivar : 2D inverse variance of flux
          - mask : 2D inverse mask flux (0=good)
          - resolution_data : 3D[nspec, ndiag, nwave]
          - fibermap : table with columns OBJTYPE, FIBERASSIGN_X, FIBERASSIGN_Y
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated
            flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky
            model variance
        angular_variation_deg : degree of 2D polynomial correction as a
            function of fiber focal plane coordinates (default=1).
            One set of coefficients per wavelength

    Returns:
        SkyModel object built from (wave, flux, ivar, mask), with ``nrej``
        the total number of rejected pixels and ``stat_ivar`` the purely
        statistical inverse variance.

    Raises:
        ValueError: if the fibermap contains no sky fiber.
    """
    log = get_logger()
    log.info("starting")

    def focal_plane_monomials(xx, yy):
        """Return x**dx * y**dy for all dx + dy <= angular_variation_deg."""
        return np.array([(xx**dx) * (yy**dy)
                         for dx in range(angular_variation_deg + 1)
                         for dy in range(angular_variation_deg + 1 - dx)])

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    if skyfibers.size == 0:
        raise ValueError("no fiber with OBJTYPE == 'SKY' in frame.fibermap")
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave = frame.nwave
    nfibers = len(skyfibers)

    # masked pixels get zero weight
    current_ivar = frame.ivar[skyfibers].copy() * (frame.mask[skyfibers] == 0)
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    x = frame.fibermap["FIBERASSIGN_X"][skyfibers]
    y = frame.fibermap["FIBERASSIGN_Y"][skyfibers]
    # normalize for numerical stability, using all fibers of the frame
    xm = np.mean(frame.fibermap["FIBERASSIGN_X"])
    ym = np.mean(frame.fibermap["FIBERASSIGN_Y"])
    xs = np.std(frame.fibermap["FIBERASSIGN_X"])
    ys = np.std(frame.fibermap["FIBERASSIGN_Y"])
    if xs == 0:
        xs = 1
    if ys == 0:
        ys = 1
    x = (x - xm) / xs
    y = (y - ym) / ys

    # precompute the monomials for the sky fibers, shape (ncoef, nfibers)
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials = focal_plane_monomials(x, y)
    ncoef = monomials.shape[0]

    if model_ivar:
        log.info(
            "use a model of the inverse variance to remove bias due to correlated ivar and flux"
        )
        input_ivar = current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar, axis=0)
        median_ivar_vs_fiber = np.median(current_ivar, axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        threshold = 0.01
        for f in range(current_ivar.shape[0]):
            # separable model: per-fiber scale times per-wavelength profile
            current_ivar[f] = median_ivar_vs_fiber[
                f] / median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii = (input_ivar[f] <= (threshold * median_ivar_vs_wave))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw = np.sqrt(current_ivar)
    sqrtwflux = sqrtw * flux

    chi2 = np.zeros(flux.shape)
    clip_threshold = nsig_clipping**2

    nout_tot = 0
    for iteration in range(max_iterations):

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]

        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i  = 1/2 d(chi2)/di
        # B_i  = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])

        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) = sum_p monom[fiber,p] * a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] * a_ip
        # model[fiber,w] = sum_p monom[fiber,p] sum_i R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarly
        # B[p]  = sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]

        A = np.zeros((nwave * ncoef, nwave * ncoef))
        B = np.zeros((nwave * ncoef))

        # diagonal sparse matrix with content = sqrt(ivar) of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("iter %d sky fiber %d/%d" %
                         (iteration, fiber, nfibers))
            R = Rsky[fiber]

            SD.setdiag(sqrtw[fiber])
            sqrtwR = SD * R  # each row r of R is multiplied by sqrtw[r]

            wRtR = (sqrtwR.T * sqrtwR).todense()
            wRtF = sqrtwR.T * sqrtwflux[fiber]

            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef):
                for k in range(ncoef):
                    A[p * nwave:(p + 1) * nwave,
                      k * nwave:(k + 1) * nwave] += (monomials[p, fiber] *
                                                     monomials[k, fiber] *
                                                     wRtR)
                B[p * nwave:(p + 1) * nwave] += monomials[p, fiber] * wRtF

        log.info("iter %d solving" % iteration)
        # only solve for parameters that are constrained (non-zero diagonal);
        # the others are left at 0
        w = A.diagonal() > 0
        A_pos_def = A[w, :]
        A_pos_def = A_pos_def[:, w]
        parameters = np.zeros_like(B)
        try:
            parameters[w] = cholesky_solve(A_pos_def, B[w])
        except Exception:
            # best-effort fallback on any solver failure, but without
            # swallowing KeyboardInterrupt / SystemExit
            log.info("cholesky failed, trying svd in iteration {}".format(
                iteration))
            parameters[w] = np.linalg.lstsq(A_pos_def, B[w])[0]

        log.info("iter %d compute chi2" % iteration)

        for fiber in range(nfibers):
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef):
                unconvolved_fiber_sky_flux += monomials[
                    p, fiber] * parameters[p * nwave:(p + 1) * nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(
                unconvolved_fiber_sky_flux)

            chi2[fiber] = current_ivar[fiber] * (flux[fiber] -
                                                 fiber_convolved_sky_flux)**2

        log.info("rejecting")

        if iteration < 1:
            # first iteration: only remove the worst outlier per wavelength
            nout_per_wave = np.sum(chi2 > clip_threshold, axis=0)
            selection = np.where(nout_per_wave > 0)[0]
            worst_entry = np.argmax(chi2[:, selection], axis=0)
            current_ivar[worst_entry, selection] = 0
            sqrtw[worst_entry, selection] = 0
            sqrtwflux[worst_entry, selection] = 0
            nout_iter = int(selection.size)
        else:
            # later iterations: remove all outliers at once
            keep = (chi2 <= clip_threshold)
            current_ivar *= keep
            sqrtw *= keep
            sqrtwflux *= keep
            nout_iter = int(chi2.size - np.sum(keep))

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        # the fit has ncoef spectra of nwave parameters each
        ndf = int(np.sum(chi2 > 0) - nwave * ncoef)
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d" %
                 (iteration, sum_chi2, ndf, chi2pdf, nout_iter))

        if nout_iter == 0:
            break

    log.info("nout tot=%d" % nout_tot)

    # we now have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try:
        parameter_covar = cholesky_invert(A)
    except np.linalg.LinAlgError:
        log.warning("cholesky_invert failed, switching to np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data = np.mean(frame.resolution_data, axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k ,
    # corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution
    # and keep only its diagonal.
    RmeanT = Rmean.T.todense()  # same for every block, computed once
    convolved_parameter_covar = np.zeros((ncoef, ncoef, nwave))
    for p in range(ncoef):
        for k in range(ncoef):
            block = parameter_covar[p * nwave:(p + 1) * nwave,
                                    k * nwave:(k + 1) * nwave]
            convolved_parameter_covar[p, k] = np.diagonal(
                Rmean.dot(block).dot(RmeanT))

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant from a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")
    for i in range(frame.nspec):
        # monomials of this fiber, same ordering as for the sky fibers
        xi = (frame.fibermap["FIBERASSIGN_X"][i] - xm) / xs
        yi = (frame.fibermap["FIBERASSIGN_Y"][i] - ym) / ys
        M = focal_plane_monomials(xi, yi)

        unconvolved_fiber_sky_flux = np.zeros(nwave)
        convolved_fiber_skyvar = np.zeros(nwave)
        for p in range(ncoef):
            unconvolved_fiber_sky_flux += M[p] * parameters[p * nwave:(p + 1) *
                                                            nwave]
            for k in range(ncoef):
                convolved_fiber_skyvar += M[p] * M[
                    k] * convolved_parameter_covar[p, k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance (0 where the variance is not positive)
        cskyivar[i] = (convolved_fiber_skyvar > 0) / (
            convolved_fiber_skyvar + (convolved_fiber_skyvar == 0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance:
        modified_cskyivar = _model_variance(frame, cskyflux, cskyivar,
                                            skyfibers)
    else:
        modified_cskyivar = cskyivar.copy()

    # need to do better here
    mask = (cskyivar == 0).astype(np.uint32)

    # keep a record of the statistical ivar for QA
    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar=cskyivar)
def _longest_consecutive_run(indices):
    """Return the length of the longest run of consecutive integers.

    Args:
        indices: 1D array of integers sorted in increasing order
            (e.g. the output of ``np.where(...)[0]``).

    Returns:
        int, length of the longest stretch where each index equals the
        previous one plus 1; 0 for an empty input.
    """
    indices = np.asarray(indices)
    if indices.size == 0:
        return 0
    # positions after which the next index is not adjacent, i.e. run ends
    breaks = np.flatnonzero(np.diff(indices) != 1)
    edges = np.concatenate(([-1], breaks, [indices.size - 1]))
    return int(np.max(np.diff(edges)))


def compute_fiberflat(frame, nsig_clipping=4., accuracy=5.e-4, minval=0.1, maxval=10.):
    """Compute fiber flat by deriving an average spectrum and dividing all fiber data by this average.

    Input data are expected to be on the same wavelength grid, with uncorrelated noise.
    They however do not have exactly the same resolution.

    Args:
        frame (desispec.Frame): input Frame object; this routine reads its
            attributes nwave, nspec, wave, flux, ivar, mask and R
        nsig_clipping : [optional] sigma clipping value for outlier rejection
        accuracy : [optional] accuracy of fiberflat (end test for the iterative loops)
        minval : [optional] in the 1st pass, mask pixels with flux < minval * median spectrum;
            in the 3rd pass, pixels with fiberflat < minval are replaced by a smooth fit
        maxval : [optional] in the 1st pass, mask pixels with flux > maxval * median spectrum;
            in the 3rd pass, pixels with fiberflat > maxval are replaced by a smooth fit

    Returns:
        desispec.FiberFlat object built from
        wave, fiberflat, ivar, mask, meanspec (and chi2pdf of the 2nd pass)

    Notes:
    - we first iteratively :
       - compute a deconvolved mean spectrum
       - compute a fiber flat using the resolution convolved mean spectrum for each fiber
       - smooth the fiber flat along wavelength
       - clip outliers
    - then we compute a fiberflat at the native fiber resolution (not smoothed)
    - the routine returns the fiberflat, its inverse variance, mask, and the deconvolved mean spectrum
    - the fiberflat is the ratio data/mean, so the data should be divided by this flat

    NOTE THAT THIS CODE HAS NOT BEEN TESTED WITH ACTUAL FIBER TRANSMISSION VARIATIONS,
    OUTLIER PIXELS, DEAD COLUMNS ...
    """
    log = get_logger()
    log.info("starting")

    #
    # chi2 = sum_(fiber f) sum_(wavelength i) w_fi ( D_fi - F_fi (R_f M)_i )^2
    #
    # where
    # w = inverse variance
    # D = flux data (at the resolution of the fiber)
    # F = smooth fiber flat
    # R = resolution data
    # M = mean deconvolved spectrum
    #
    # M = A^{-1} B
    # with
    # A_kl = sum_(fiber f) sum_(wavelength i) w_fi F_fi^2 (R_fki R_fli)
    # B_k = sum_(fiber f) sum_(wavelength i) w_fi D_fi F_fi R_fki
    #
    # defining R'_fi = sqrt(w_fi) F_fi R_fi
    # and      D'_fi = sqrt(w_fi) D_fi
    #
    # A = sum_(fiber f) R'_f R'_f^T
    # B = sum_(fiber f) R'_f D'_f
    # (it's faster that way, and we try to use sparse matrices as much as possible)
    #

    #- Shortcuts
    nwave = frame.nwave
    nfibers = frame.nspec
    wave = frame.wave.copy()  #- this will become part of output too
    flux = frame.flux
    # the product creates a new array, so masking below never touches frame.ivar
    ivar = frame.ivar * (frame.mask == 0)

    # iterative fitting and clipping to get precise mean spectrum

    # we first need to iterate to converge on a solution of mean spectrum
    # and smooth fiber flat. several iterations are needed when
    # throughput AND resolution vary from fiber to fiber.
    # the end test is that the fiber flat has varied by less than accuracy
    # of previous iteration for all wavelength
    # we also have a max. number of iterations for this code
    max_iterations = 100

    # smoothing scale handed to spline_fit in all three passes
    smoothing_res = 100.

    nout_tot = 0
    chi2pdf = 0.

    smooth_fiberflat = np.ones(flux.shape)
    previous_smooth_fiberflat = smooth_fiberflat.copy()

    # 1st pass is median for spectrum, flat field without resolution
    # outlier rejection
    for iteration in range(max_iterations):

        # use median for spectrum
        mean_spectrum = np.zeros(flux.shape[1])
        for i in range(flux.shape[1]):
            ok = np.where(ivar[:, i] > 0)[0]
            if ok.size > 0:
                mean_spectrum[i] = np.median(flux[ok, i])

        # mask pixels far from mean spectrum.
        nout_iter = 0
        for fiber in range(nfibers):
            bad = np.where((ivar[fiber] > 0)
                           & ((flux[fiber] > maxval * mean_spectrum)
                              | (flux[fiber] < minval * mean_spectrum)))[0]
            if bad.size > 100:
                log.warning("masking fiber %d because of bad flat field with %d bad pixels"%(fiber,bad.size))
                ivar[fiber] = 0.
            if bad.size > 0:
                log.warning("masking %d bad pixels for fiber %d"%(bad.size,fiber))
                ivar[fiber, bad] = 0.
                nout_iter += bad.size

        # fit smooth fiberflat and compute chi2
        for fiber in range(nfibers):
            if np.sum(ivar[fiber] > 0) == 0:
                continue
            ok = np.where((mean_spectrum != 0) & (ivar[fiber] > 0))[0]
            smooth_fiberflat[fiber] = spline_fit(wave, wave[ok], flux[fiber, ok] / mean_spectrum[ok],
                                                 smoothing_res, ivar[fiber, ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]
        mean_spectrum *= mean

        # this is the max difference between two iterations
        max_diff = np.max(np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        # we don't start the rejection tests until we have converged on this
        if max_diff > 0.01:
            log.info("1st pass, max diff. = %g > 0.01 , continue iterating before outlier rejection"%(max_diff))
            continue

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        # sigma clipping, not more than 5 pixels per fiber at a time,
        # refitting the smooth flat of that fiber after each rejection
        for fiber in range(nfibers):
            for loop in range(max_iterations):
                bad = np.where(chi2[fiber] > nsig_clipping**2)[0]
                if bad.size == 0:
                    break
                if bad.size > 5:  # not more than 5 pixels at a time
                    ii = np.argsort(chi2[fiber, bad])
                    bad = bad[ii[-5:]]
                ivar[fiber, bad] = 0
                nout_iter += bad.size
                ok = np.where((mean_spectrum != 0) & (ivar[fiber] > 0))[0]
                # compute the ratio directly instead of reusing an array
                # left over from the smoothing loop above
                smooth_fiberflat[fiber] = spline_fit(wave, wave[ok], flux[fiber, ok] / mean_spectrum[ok],
                                                     smoothing_res, ivar[fiber, ok])
                chi2[fiber] = ivar[fiber] * (flux[fiber] - smooth_fiberflat[fiber] * mean_spectrum)**2

        nout_tot += nout_iter

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("1st pass iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d (nsig=%f)"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter,nsig_clipping))

        if max_diff > accuracy:
            log.info("1st pass iter #%d max diff. = %g > requirement = %g , continue iterating"%(iteration,max_diff,accuracy))
            continue

        if nout_iter == 0:
            break

    log.info("after 1st pass : nout = %d/%d"%(np.sum(ivar==0),np.size(ivar.flatten())))

    # 2nd pass is full solution including deconvolved spectrum, no outlier rejection
    for iteration in range(max_iterations):

        log.info("2nd pass, iter %d : mean deconvolved spectrum"%iteration)

        # fit mean spectrum
        A = scipy.sparse.lil_matrix((nwave, nwave)).tocsr()
        B = np.zeros(nwave)

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD = scipy.sparse.lil_matrix((nwave, nwave))

        # this is to go a bit faster
        sqrtwflat = np.sqrt(ivar) * smooth_fiberflat

        # loop on fiber to handle resolution (this is long)
        for fiber in range(nfibers):
            if fiber % 10 == 0:
                log.info("2nd pass, filling matrix, iter %d fiber %d"%(iteration,fiber))

            R = frame.R[fiber]
            SD.setdiag(sqrtwflat[fiber])

            sqrtwflatR = SD * R  # each row r of R is multiplied by sqrtwflat[r]

            A = A + (sqrtwflatR.T * sqrtwflatR).tocsr()
            B += sqrtwflatR.T.dot(np.sqrt(ivar[fiber]) * flux[fiber])

        mean_spectrum = cholesky_solve(A.todense(), B)

        # fit smooth fiberflat
        for fiber in range(nfibers):
            if np.sum(ivar[fiber] > 0) == 0:
                continue

            R = frame.R[fiber]
            M = R.dot(mean_spectrum)
            ok = np.where(M != 0)[0]
            smooth_fiberflat[fiber] = spline_fit(wave, wave[ok], flux[fiber, ok] / M[ok],
                                                 smoothing_res, ivar[fiber, ok])

        # normalize to get a mean fiberflat=1
        mean = np.mean(smooth_fiberflat, axis=0)
        ok = np.where(mean != 0)[0]
        smooth_fiberflat[:, ok] /= mean[ok]
        mean_spectrum *= mean

        chi2 = ivar * (flux - smooth_fiberflat * mean_spectrum)**2

        # this is the max difference between two iterations
        max_diff = np.max(np.abs(smooth_fiberflat - previous_smooth_fiberflat) * (ivar > 0.))
        previous_smooth_fiberflat = smooth_fiberflat.copy()

        sum_chi2 = float(np.sum(chi2))
        ndf = int(np.sum(chi2 > 0) - nwave - nfibers * (nwave / smoothing_res))
        chi2pdf = 0.
        if ndf > 0:
            chi2pdf = sum_chi2 / ndf
        log.info("2nd pass, iter %d, chi2=%f ndf=%d chi2pdf=%f"%(iteration,sum_chi2,ndf,chi2pdf))

        if max_diff < accuracy:
            break

        log.info("2nd pass, iter %d, max diff. = %g > requirement = %g, continue iterating"%(iteration,max_diff,accuracy))

    log.info("Total number of masked pixels=%d"%nout_tot)

    log.info("3rd pass, final computation of fiber flat")

    # now use mean spectrum to compute flat field correction without any smoothing
    # because sharp feature can arise if dead columns

    fiberflat = np.ones(flux.shape)
    fiberflat_ivar = np.zeros(flux.shape)
    # explicit 64-bit integer dtype: the Python 2 builtin `long` does not
    # exist on Python 3 and would raise a NameError here
    mask = np.zeros(flux.shape, dtype=np.int64)

    # reset ivar
    # NOTE(review): unlike the first two passes this does not zero the pixels
    # flagged in frame.mask -- confirm this is intended
    ivar = frame.ivar
    fiberflat_mask = 12  # place holder for actual mask bit when defined

    nsig_for_mask = nsig_clipping  # only mask out N sigma outliers

    for fiber in range(nfibers):

        if np.sum(ivar[fiber] > 0) == 0:
            continue

        R = frame.R[fiber]
        M = np.array(np.dot(R.todense(), mean_spectrum)).flatten()
        # ratio data/model, set to 1 where the model is exactly 0
        fiberflat[fiber] = (M != 0) * flux[fiber] / (M + (M == 0)) + (M == 0)
        fiberflat_ivar[fiber] = ivar[fiber] * M**2

        # iteratively flag outliers with respect to a smooth version of this flat
        nbad_tot = 0
        iteration = 0
        while iteration < 500:
            smooth = spline_fit(wave, wave, fiberflat[fiber], smoothing_res, fiberflat_ivar[fiber])
            pix_chi2 = fiberflat_ivar[fiber] * (fiberflat[fiber] - smooth)**2
            bad = np.where(pix_chi2 > nsig_for_mask**2)[0]
            if bad.size == 0:
                break
            if bad.size > 5:  # not more than 5 pixels at a time
                ii = np.argsort(pix_chi2[bad])
                bad = bad[ii[-5:]]
            # a flagged pixel gets ivar=0, hence chi2=0, so it is set only once
            mask[fiber, bad] |= fiberflat_mask
            fiberflat_ivar[fiber, bad] = 0.
            nbad_tot += bad.size
            iteration += 1

        # replace bad by smooth fiber flat
        bad = np.where((mask[fiber] > 0) | (fiberflat_ivar[fiber] == 0)
                       | (fiberflat[fiber] < minval) | (fiberflat[fiber] > maxval))[0]

        if bad.size > 0:

            fiberflat_ivar[fiber, bad] = 0

            # find max length of segment with bad pix
            length = _longest_consecutive_run(bad)
            if length > 10:
                log.info("3rd pass : fiber #%d has a max length of bad pixels=%d"%(fiber,length))

            # per-fiber smoothing scale, wide enough to bridge the longest gap;
            # kept local so that one fiber with a long bad segment does not
            # change the smoothing applied to the fibers processed after it
            replacement_res = float(max(smoothing_res, 2 * length))
            x = np.arange(wave.size)

            ok = np.where(fiberflat_ivar[fiber] > 0)[0]
            smooth = spline_fit(x, x[ok], fiberflat[fiber, ok], replacement_res, fiberflat_ivar[fiber, ok])
            fiberflat[fiber, bad] = smooth[bad]

        if nbad_tot > 0:
            log.info("3rd pass : fiber #%d masked pixels = %d (%d iterations)"%(fiber,nbad_tot,iteration))

    # set median flat to 1
    log.info("set median fiberflat to 1")

    mean = np.ones(flux.shape[1])
    for i in range(flux.shape[1]):
        ok = np.where((mask[:, i] == 0) & (ivar[:, i] > 0))[0]
        if ok.size > 0:
            mean[i] = np.median(fiberflat[ok, i])
    ok = np.where(mean != 0)[0]
    fiberflat[:, ok] /= mean[ok]

    log.info("done fiberflat")

    return FiberFlat(wave, fiberflat, fiberflat_ivar, mask, mean_spectrum,
                     chi2pdf=chi2pdf)