# Imports assumed by the routines below (a hedged reconstruction based on how
# the modules are used in this code; `dln` and `bindata` are modules from the
# dlnpyutils package).  Helper routines that are referenced but not defined
# here (e.g. Spec1D, read_kurucz, readidstrdb, fixidlcontinuation, strput)
# live elsewhere in the repo.
import os
import sys
import time
import socket
import logging
import subprocess
from glob import glob
from collections import OrderedDict
import numpy as np
from scipy.interpolate import interp1d, interpn
from scipy.integrate import trapz
from astropy.io import fits
from astropy.table import Table
import healpy as hp
from skimage.restoration import denoise_wavelet
from dlnpyutils import utils as dln, bindata


def w2p(dispersion, w, extrapolate=True):
    """
    Convert wavelength values to pixels for a "dispersion solution".

    Parameters
    ----------
    dispersion : 1D array
        The dispersion solution.  This is basically just a 1D array of
        monotonically increasing (or decreasing) wavelengths.
    w : array
        Array of wavelength values to convert to pixels.
    extrapolate : bool, optional
        Extrapolate beyond the dispersion solution, if necessary.
        This is True by default.

    Returns
    -------
    x : array
        Array of converted pixel values.

    Example
    -------
    .. code-block:: python

         x = w2p(disp,w)

    """
    x = interp1d(dispersion, np.arange(len(dispersion)), kind='cubic',
                 bounds_error=False, fill_value=(np.nan, np.nan),
                 assume_sorted=False)(w)
    # Need to extrapolate
    if ((np.min(w) < np.min(dispersion)) | (np.max(w) > np.max(dispersion))) & (extrapolate is True):
        win = dispersion
        xin = np.arange(len(dispersion))
        si = np.argsort(win)
        win = win[si]
        xin = xin[si]
        npix = len(win)
        # At the beginning
        if (np.min(w) < np.min(dispersion)):
            #coef1 = dln.poly_fit(win[0:10], xin[0:10], 2)
            coef1 = dln.quadratic_coefficients(win[0:10], xin[0:10])
            bd1, nbd1 = dln.where(w < np.min(dispersion))
            x[bd1] = dln.poly(w[bd1], coef1)
        # At the end
        if (np.max(w) > np.max(dispersion)):
            #coef2 = dln.poly_fit(win[npix-10:], xin[npix-10:], 2)
            coef2 = dln.quadratic_coefficients(win[npix - 10:], xin[npix - 10:])
            bd2, nbd2 = dln.where(w > np.max(dispersion))
            x[bd2] = dln.poly(w[bd2], coef2)
    return x

def p2w(dispersion, x, extrapolate=True):
    """
    Convert pixel values to wavelengths for a "dispersion solution".

    Parameters
    ----------
    dispersion : 1D array
        The dispersion solution.  This is basically just a 1D array of
        monotonically increasing (or decreasing) wavelengths.
    x : array
        Array of pixel values to convert to wavelengths.
    extrapolate : bool, optional
        Extrapolate beyond the dispersion solution, if necessary.
        This is True by default.

    Returns
    -------
    w : array
        Array of converted wavelengths.

    Example
    -------
    .. code-block:: python

         w = p2w(disp,x)

    """
    npix = len(dispersion)
    w = interp1d(np.arange(len(dispersion)), dispersion, kind='cubic',
                 bounds_error=False, fill_value=(np.nan, np.nan),
                 assume_sorted=False)(x)
    # Need to extrapolate
    if ((np.min(x) < 0) | (np.max(x) > (npix - 1))) & (extrapolate is True):
        xin = np.arange(npix)
        win = dispersion
        # At the beginning
        if (np.min(x) < 0):
            #coef1 = dln.poly_fit(xin[0:10], win[0:10], 2)
            coef1 = dln.quadratic_coefficients(xin[0:10], win[0:10])
            bd1, nbd1 = dln.where(x < 0)
            w[bd1] = dln.poly(x[bd1], coef1)
        # At the end
        if (np.max(x) > (npix - 1)):
            #coef2 = dln.poly_fit(xin[npix-10:], win[npix-10:], 2)
            coef2 = dln.quadratic_coefficients(xin[npix - 10:], win[npix - 10:])
            bd2, nbd2 = dln.where(x > (npix - 1))
            w[bd2] = dln.poly(x[bd2], coef2)
    return w

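# A minimal round-trip sketch for w2p() and p2w() above; the dispersion
# array values are hypothetical, purely for illustration:
#
#   disp = np.linspace(15100., 17000., 2048)    # wavelength of each pixel
#   x = w2p(disp, np.array([15500., 16800.]))   # wavelengths -> fractional pixels
#   w = p2w(disp, x)                            # pixels -> wavelengths (~[15500., 16800.])
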
def replaceidlcode(lines, mjd, day=None):
    """ Replace IDL code in lines (array of strings) with the results of
        code execution.  This is a small helper function for
        translate_idl_mjd5_script(). """

    # day
    #  psfid=day+138
    #  domeid=day+134
    if day is not None:
        ind, nind = dln.where((lines.lower().find('day') > -1) &
                              (lines.lower().startswith('day=') == False))
        if nind > 0:
            lines[ind] = lines[ind].replace('day', str(day))

    # indgen
    #  ims=day+149+indgen(2)
    ind, nind = dln.where(lines.lower().find('indgen(') > -1)
    if nind > 0:
        lines[ind] = lines[ind].replace('indgen(', 'np.arange(')

    # Deal with assignment lines with code to execute
    ind, nind = dln.where(((lines.lower().find('+') > -1) |
                           (lines.lower().find('-') > -1) |
                           (lines.lower().find('*') > -1) |
                           (lines.lower().find('np.arange') > -1)) &
                          (lines.lower().find('=') > -1) &
                          (lines.lower().find('mkplan') == -1))
    for i in range(nind):
        line1 = lines[ind[i]]
        lo = line1.find('=')
        key = line1[0:lo]
        val = eval(line1[lo + 1:])
        if (type(val) is int) | (type(val) is str):
            lines[ind[i]] = key + '=' + str(val)
        else:
            lines[ind[i]] = key + '=' + str(list(val))

    # Deal with mkplan lines with code to execute
    ind, nind = dln.where(((lines.lower().find('+') > -1) |
                           (lines.lower().find('-') > -1) |
                           (lines.lower().find('*') > -1) |
                           (lines.lower().find('np.arange') > -1)) &
                          (lines.lower().find('=') > -1) &
                          (lines.lower().find('mkplan') > -1))
    for i in range(nind):
        line1 = lines[ind[i]]
        raise ValueError('This has not been implemented yet')

    return lines

def denoise(flux, err=None, wtype='db2'):
    """ Try to remove the noise using wavelet denoising."""
    # sym9 seems to produce a smoother curve
    # db2 is also pretty good
    # haar and sb1 give more "blocky" results

    # Get shape
    if flux.ndim == 1:
        npix = len(flux)
        norder = 1
    else:
        npix, norder = flux.shape

    # Loop over the orders
    out = np.zeros((npix, norder), float)
    for i in range(norder):
        # Use copies so the input arrays are not modified in place
        if norder == 1:
            f = flux.copy()
            e = err.copy() if err is not None else None
        else:
            f = flux[:, i].copy()
            e = err[:, i].copy() if err is not None else None

        # Normalize
        medf = np.median(f)
        f /= medf
        if e is not None:
            e /= medf

        # Values must be finite
        smlen = np.minimum(31, int(np.round(0.1 * npix)))
        if smlen % 2 == 0:   # smlen must be odd
            smlen += 1
        bd, nbd = dln.where(~np.isfinite(f))
        if nbd > 0:
            smf = dln.medfilt(f, smlen)
            f[bd] = smf[bd]
            if e is not None:
                sme = dln.medfilt(e, smlen)
                e[bd] = sme[bd]

        # Get median err
        if err is not None:
            mede = np.median(e)
        else:
            mede = dln.mad(f - dln.medfilt(f, 5))

        # Do the denoising, and rescale with medf
        out[:, i] = medf * denoise_wavelet(f, mede, wavelet=wtype,
                                           multichannel=False,
                                           method='BayesShrink', mode='soft',
                                           rescale_sigma=True)

    # Only one order
    if norder == 1:
        out = out.flatten()
    return out

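# A quick usage sketch for denoise(); the noisy spectrum here is synthetic:
#
#   xx = np.linspace(0, 10, 1000)
#   flux = 2.0 + np.sin(xx) + np.random.normal(0, 0.05, 1000)
#   smflux = denoise(flux, wtype='db2')   # wavelet-denoised version of flux
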
def parsecaldict(caldict, mjd):
    """ Small helper function for getcal() to select the entry in a
        calibration dictionary that is valid for a MJD."""
    gd, ngd = dln.where((mjd >= caldict['mjd1']) & (mjd <= caldict['mjd2']))
    if ngd > 0:
        if ngd > 1:
            gd = gd[-1]
            print('Multiple cal products found for mjd ' + str(mjd) +
                  ' will use last: ' + caldict['name'][gd])
        return caldict['name'][gd]
    else:
        return None

def loadcaltype(lines, caltype, dt):
    """ A small helper function for readcal(). """
    # Add a space at the end to make sure we are getting the right
    # calibration type, e.g. "persist" and not "persistmodel"
    gd, ngd = dln.where(lines.find(caltype + ' ') == 0)
    cat = None
    if ngd > 0:
        cat = np.zeros(ngd, dtype=dt)
        for i in range(ngd):
            dum = lines[gd[i]].split()
            for j, n in enumerate(cat.dtype.names):
                cat[n][i] = dum[j + 1]
    return cat

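# Sketch of the master-calibration line format that loadcaltype() parses,
# using the 'dark' dtype defined in readcal() below:
#
#   lines = np.char.array(['dark 55600 56860 12910009 12910009-12910037'])
#   dt = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
#                  ('frames', np.str, 100)])
#   cat = loadcaltype(lines, 'dark', dt)    # -> 1-row structured array
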
def maskdiscrepant(spec, model, nsig=10, verbose=False, logger=None):
    """ Mask pixels that are discrepant when compared to a model. """
    if logger is None:
        logger = dln.basiclogger()

    spec2 = spec.copy()
    wave = spec2.wave.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    flux = spec2.flux.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    err = spec2.err.copy().reshape(spec2.npix, spec2.norder)     # make 2D
    mask = spec2.mask.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    mflux = model.flux.copy().reshape(spec2.npix, spec2.norder)  # make 2D
    totnbd = 0
    for o in range(spec2.norder):
        w = wave[:, o].copy()
        x = (w - np.median(w)) / (np.max(w * 0.5) - np.min(w * 0.5))  # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        my = mflux[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        y /= medy
        my /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x, y, 2, robust=True)
        sig = dln.mad(y - my)
        bd, nbd = dln.where(np.abs(y - my) > nsig * sig)
        totnbd += nbd
        if nbd > 0:
            flux[bd, o] = dln.poly(x[bd], coef) * medy
            err[bd, o] = 1e30
            mask[bd, o] = True

    # Flatten to 1D if norder=1
    if spec2.norder == 1:
        flux = flux.flatten()
        err = err.flatten()
        mask = mask.flatten()

    # Stuff back in
    spec2.flux = flux
    spec2.err = err
    spec2.mask = mask

    if verbose is True:
        logger.info('Masked ' + str(totnbd) + ' discrepant pixels')

    return spec2

def clean(self):
    """ Clean up bad input Gaussian sigma values."""
    if self._sigma is not None:
        smlen = np.round(self.npix // 50).astype(int)
        if smlen == 0:
            smlen = 3
        _sigma = self._sigma.reshape(self.npix, self.norder)   # make 2D
        for o in range(self.norder):
            sig = _sigma[:, o]
            smsig = dln.gsmooth(sig, smlen)
            bd, nbd = dln.where(sig <= 0)
            if nbd > 0:
                sig[bd] = smsig[bd]
            if self.ndim == 2:
                self._sigma[:, o] = sig
            else:
                self._sigma = sig

def maskoutliers(spec, nsig=5, verbose=False, logger=None):
    """ Mask large positive outliers and negative flux pixels in the spectrum. """
    if logger is None:
        logger = dln.basiclogger()

    spec2 = spec.copy()
    wave = spec2.wave.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    flux = spec2.flux.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    err = spec2.err.copy().reshape(spec2.npix, spec2.norder)     # make 2D
    mask = spec2.mask.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    totnbd = 0
    for o in range(spec2.norder):
        w = wave[:, o].copy()
        x = (w - np.median(w)) / (np.max(w * 0.5) - np.min(w * 0.5))  # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        y /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x, y, 2, robust=True)
        sig = dln.mad(y - dln.poly(x, coef))
        bd, nbd = dln.where(((y - dln.poly(x, coef)) > nsig * sig) | (y < 0))
        totnbd += nbd
        if nbd > 0:
            flux[bd, o] = dln.poly(x[bd], coef) * medy
            err[bd, o] = 1e30
            mask[bd, o] = True

    # Flatten to 1D if norder=1
    if spec2.norder == 1:
        flux = flux.flatten()
        err = err.flatten()
        mask = mask.flatten()

    # Stuff back in
    spec2.flux = flux
    spec2.err = err
    spec2.mask = mask

    if verbose is True:
        logger.info('Masked ' + str(totnbd) + ' outlier or negative pixels')

    return spec2

def maskdiscrepant(spec, model, nsig=10, verbose=False):
    """
    Mask pixels that are discrepant when compared to a model.

    Parameters
    ----------
    spec : Spec1D object
        Observed spectrum for which to mask discrepant values.
    model : Spec1D object
        Reference/model spectrum to use to find discrepant values.
    nsig : int, optional
        Number of standard deviations to use for the discrepant values.
        Default is 10.0.
    verbose : boolean, optional
        Verbose output.  Default is False.

    Returns
    -------
    spec2 : Spec1D object
        Spectrum with discrepant values masked.

    Example
    -------
    .. code-block:: python

         spec = maskdiscrepant(spec,nsig=5)

    """
    print = getprintfunc()  # Get print function to be used locally, allows for easy logging

    spec2 = spec.copy()
    wave = spec2.wave.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    flux = spec2.flux.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    err = spec2.err.copy().reshape(spec2.npix, spec2.norder)     # make 2D
    mask = spec2.mask.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    mflux = model.flux.copy().reshape(spec2.npix, spec2.norder)  # make 2D
    totnbd = 0
    for o in range(spec2.norder):
        w = wave[:, o].copy()
        x = (w - np.median(w)) / (np.max(w * 0.5) - np.min(w * 0.5))  # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        my = mflux[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        if medy <= 0.0:
            medy = 1.0
        y /= medy
        my /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x, y, 2, robust=True)
        sig = dln.mad(y - my)
        bd, nbd = dln.where(np.abs(y - my) > nsig * sig)
        totnbd += nbd
        if nbd > 0:
            flux[bd, o] = dln.poly(x[bd], coef) * medy
            err[bd, o] = 1e30
            mask[bd, o] = True

    # Flatten to 1D if norder=1
    if spec2.norder == 1:
        flux = flux.flatten()
        err = err.flatten()
        mask = mask.flatten()

    # Stuff back in
    spec2.flux = flux
    spec2.err = err
    spec2.mask = mask

    if verbose is True:
        print('Masked ' + str(totnbd) + ' discrepant pixels')

    return spec2

def translate_idl_mjd5_script(scriptfile):
    """
    Translate an IDL MJD5.pro script file to yaml.  It returns a list of
    strings that can be written to a file.

    Parameters
    ----------
    scriptfile : str
        Name of MJD5.pro script file.

    Returns
    -------
    flines : numpy char array
        The lines of the script file translated to yaml.

    Examples
    --------
    flines = mkplan.translate_idl_mjd5_script('apo25m_59085.pro')

    Example file, top part of apo25m_59085.pro

    apsetver,telescope='apo25m'
    mjd=59085
    plate=11950
    psfid=35230030
    fluxid=35230030
    ims=[35230018,35230019,35230020,35230021,35230022,35230023,35230024,35230025,35230026,35230027,35230028,35230029]
    mkplan,ims,plate,mjd,psfid,fluxid,vers=vers
    ;these are not sky frames
    plate = 12767
    psfid=35230015
    fluxid=35230015
    ims=[35230011,35230012,35230013,35230014]
    mkplan,ims,plate,mjd,psfid,fluxid,vers=vers;,/sky
    plate=12673
    psfid=35230037
    fluxid=35230037
    ims=[35230033,35230034,35230035,35230036]
    mkplan,ims,plate,mjd,psfid,fluxid,vers=vers

    By D.Nidever,  Oct 2020
    """

    # Check that the file exists
    if os.path.exists(scriptfile) == False:
        raise ValueError(scriptfile + " NOT FOUND")

    # Load the file
    lines = dln.readlines(scriptfile)
    lines = np.char.array(lines)

    # Fix continuation lines
    lines = fixidlcontinuation(lines)
    # Remove comments
    lines = removeidlcomments(lines)

    # Get telescope from apsetver line
    ind, nind = dln.where(lines.strip().lower().find('apsetver') == 0)
    telescope = None
    if nind == 0:
        print('No APSETVER line found')
        if scriptfile.lower().find('apo25m') > -1:
            telescope = 'apo25m'
        if scriptfile.lower().find('lco25m') > -1:
            telescope = 'lco25m'
        if telescope is None:
            raise ValueError('Cannot find TELESCOPE')
    else:
        setverline = lines[ind[0]]
        telescope = setverline[setverline.lower().find('telescope=') + 10:]
        telescope = telescope.replace("'", "")
    telescopeline = "telescope: " + telescope

    # Get MJD
    ind, nind = dln.where(lines.strip().lower().find('mjd=') == 0)
    if nind == 0:
        raise ValueError('No MJD line found')
    mjdline = lines[ind[0]]
    mjd = int(mjdline[mjdline.find('=') + 1:])
    mjdline = 'mjd: ' + str(mjd)

    # Get day number
    ind, nind = dln.where(lines.lower().find('day=') > -1)
    if nind > 0:
        dayline = lines[ind[0]].lower()
        # day=getnum(mjd)*10000
        if dayline.lower().find('getnum') > -1:
            dayline = dayline.replace('getnum(mjd)', '(mjd-55562)')
        day = int(eval(dayline[dayline.find('=') + 1:]))
    else:
        day = None

    # Remove apsetver, mjd and day lines
    gd, ngd = dln.where((lines.strip('').lower().startswith('day=') == False) &
                        (lines.strip('').lower().find('apsetver') == -1) &
                        (lines.strip('').lower().startswith('mjd=') == False))
    lines = lines[gd]

    # Deal with IDL code using day, indgen(), etc.
    lines = replaceidlcode(lines, mjd, day=day)

    # Initialize final lines
    flines = ['---']   # start of yaml file

    # Loop over mkplan blocks
    #   mkplan command is at the end of the block
    ind, nind = dln.where(lines.lower().find('mkplan') != -1)
    for i in range(nind):
        if i == 0:
            lo = 0
        else:
            lo = ind[i - 1] + 1
        lines1 = lines[lo:ind[i] + 1]
        nlines1 = len(lines1)
        # Add TELESCOPE line
        flines.append("- " + telescopeline)
        # Add MJD line
        flines.append("  " + mjdline)
        # Assume all lines in this block except for mkplan are key: value pairs
        kvlines = lines1[0:-1]
        for kvl in kvlines:
            if kvl.strip() != '':
                lo = kvl.find('=')
                key = kvl[0:lo].strip()
                val = kvl[lo + 1:].strip()
                flines.append("  " + key + ": " + val)
        # Deal with mkplan lines
        planline = lines1[-1]
        # Trim off the first bit that's always the same, "mkplan,ims,plate,mjd,psfid,fluxid,"
        planline = planline[planline.lower().find('fluxid') + 7:]
        # Remove vers=vers if it's there
        if planline.lower().find('vers=vers') == 0:
            planline = planline[9:]

        # Deal with keywords
        if planline != '':
            if planline[0] == ',':
                planline = planline[1:]
            # Add lines for sky, dark, cal
            if planline.lower().find('/sky') > -1:
                flines.append('  sky: True')
                planline = removeidlkeyword(planline, '/sky')    # Trim off /sky
            if planline.lower().find('/dark') > -1:
                flines.append('  dark: True')
                planline = removeidlkeyword(planline, '/dark')   # Trim off /dark
            if planline.lower().find('/cal') > -1:
                flines.append('  cal: True')
                planline = removeidlkeyword(planline, '/cal')    # Trim off /cal
            # Deal with remaining arguments
            if planline != '':
                # Return leftover line as a dictionary
                import pdb
                pdb.set_trace()
                # Use eval() so that "args" is bound as a local variable
                # (exec() cannot create function locals in Python 3)
                args = eval("args2dict(" + planline + ")")
                # Loop over keys and add them
                for k in args.keys():
                    val = args[k]
                    if (type(val) is int) | (type(val) is str):
                        flines.append("  " + k + ": " + str(val))
                    else:
                        flines.append("  " + k + ": " + str(list(val)))

    # End of yaml file
    flines.append('...')

    return flines

def normalize(self, ncorder=6, perclevel=0.95):
    """
    Normalize the spectrum.

    Parameters
    ----------
    ncorder : float, optional
        Polynomial order to use for the continuum fitting.
        The default is 6.
    perclevel : float, optional
        Percent level (1.0 for 100%) to use for the continuum value
        in large bins.  Default is 0.95.

    Returns
    -------
    The flux and err arrays will be normalized (divided) by the continuum
    and the continuum saved in cont.  The normalized property is set to True.

    Examples
    --------
    spec.normalize()

    """
    self._flux = self.flux   # Save the original
    #nspec, cont, masked = normspec(self,ncorder=ncorder,perclevel=perclevel)

    # Binning parameters (note that this perclevel overrides the keyword)
    binsize = 0.10
    perclevel = 90.0

    wave = self.wave.copy().reshape(self.npix, self.norder)   # make 2D
    flux = self.flux.copy().reshape(self.npix, self.norder)   # make 2D
    err = self.err.copy().reshape(self.npix, self.norder)     # make 2D
    mask = self.mask.copy().reshape(self.npix, self.norder)   # make 2D
    cont = err.copy() * 0.0 + 1
    for o in range(self.norder):
        w = wave[:, o].copy()
        x = (w - np.median(w)) / (np.max(w * 0.5) - np.min(w * 0.5))  # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        y /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x, y, 2, robust=True)
        sig = dln.mad(y - dln.poly(x, coef))
        bd, nbd = dln.where((y - dln.poly(x, coef)) > 5 * sig)
        if nbd > 0:
            m[bd] = True
        gdmask = (y > 0) & (m == False)   # need positive fluxes and no mask set
        # Bin the data points
        xr = [np.nanmin(x), np.nanmax(x)]
        bins = int(np.ceil((xr[1] - xr[0]) / binsize)) + 1   # integer for binned_statistic
        ybin, bin_edges, binnumber = bindata.binned_statistic(x[gdmask], y[gdmask],
                                                              statistic='percentile',
                                                              percentile=perclevel,
                                                              bins=bins, range=None)
        xbin = bin_edges[0:-1] + 0.5 * binsize
        # Interpolate to full grid
        fnt = np.isfinite(ybin)
        cont1 = dln.interp(xbin[fnt], ybin[fnt], x, extrapolate=True)
        cont1 *= medy

        flux[:, o] /= cont1
        err[:, o] /= cont1
        cont[:, o] = cont1

    # Flatten to 1D if norder=1
    if self.norder == 1:
        flux = flux.flatten()
        err = err.flatten()
        cont = cont.flatten()

    # Stuff back in
    self.flux = flux
    self.err = err
    self.cont = cont
    self.normalized = True
    return

def query(self, table=None, cols='*', where=None, groupby=None, sql=None,
          fmt='numpy', verbose=False):
    """
    Query the APOGEE DRP database.

    Parameters
    ----------
    table : str, optional
        Name of table to query.  Default is to use the apogee_drp schema, but
        table names with schema (e.g. catalogdb.gaia_dr2_source) can also be
        input.  If the sql command is given directly, then this is not needed.
    cols : str, optional
        Comma-separated list of columns to return.  Default is "*", all columns.
    where : str, optional
        Constraints on the selection.
    groupby : str, optional
        Column to group data by.
    sql : str, optional
        Enter the SQL command directly.
    fmt : str, optional
        The output format:
          -numpy: numpy structured array (default)
          -table: astropy table
          -list: list of tuples, first row has column names
          -raw: raw output, list of tuples
    verbose : bool, optional
        Print verbose output to screen.  False by default.

    Returns
    -------
    cat : numpy structured array
        The data in a catalog format.  If fmt='raw' then the data will be
        returned as a list of tuples.

    Examples
    --------
    cat = db.query('visit',where="apogee_id='2M09241296+2723318'")

    cat = db.query(sql='select * from apogee_drp.visit as v join catalogdb.something as c on v.apogee_id=c.2mass_type')

    """
    cur = self.connection.cursor()

    # Simple table query
    if sql is None:
        # Schema
        if table.find('.') > -1:
            schema, tab = table.split('.')
        else:
            schema = 'apogee_drp'
            tab = table

        # Start the SELECT statement
        cmd = 'SELECT ' + cols + ' FROM ' + schema + '.' + tab

        # Add WHERE statement
        if where is not None:
            cmd += ' WHERE ' + where

        # Add GROUP BY statement
        if groupby is not None:
            cmd += ' GROUP BY ' + groupby

        # Execute the select command
        if verbose:
            print('CMD = ' + cmd)
        cur.execute(cmd)
        data = cur.fetchall()

        if len(data) == 0:
            cur.close()
            return np.array([])

        # Return the raw results
        if fmt == 'raw':
            cur.close()
            return data

        # Get table column names and data types
        cur.execute("select column_name,data_type from information_schema.columns "
                    "where table_schema='" + schema + "' and table_name='" + tab + "'")
        head = cur.fetchall()
        cur.close()
        colnames = [h[0] for h in head]

        # Return fmt="list" format
        if fmt == 'list':
            data = [tuple(colnames)] + data
            cur.close()
            return data

        # Get numpy data types
        d2d = {'smallint': np.int, 'integer': np.int, 'bigint': np.int,
               'real': np.float32, 'double precision': np.float64,
               'text': (np.str, 200), 'char': (np.str, 5),
               'timestamp': (np.str, 50),
               'timestamp with time zone': (np.str, 50),
               'timestamp without time zone': (np.str, 50),
               'boolean': np.bool}
        dt = []
        for i, h in enumerate(head):
            if h[1] == 'ARRAY':
                # Get number of elements and type from the data itself
                shp = np.array(data[0][i]).shape
                type1 = np.array(data[0][i]).dtype.type
                dt.append((h[0], type1, shp))
            else:
                dt.append((h[0], d2d[h[1]]))
        dtype = np.dtype(dt)

        # Convert to numpy structured array
        cat = np.zeros(len(data), dtype=dtype)
        cat[...] = data
        del (data)

    # SQL command input
    else:
        # Execute the command
        if verbose:
            print('CMD = ' + sql)
        cur.execute(sql)
        data = cur.fetchall()

        if len(data) == 0:
            cur.close()
            return np.array([])

        # Return the raw results
        if fmt == 'raw':
            cur.close()
            return data

        # Return fmt="list" format
        if fmt == 'list':
            colnames = [desc[0] for desc in cur.description]
            data = [tuple(colnames)] + data
            cur.close()
            return data

        # Get table column names and data types
        colnames = [desc[0] for desc in cur.description]
        colnames = np.array(colnames)
        # Fix duplicate column names
        cindex = dln.create_index(colnames)
        bd, nbd = dln.where(cindex['num'] > 1)
        for i in range(nbd):
            ind = cindex['index'][cindex['lo'][bd[i]]:cindex['hi'][bd[i]] + 1]
            ind.sort()
            nind = len(ind)
            for j in np.arange(1, nind):
                colnames[ind[j]] += str(j + 1)

        # Use the data returned to get the type
        dt = []
        for i, c in enumerate(colnames):
            type1 = type(data[0][i])
            if type1 is str:
                dt.append((c, type(data[0][i]), 300))
            elif type1 is list:   # convert list to array
                nlist = len(data[0][i])
                dtype1 = type(data[0][i][0])
                dt.append((c, dtype1, nlist))
            else:
                dt.append((c, type(data[0][i])))
        dtype = np.dtype(dt)

        # Convert to numpy structured array
        cat = np.zeros(len(data), dtype=dtype)
        cat[...] = data
        del (data)

    # For string columns change size to maximum length of that column
    dt2 = []
    names = dtype.names
    nplen = np.vectorize(len)
    needcopy = False
    for i in range(len(dtype)):
        type1 = type(cat[names[i]][0])
        if type1 is str or type1 is np.str_:
            maxlen = np.max(nplen(cat[names[i]]))
            dt2.append((names[i], str, maxlen + 10))
            needcopy = True
        else:
            dt2.append(dt[i])   # reuse dt value

    # We need to copy
    if needcopy == True:
        dtype2 = np.dtype(dt2)
        cat2 = np.zeros(len(cat), dtype=dtype2)
        for n in names:
            cat2[n] = cat[n]
        cat = cat2
        del cat2

    # Convert to astropy table
    if fmt == 'table':
        cat = Table(cat)

    return cat

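# Usage sketch for query(); `db` stands for an instance of the database
# session class this method belongs to (its constructor is not shown here):
#
#   cat = db.query('visit', where="apogee_id='2M09241296+2723318'")
#   tab = db.query('visit', cols='apogee_id,mjd,snr', where='mjd=59085',
#                  fmt='table')   # astropy Table output
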
def model_interp(teff, logg, metal, mtype='odfnew'):
    """ Interpolate Kurucz model."""

    availteff = np.arange(27) * 250 + 3500.0
    availlogg = np.arange(11) * .5 + 0.
    availmetal = np.arange(7) * 0.5 - 2.5

    if mtype is None:
        mtype = 'odfnew'
    if mtype == 'old':
        availmetal = np.arange(13) * 0.5 - 5.0

    if mtype == 'old':
        ntau = 64
    else:
        ntau = 72

    if mtype == 'odfnew' and teff > 10000:
        avail = Table.read(modeldir() + 'tefflogg.txt', format='ascii')
        avail['col1'].name = 'teff'
        avail['col2'].name = 'logg'
        availteff = avail['teff'].data
        availlogg = avail['logg'].data
        v1, nv1 = dln.where((abs(availteff - teff) < 0.1) &
                            (abs(availlogg - logg) <= 0.001))
        v2 = v1
        v3, nv3 = dln.where(abs(availmetal - metal) <= 0.001)
    else:
        v1, nv1 = dln.where(abs(availteff - teff) <= .1)
        v2, nv2 = dln.where(abs(availlogg - logg) <= 0.001)
        v3, nv3 = dln.where(abs(availmetal - metal) <= 0.001)

    # Linear Interpolation
    teffimif = max(np.where(availteff <= teff)[0])      # immediately inferior Teff
    loggimif = max(np.where(availlogg <= logg)[0])      # immediately inferior logg
    metalimif = max(np.where(availmetal <= metal)[0])   # immediately inferior [Fe/H]
    teffimsu = min(np.where(availteff >= teff)[0])      # immediately superior Teff
    loggimsu = min(np.where(availlogg >= logg)[0])      # immediately superior logg
    metalimsu = min(np.where(availmetal >= metal)[0])   # immediately superior [Fe/H]

    if mtype == 'old':
        ncols = 7
    else:
        ncols = 10

    grid = np.zeros((2, 2, 2, ncols), dtype=np.float64)
    tm1 = availteff[teffimif]
    tp1 = availteff[teffimsu]
    lm1 = availlogg[loggimif]
    lp1 = availlogg[loggimsu]
    mm1 = availmetal[metalimif]
    mp1 = availmetal[metalimsu]

    if (tp1 != tm1):
        mapteff = (teff - tm1) / (tp1 - tm1)
    else:
        mapteff = 0.5
    if (lp1 != lm1):
        maplogg = (logg - lm1) / (lp1 - lm1)
    else:
        maplogg = 0.5
    if (mp1 != mm1):
        mapmetal = (metal - mm1) / (mp1 - mm1)
    else:
        mapmetal = 0.5

    # Reading the corresponding models
    for i in np.arange(8) + 1:
        if i == 1:
            model, header, tail = read_kurucz(tm1, lm1, mm1, mtype=mtype)
        if i == 2:
            model, h, t = read_kurucz(tm1, lm1, mp1, mtype=mtype)
        if i == 3:
            model, h, t = read_kurucz(tm1, lp1, mm1, mtype=mtype)
        if i == 4:
            model, h, t = read_kurucz(tm1, lp1, mp1, mtype=mtype)
        if i == 5:
            model, h, t = read_kurucz(tp1, lm1, mm1, mtype=mtype)
        if i == 6:
            model, h, t = read_kurucz(tp1, lm1, mp1, mtype=mtype)
        if i == 7:
            model, h, t = read_kurucz(tp1, lp1, mm1, mtype=mtype)
        if i == 8:
            model, h, t = read_kurucz(tp1, lp1, mp1, mtype=mtype)

        if (len(model[0, :]) > ntau):
            m2 = np.zeros((ncols, ntau), dtype=np.float64)
            m2[0, :] = interpol(model[0, :], ntau)
            for j in range(ncols):
                m2[j, :] = interpol(model[j, :], model[0, :], m2[0, :])
            model = m2

        # Getting the tauross scale
        rhox = model[0, :]
        kappaross = model[4, :]
        tauross = np.zeros(ntau, dtype=np.float64)
        tauross[0] = rhox[0] * kappaross[0]
        for ii in np.arange(ntau - 1) + 1:
            tauross[ii] = trapz(rhox[0:ii + 1], kappaross[0:ii + 1])

        if i == 1:
            model1 = model
            tauross1 = tauross
        elif i == 2:
            model2 = model
            tauross2 = tauross
        elif i == 3:
            model3 = model
            tauross3 = tauross
        elif i == 4:
            model4 = model
            tauross4 = tauross
        elif i == 5:
            model5 = model
            tauross5 = tauross
        elif i == 6:
            model6 = model
            tauross6 = tauross
        elif i == 7:
            model7 = model
            tauross7 = tauross
        elif i == 8:
            model8 = model
            tauross8 = tauross
        else:
            print('% KMOD: i should be 1--8!')

    model = np.zeros((ncols, ntau), dtype=np.float64)   # cleaning up for re-using the matrix

    # Defining the mass (RHOX#gr cm-2) sampling
    tauross = tauross1   # re-using the vector tauross
    bot_tauross = min([tauross1[ntau - 1], tauross2[ntau - 1],
                       tauross3[ntau - 1], tauross4[ntau - 1],
                       tauross5[ntau - 1], tauross6[ntau - 1],
                       tauross7[ntau - 1], tauross8[ntau - 1]])
    top_tauross = max([tauross1[0], tauross2[0], tauross3[0], tauross4[0],
                       tauross5[0], tauross6[0], tauross7[0], tauross8[0]])
    g, = np.where((tauross >= top_tauross) & (tauross <= bot_tauross))
    tauross_new = dln.interp(np.linspace(0, 1, len(g)), tauross[g],
                             np.linspace(0, 1, ntau), kind='linear')

    # Let's interpolate for every depth
    points = (np.arange(2), np.arange(2), np.arange(2))
    for i in np.arange(ntau - 1) + 1:
        for j in range(ncols):
            grid[0, 0, 0, j] = dln.interp(tauross1[1:], model1[j, 1:], tauross_new[i], kind='linear')
            grid[0, 0, 1, j] = dln.interp(tauross2[1:], model2[j, 1:], tauross_new[i], kind='linear')
            grid[0, 1, 0, j] = dln.interp(tauross3[1:], model3[j, 1:], tauross_new[i], kind='linear')
            grid[0, 1, 1, j] = dln.interp(tauross4[1:], model4[j, 1:], tauross_new[i], kind='linear')
            grid[1, 0, 0, j] = dln.interp(tauross5[1:], model5[j, 1:], tauross_new[i], kind='linear')
            grid[1, 0, 1, j] = dln.interp(tauross6[1:], model6[j, 1:], tauross_new[i], kind='linear')
            grid[1, 1, 0, j] = dln.interp(tauross7[1:], model7[j, 1:], tauross_new[i], kind='linear')
            grid[1, 1, 1, j] = dln.interp(tauross8[1:], model8[j, 1:], tauross_new[i], kind='linear')
            model[j, i] = interpn(points, grid[:, :, :, j],
                                  (mapteff, maplogg, mapmetal), method='linear')

    for j in range(ncols):
        model[j, 0] = model[j, 1] * 0.999

    # Editing the header
    header[0] = strput(header[0], '%7.0f' % teff, 4)
    header[0] = strput(header[0], '%8.5f' % logg, 21)
    tmpstr1 = header[1]
    tmpstr2 = header[4]
    if (metal < 0.0):
        if mtype == 'old':
            header[1] = strput(header[1], '-%3.1f' % abs(metal), 18)
        else:
            header[1] = strput(header[1], '-%3.1f' % abs(metal), 8)
        header[4] = strput(header[4], '%9.5f' % 10**metal, 16)
    else:
        if mtype == 'old':
            header[1] = strput(header[1], '+%3.1f' % abs(metal), 18)
        else:
            header[1] = strput(header[1], '+%3.1f' % abs(metal), 8)
        header[4] = strput(header[4], '%9.5f' % 10**metal, 16)
    header[22] = strput(header[22], '%2i' % ntau, 11)

    return model, header, tail

def mkmodel(teff, logg, metal, outfile=None, ntau=None, mtype='odfnew'):
    """
    Extracts and if necessary interpolates (linearly) a kurucz model from
    his grid.  The routine is intended for stars cooler than 10000 K.
    The grid was ftp'ed from CCP7.

    IN: teff  - float - Effective temperature (K)
        logg  - float - log(g) log_10 of the gravity (cm s-2)
        metal - float - [Fe/H] = log N(Fe)/N(H) - log N(Fe)/N(H)[Sun]

    OUT: outfile - string - name for the output file

    KEYWORD: ntau - returns the number of depth points in the output model
             type - by default, the k2odfnew grid is used ('type' is
                    internally set to 'odfnew') but this keyword can be
                    also set to 'old' or 'alpha' to use the old models
                    from CCP7, or the ak2odfnew models ([alpha/Fe]=+0.4),
                    respectively.

    C. Allende Prieto, UT, May 1999
       bug fixed, UT, Aug 1999
       bug fixed to avoid roundoff errors, keyword ntau added UT, April 2005
       bug fixed, assignment of the right tauscale to each model (deltaT<1%),
          UT, March 2006
       odfnew grids (type keyword), April 2006
    """

    # Constants
    h = 6.626176e-27     # erg s
    c = 299792458e2      # cm s-1
    k = 1.380662e-16     # erg K-1
    R = 1.097373177e-3   # A-1
    e = 1.6021892e-19    # C
    mn = 1.6749543e-24   # gr
    HIP = 13.60e0

    availteff = np.arange(27) * 250 + 3500.0
    availlogg = np.arange(11) * .5 + 0.
    availmetal = np.arange(7) * 0.5 - 2.5

    if mtype is None:
        mtype = 'odfnew'
    if mtype == 'old':
        availmetal = np.arange(13) * 0.5 - 5.0

    if mtype == 'old':
        ntau = 64
    else:
        ntau = 72

    if mtype == 'odfnew' and teff > 10000:
        avail = Table.read(modeldir() + 'tefflogg.txt', format='ascii')
        avail['col1'].name = 'teff'
        avail['col2'].name = 'logg'
        availteff = avail['teff'].data
        availlogg = avail['logg'].data
        v1, nv1 = dln.where((np.abs(availteff - teff) < 0.1) &
                            (np.abs(availlogg - logg) <= 0.001))
        v2 = v1
        nv2 = nv1
        v3, nv3 = dln.where(abs(availmetal - metal) <= 0.001)
    else:
        v1, nv1 = dln.where(abs(availteff - teff) <= .1)
        v2, nv2 = dln.where(abs(availlogg - logg) <= 0.001)
        v3, nv3 = dln.where(abs(availmetal - metal) <= 0.001)

    if (teff <= max(availteff) and teff >= min(availteff) and
            logg <= max(availlogg) and logg >= min(availlogg) and
            metal >= min(availmetal) and metal <= max(availmetal)):
        # Model found, just read it
        if (nv1 > 0 and nv2 > 0 and nv3 > 0):
            # Direct extraction of the model
            teff = availteff[v1[0]]
            logg = availlogg[v2[0]]
            metal = availmetal[v3[0]]
            model, header, tail = read_kurucz(teff, logg, metal, mtype=mtype)
            ntau = len(model[0, :])
        # Need to interpolate
        else:
            model, header, tail = model_interp(teff, logg, metal, mtype=mtype)
    else:
        print('% KMOD: The requested values of ([Fe/H],logg,Teff) fall outside')
        print('% KMOD: the boundaries of the grid.')
        print('% KMOD: Temperatures higher that 10000 K can be reached, by modifying rd_kmod.')
        import pdb
        pdb.set_trace()
        return None, None, None

    # Writing the output file
    if outfile is not None:
        if os.path.exists(outfile):
            os.remove(outfile)
        with open(outfile, 'w') as fil:
            for i in range(len(header)):
                fil.write(header[i] + '\n')
            if mtype == 'old':
                for i in range(ntau):
                    fil.write('%15.8E %8.1f %9.3E %9.3E %9.3E %9.3E %9.3E\n' % tuple(model[:, i]))
            else:
                for i in range(ntau):
                    fil.write('%15.8E %8.1f %9.3E %9.3E %9.3E %9.3E %9.3E %9.3E %9.3E %9.3E\n' % tuple(model[:, i]))
            for i in range(len(tail)):
                if i != len(tail) - 1:
                    fil.write(tail[i] + '\n')
                else:
                    fil.write(tail[i])

    return model, header, tail

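# Usage sketch for mkmodel(): interpolate a Kurucz model for roughly solar
# parameters and write it out (the output filename is arbitrary, and the
# Kurucz grid files must be available to read_kurucz()):
#
#   model, header, tail = mkmodel(5777., 4.44, 0.0, outfile='ksun.mod')
#   # model is a (ncols, ntau) array with the atmospheric structure
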
def selectvariables(obj):
    """ Select variables using photometric variability indices."""

    nobj = len(obj)
    fidmag = np.zeros(nobj, float) + np.nan   # fiducial magnitude

    # Loop over the objects
    for i in range(nobj):
        # Fiducial magnitude, used to select variables below
        #   order of priority: r,g,i,z,Y,VR,u
        if obj['nphot'][i] > 0:
            magarr = np.zeros(7, float)
            for ii, nn in enumerate(['rmag', 'gmag', 'imag', 'zmag', 'ymag', 'vrmag', 'umag']):
                magarr[ii] = obj[nn][i]
            gfid, ngfid = dln.where(magarr < 50)
            if ngfid > 0:
                fidmag[i] = magarr[gfid[0]]

    # Select Variables
    #  1) Construct fiducial magnitude (done in loop above)
    #  2) Construct median VAR and sigma VAR versus magnitude
    #  3) Find objects that Nsigma above the median VAR line
    si = np.argsort(fidmag)   # NaNs are at end
    varcol = 'madvar'
    gdvar, ngdvar, bdvar, nbdvar = dln.where(np.isfinite(obj[varcol]) & np.isfinite(fidmag), comp=True)
    if ngdvar > 0:
        nbins = np.ceil((np.max(fidmag[gdvar]) - np.min(fidmag[gdvar])) / 0.25)
        nbins = int(np.max([2, nbins]))
        fidmagmed, bin_edges1, binnumber1 = bindata.binned_statistic(fidmag[gdvar], fidmag[gdvar],
                                                                     statistic='nanmedian', bins=nbins)
        numhist, _, _ = bindata.binned_statistic(fidmag[gdvar], fidmag[gdvar],
                                                 statistic='count', bins=nbins)
        # Fix NaNs in fidmagmed
        bdfidmagmed, nbdfidmagmed = dln.where(np.isfinite(fidmagmed) == False)
        if nbdfidmagmed > 0:
            fidmagmed_bins = 0.5 * (bin_edges1[0:-1] + bin_edges1[1:])
            fidmagmed[bdfidmagmed] = fidmagmed_bins[bdfidmagmed]
        # Median metric
        varmed, bin_edges2, binnumber2 = bindata.binned_statistic(fidmag[gdvar], obj[varcol][gdvar],
                                                                  statistic='nanmedian', bins=nbins)
        # Smooth, it handles NaNs well
        smlen = 5
        smvarmed = dln.gsmooth(varmed, smlen)
        bdsmvarmed, nbdsmvarmed = dln.where(np.isfinite(smvarmed) == False)
        if nbdsmvarmed > 0:
            smvarmed[bdsmvarmed] = np.nanmedian(smvarmed)
        # Interpolate to all the objects
        gv, ngv, bv, nbv = dln.where(np.isfinite(smvarmed), comp=True)
        fvarmed = interp1d(fidmagmed[gv], smvarmed[gv], kind='linear',
                           bounds_error=False, fill_value=(smvarmed[0], smvarmed[-1]),
                           assume_sorted=True)
        objvarmed = np.zeros(nobj, float)
        objvarmed[gdvar] = fvarmed(fidmag[gdvar])
        objvarmed[gdvar] = np.maximum(np.min(smvarmed[gv]), objvarmed[gdvar])   # lower limit
        if nbdvar > 0:
            objvarmed[bdvar] = smvarmed[gv[-1]]   # objects with bad fidmag, set to last value

        # Scatter in metric around median
        #   calculate MAD ourselves so that it's around our computed median metric line
        varsig, bin_edges3, binnumber3 = bindata.binned_statistic(fidmag[gdvar],
                                                                  np.abs(obj[varcol][gdvar] - objvarmed[gdvar]),
                                                                  statistic='nanmedian', bins=nbins)
        varsig *= 1.4826   # scale MAD to stddev
        # Fix values for bins with few points
        bdhist, nbdhist, gdhist, ngdhist = dln.where(numhist < 3, comp=True)
        if nbdhist > 0:
            if ngdhist > 0:
                varsig[bdhist] = np.nanmedian(varsig[gdhist])
            else:
                varsig[:] = 0.02

        # Smooth
        smvarsig = dln.gsmooth(varsig, smlen)
        # Interpolate to all the objects
        gv, ngv, bv, nbv = dln.where(np.isfinite(smvarsig), comp=True)
        fvarsig = interp1d(fidmagmed[gv], smvarsig[gv], kind='linear',
                           bounds_error=False, fill_value=(smvarsig[gv[0]], smvarsig[gv[-1]]),
                           assume_sorted=True)
        objvarsig = np.zeros(nobj, float)
        objvarsig[gdvar] = fvarsig(fidmag[gdvar])
        objvarsig[gdvar] = np.maximum(np.min(smvarsig[gv]), objvarsig[gdvar])   # lower limit
        if nbdvar > 0:
            objvarsig[bdvar] = smvarsig[gv[-1]]   # objects with bad fidmag, set to last value

        # Detect positive outliers
        nsigvarthresh = 10.0
        nsigvar = (obj[varcol] - objvarmed) / objvarsig
        obj['nsigvar'][gdvar] = nsigvar[gdvar]
        isvar, nisvar = dln.where(nsigvar[gdvar] > nsigvarthresh)
        print(str(nisvar) + ' variables detected')
        if nisvar > 0:
            obj['variable10sig'][gdvar[isvar]] = 1

    return obj

def varmetric(inpmeas):
    """ Compute photometric variability metrics."""
    # meas is a catalog of measurements for a single object

    nmeas = len(inpmeas)

    # Need the catalog to be a numpy array
    if isinstance(inpmeas, np.ndarray):
        meas = inpmeas
    else:
        meas = np.array(inpmeas)

    filtcol = 'FILTER'
    if filtcol not in meas.dtype.names:
        filtcol = 'filter'
    if filtcol not in meas.dtype.names:
        raise ValueError('No filter column')
    magcol = 'MAG_AUTO'
    if magcol not in meas.dtype.names:
        magcol = 'mag_auto'
    if magcol not in meas.dtype.names:
        raise ValueError('No mag_auto column')
    errcol = 'MAGERR_AUTO'
    if errcol not in meas.dtype.names:
        errcol = 'magerr_auto'
    if errcol not in meas.dtype.names:
        raise ValueError('No magerr_auto column')
    mjdcol = 'MJD'
    if mjdcol not in meas.dtype.names:
        mjdcol = 'mjd'
    if mjdcol not in meas.dtype.names:
        raise ValueError('No mjd column')

    # OBJ schema
    dtype_obj = np.dtype([('deltamjd', np.float32), ('ndet', np.int16), ('nphot', np.int16),
                          ('ndetu', np.int16), ('nphotu', np.int16), ('umag', np.float32),
                          ('urms', np.float32), ('uerr', np.float32),
                          ('ndetg', np.int16), ('nphotg', np.int16), ('gmag', np.float32),
                          ('grms', np.float32), ('gerr', np.float32),
                          ('ndetr', np.int16), ('nphotr', np.int16), ('rmag', np.float32),
                          ('rrms', np.float32), ('rerr', np.float32),
                          ('ndeti', np.int16), ('nphoti', np.int16), ('imag', np.float32),
                          ('irms', np.float32), ('ierr', np.float32),
                          ('ndetz', np.int16), ('nphotz', np.int16), ('zmag', np.float32),
                          ('zrms', np.float32), ('zerr', np.float32),
                          ('ndety', np.int16), ('nphoty', np.int16), ('ymag', np.float32),
                          ('yrms', np.float32), ('yerr', np.float32),
                          ('ndetvr', np.int16), ('nphotvr', np.int16), ('vrmag', np.float32),
                          ('vrrms', np.float32), ('vrerr', np.float32),
                          ('rmsvar', np.float32), ('madvar', np.float32), ('iqrvar', np.float32),
                          ('etavar', np.float32), ('jvar', np.float32), ('kvar', np.float32),
                          ('chivar', np.float32), ('romsvar', np.float32),
                          ('variable10sig', np.int16), ('nsigvar', np.float32)])

    # Initialize the OBJ structured array, all bad to start
    obj = np.zeros(1, dtype=dtype_obj)
    for f in ['rmsvar', 'madvar', 'iqrvar', 'etavar', 'jvar', 'kvar', 'chivar', 'romsvar']:
        obj[f] = np.nan
    for f in ['u', 'g', 'r', 'i', 'z', 'y', 'vr']:
        obj[f + 'mag'] = 99.99
        obj[f + 'err'] = 9.99
        obj[f + 'rms'] = np.nan

    obj['ndet'] = nmeas
    obj['deltamjd'] = np.max(meas[mjdcol]) - np.min(meas[mjdcol])

    # Mean magnitudes
    # Convert totalwt and totalfluxwt to MAG and ERR
    #  and average the morphology parameters PER FILTER
    filtindex = dln.create_index(meas[filtcol].astype(np.str))
    nfilters = len(filtindex['value'])
    resid = np.zeros(nmeas) + np.nan      # residual mag
    relresid = np.zeros(nmeas) + np.nan   # residual mag relative to the uncertainty
    for f in range(nfilters):
        filt = filtindex['value'][f].lower()
        findx = filtindex['index'][filtindex['lo'][f]:filtindex['hi'][f] + 1]
        obj['ndet' + filt] = filtindex['num'][f]
        gph, ngph = dln.where(meas[magcol][findx] < 50)
        obj['nphot' + filt] = ngph
        if ngph == 1:
            obj[filt + 'mag'] = meas[magcol][findx[gph]]
            obj[filt + 'err'] = meas[errcol][findx[gph]]
        if ngph > 1:
            newmag, newerr = dln.wtmean(meas[magcol][findx[gph]], meas[errcol][findx[gph]],
                                        magnitude=True, reweight=True, error=True)
            obj[filt + 'mag'] = newmag
            obj[filt + 'err'] = newerr
            # Calculate RMS
            obj[filt + 'rms'] = np.sqrt(np.mean((meas[magcol][findx[gph]] - newmag)**2))
            # Residual mag
            resid[findx[gph]] = meas[magcol][findx[gph]] - newmag
            # Residual mag relative to the uncertainty
            #  set a lower threshold of 0.02 in the uncertainty
            relresid[findx[gph]] = np.sqrt(ngph / (ngph - 1)) * (meas[magcol][findx[gph]] - newmag) / np.maximum(meas[errcol][findx[gph]], 0.02)

    # Calculate variability indices
    gdresid = np.isfinite(resid)
    ngdresid = np.sum(gdresid)
    if ngdresid > 0:
        resid2 = resid[gdresid]
        sumresidsq = np.sum(resid2**2)
        tsi = np.argsort(meas[mjdcol][gdresid])
        resid2tsi = resid2[tsi]
        quartiles = np.percentile(resid2, [25, 50, 75])
        # RMS
        rms = np.sqrt(sumresidsq / ngdresid)
        # MAD
        madvar = 1.4826 * np.median(np.abs(resid2 - quartiles[1]))
        # IQR
        iqrvar = 0.741289 * (quartiles[2] - quartiles[0])
        # 1/eta
        etavar = sumresidsq / np.sum((resid2tsi[1:] - resid2tsi[0:-1])**2)
        obj['rmsvar'] = rms
        obj['madvar'] = madvar
        obj['iqrvar'] = iqrvar
        obj['etavar'] = etavar

    # Calculate variability indices wrt to uncertainties
    gdrelresid = np.isfinite(relresid)
    ngdrelresid = np.sum(gdrelresid)
    if ngdrelresid > 0:
        relresid2 = relresid[gdrelresid]
        pk = relresid2**2 - 1
        jvar = np.sum(np.sign(pk) * np.sqrt(np.abs(pk))) / ngdrelresid
        #avgrelvar = np.mean(np.abs(relresid2))    # average of absolute relative residuals
        chivar = np.sqrt(np.sum(relresid2**2)) / ngdrelresid
        kdenom = np.sqrt(np.sum(relresid2**2) / ngdrelresid)
        if kdenom != 0:
            kvar = (np.sum(np.abs(relresid2)) / ngdrelresid) / kdenom
        else:
            kvar = np.nan
        # RoMS
        romsvar = np.sum(np.abs(relresid2)) / (ngdrelresid - 1)
        obj['jvar'] = jvar
        obj['kvar'] = kvar
        #obj['avgrelvar'] = avgrelvar
        obj['chivar'] = chivar
        obj['romsvar'] = romsvar
        #if chivar>50: import pdb; pdb.set_trace()

    # Make NPHOT from NPHOTX
    obj['nphot'] = (obj['nphotu'] + obj['nphotg'] + obj['nphotr'] + obj['nphoti'] +
                    obj['nphotz'] + obj['nphoty'] + obj['nphotvr'])

    # Fiducial magnitude, used to select variables below
    #   order of priority: r,g,i,z,Y,VR,u
    if obj['nphot'] > 0:
        magarr = np.zeros(7, float)
        for ii, nn in enumerate(['rmag', 'gmag', 'imag', 'zmag', 'ymag', 'vrmag', 'umag']):
            magarr[ii] = obj[nn]
        gfid, ngfid = dln.where(magarr < 50)
        if ngfid > 0:
            fidmag = magarr[gfid[0]]

    return obj

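# Usage sketch for varmetric(), assuming `meas` is a structured array (or
# astropy Table) of all measurements of a single object with FILTER,
# MAG_AUTO, MAGERR_AUTO and MJD columns:
#
#   obj = varmetric(meas)
#   print(obj['madvar'], obj['etavar'], obj['chivar'])   # variability indices
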
alldirs = np.zeros(nexpdir, (np.str, 200))
alldirs[:] = tmpdir
nallcmd = len(allcmd)

# Check what's been done already
check = False
#if not redo:
if check:
    rootLogger.info("Checking if any have already been done")
    exists = np.zeros(dln.size(allexpdir), bool) + False
    for ip, p in enumerate(allexpdir):
        base = os.path.basename(allexpdir[ip])
        outfile = allexpdir[ip] + '/' + base + '.updated'
        if os.path.exists(outfile):
            exists[ip] = True
        if ip % 1000 == 0:
            print(str(ip) + ' ' + str(p))
    bd, nbd, gd, ngd = dln.where(exists, comp=True)
    if ngd == 0:
        rootLogger.info('All exposures were previously updated. Nothing to run')
        sys.exit()
    if nbd > 0:
        rootLogger.info(str(nbd) + ' exposures previously updated.  Removing them from the list. ' + str(ngd) + ' left.')
        allexpdir = allexpdir[gd]
        allcmd = allcmd[gd]
        alldirs = alldirs[gd]
        nallcmd = len(allcmd)

rootLogger.info(str(nallcmd) + ' exposures to process')

# RANDOMIZE
np.random.seed(0)
rnd = np.argsort(np.random.rand(nallcmd))

def continuum(spec, norder=6, perclevel=90.0, binsize=0.1, interp=True):
    """
    Measure the continuum of a spectrum.

    Parameters
    ----------
    spec : Spec1D object
        A spectrum object.  This at least needs to have a FLUX and WAVE attribute.
    norder : float, optional
        Polynomial order to use for the continuum fitting.  The default is 6.
    perclevel : float, optional
        Percent level to use for the continuum value in large bins.  Default is 90.
    binsize : float, optional
        Fraction of the wavelength range (scaled to -1 to +1) to bin.  Default is 0.1.
    interp : bool, optional
        Use interpolation of the binned values instead of a polynomial fit.
        Default is True.

    Returns
    -------
    cont : numpy array
        The continuum array, in the same shape as the input flux.

    Examples
    --------
    .. code-block:: python

         cont = continuum(spec)

    """
    wave = spec.wave.copy().reshape(spec.npix, spec.norder)   # make 2D
    flux = spec.flux.copy().reshape(spec.npix, spec.norder)   # make 2D
    err = spec.err.copy().reshape(spec.npix, spec.norder)     # make 2D
    mask = spec.mask.copy().reshape(spec.npix, spec.norder)   # make 2D
    cont = err.copy() * 0.0 + 1
    for o in range(spec.norder):
        w = wave[:, o].copy()
        wr = [np.min(w), np.max(w)]
        x = (w - np.mean(wr)) / dln.valrange(wr) * 2    # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        if medy <= 0.0:
            medy = 1.0
        y /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x[~m], y[~m], 2, robust=True)
        sig = dln.mad(y - dln.poly(x, coef))
        bd, nbd = dln.where((y - dln.poly(x, coef)) > 5 * sig)
        if nbd > 0:
            m[bd] = True
        gdmask = (y > 0) & (m == False)   # need positive fluxes and no mask set
        if np.sum(gdmask) == 0:
            continue
        # Bin the data points
        xr = [np.nanmin(x), np.nanmax(x)]
        bins = int(np.ceil((xr[1] - xr[0]) / binsize))
        ybin, bin_edges, binnumber = bindata.binned_statistic(x[gdmask], y[gdmask],
                                                              statistic='percentile',
                                                              percentile=perclevel,
                                                              bins=bins, range=None)
        xbin = bin_edges[0:-1] + 0.5 * binsize
        # Interpolate to full grid
        if interp is True:
            fnt = np.isfinite(ybin)
            cont1 = dln.interp(xbin[fnt], ybin[fnt], x, kind='quadratic',
                               extrapolate=True, exporder=1)
        else:
            coef = dln.poly_fit(xbin, ybin, norder)
            cont1 = dln.poly(x, coef)
        cont1 *= medy
        cont[:, o] = cont1

    # Flatten to 1D if norder=1
    if spec.norder == 1:
        cont = cont.flatten()

    return cont

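# Short sketch for continuum(), assuming `spec` is a Spec1D object:
#
#   cont = continuum(spec)                           # binned percentile + interpolation
#   cont2 = continuum(spec, interp=False, norder=4)  # polynomial fit instead
#   normflux = spec.flux / cont                      # continuum-normalized flux
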
def apvisit(filename, badval=20735):
    """
    Read a SDSS APOGEE apVisit spectrum.

    Parameters
    ----------
    filename : string
        The name of the spectrum file to load.

    Returns
    -------
    spec : Spec1D object
        The spectrum as a Spec1D object.

    Examples
    --------
    spec = apvisit('spec.fits')

    """
    base, ext = os.path.splitext(os.path.basename(filename))

    # APOGEE apVisit, visit-level spectrum
    if (base.find("apVisit") > -1) | (base.find("asVisit") > -1):
        # HISTORY AP1DVISIT:  HDU0 = Header only
        # HISTORY AP1DVISIT:  HDU1 - Flux (10^-17 ergs/s/cm^2/Ang)
        # HISTORY AP1DVISIT:  HDU2 - Error (10^-17 ergs/s/cm^2/Ang)
        # HISTORY AP1DVISIT:  HDU3 - Flag mask (bitwise OR combined)
        # HISTORY AP1DVISIT:  HDU4 - Wavelength (Ang)
        # HISTORY AP1DVISIT:  HDU5 - Sky (10^-17 ergs/s/cm^2/Ang)
        # HISTORY AP1DVISIT:  HDU6 - Sky Error (10^-17 ergs/s/cm^2/Ang)
        # HISTORY AP1DVISIT:  HDU7 - Telluric
        # HISTORY AP1DVISIT:  HDU8 - Telluric Error
        # HISTORY AP1DVISIT:  HDU9 - Wavelength coefficients
        # HISTORY AP1DVISIT:  HDU10 - LSF coefficients
        # HISTORY AP1DVISIT:  HDU11 - RV catalog

        # Get number of extensions
        hdulist = fits.open(filename)
        nhdu = len(hdulist)
        hdulist.close()

        # flux, err, sky, skyerr are in units of 1e-17
        flux = fits.getdata(filename, 1).T * 1e-17   # [Npix,Norder]
        wave = fits.getdata(filename, 4).T
        lsfcoef = fits.getdata(filename, 10).T
        spec = Spec1D(flux, wave=wave, lsfpars=lsfcoef, lsftype='Gauss-Hermite',
                      lsfxtype='Pixels')
        spec.filename = filename
        spec.sptype = "Visit"
        spec.waveregime = "NIR"
        spec.instrument = "APOGEE"
        spec.head = fits.getheader(filename, 0)
        spec.err = fits.getdata(filename, 2).T * 1e-17   # [Npix,Norder]
        #bad = (spec.err<=0)   # fix bad error values
        #if np.sum(bad) > 0:
        #    spec.err[bad] = 1e30
        spec.bitmask = fits.getdata(filename, 3).T
        spec.sky = fits.getdata(filename, 5).T * 1e-17
        spec.skyerr = fits.getdata(filename, 6).T * 1e-17
        spec.telluric = fits.getdata(filename, 7).T
        spec.telerr = fits.getdata(filename, 8).T
        spec.wcoef = fits.getdata(filename, 9).T

        # Create the bad pixel mask
        # "bad" pixels:
        #   flag = ['BADPIX','CRPIX','SATPIX','UNFIXABLE','BADDARK','BADFLAT','BADERR','NOSKY',
        #           'LITTROW_GHOST','PERSIST_HIGH','PERSIST_MED','PERSIST_LOW','SIG_SKYLINE','SIG_TELLURIC','NOT_ENOUGH_PSF','']
        #   badflag = [1,1,1,1,1,1,1,1,
        #              0,0,0,0,0,0,1,0]
        #mask = (np.bitwise_and(spec.bitmask,16639)!=0) | (np.isfinite(spec.flux)==False)
        mask = (np.bitwise_and(spec.bitmask, badval) != 0) | (np.isfinite(spec.flux) == False)

        # Extra masking for bright skylines
        # Commented out in favor of using SIG_SKYLINE in bitmask
        #  This can also mask too many pixels
        #x = np.arange(spec.npix)
        #nsky = 4
        ##plt.clf()
        #for i in range(spec.norder):
        #    sky = spec.sky[:,i]
        #    medsky = median_filter(sky,201,mode='reflect')
        #    medcoef = dln.poly_fit(x,medsky/np.nanmedian(medsky),2)
        #    medsky2 = dln.poly(x,medcoef)*np.nanmedian(medsky)
        #    skymask1 = (sky>nsky*medsky2)    # pixels Nsig above median sky
        #    #mask[:,i] = np.logical_or(mask[:,i],skymask1)    # OR combine
        #    #plt.plot(spec.wave[:,i],sky)
        #    #plt.plot(spec.wave[:,i],nsky*medsky2)
        #    #plt.plot(spec.wave[:,i],spec.flux[:,i])
        ##plt.draw()
        spec.mask = mask

        # Fix NaN pixels
        for i in range(spec.norder):
            bd, nbd = dln.where((np.isfinite(spec.flux[:, i]) == False) | (spec.err[:, i] <= 0))
            if nbd > 0:
                spec.flux[bd, i] = 0.0
                spec.err[bd, i] = 1e30
                spec.mask[bd, i] = True
        if (nhdu >= 11):
            spec.meta = fits.getdata(filename, 11)   # catalog of RV and other meta-data

        # Spectrum, error, sky, skyerr are in units of 1e-17
        spec.snr = spec.head["SNR"]
        if base.find("apVisit") > -1:
            spec.observatory = 'apo'
        else:
            spec.observatory = 'lco'
        spec.wavevac = True

        return spec

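# Usage sketch for apvisit(); the filename is hypothetical but follows the
# apVisit naming convention:
#
#   spec = apvisit('apVisit-r13-11950-59085-123.fits')
#   print(spec.snr, spec.norder)   # S/N from the header, number of orders
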
def maskoutliers(spec, nsig=5, verbose=False):
    """
    Mask large positive outliers and negative flux pixels in the spectrum.

    Parameters
    ----------
    spec : Spec1D object
        Observed spectrum to mask outliers on.
    nsig : int, optional
        Number of standard deviations to use for the outlier rejection.
        Default is 5.0.
    verbose : boolean, optional
        Verbose output.  Default is False.

    Returns
    -------
    spec2 : Spec1D object
        Spectrum with outliers masked.

    Example
    -------
    .. code-block:: python

         spec = maskoutliers(spec,nsig=5)

    """
    print = getprintfunc()  # Get print function to be used locally, allows for easy logging

    spec2 = spec.copy()
    wave = spec2.wave.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    flux = spec2.flux.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    err = spec2.err.copy().reshape(spec2.npix, spec2.norder)     # make 2D
    mask = spec2.mask.copy().reshape(spec2.npix, spec2.norder)   # make 2D
    totnbd = 0
    for o in range(spec2.norder):
        w = wave[:, o].copy()
        x = (w - np.median(w)) / (np.max(w * 0.5) - np.min(w * 0.5))  # -1 to +1
        y = flux[:, o].copy()
        m = mask[:, o].copy()
        # Divide by median
        medy = np.nanmedian(y)
        if medy <= 0.0:
            medy = 1.0
        y /= medy
        # Perform sigma clipping out large positive outliers
        coef = dln.poly_fit(x, y, 2, robust=True)
        sig = dln.mad(y - dln.poly(x, coef))
        bd, nbd = dln.where(((y - dln.poly(x, coef)) > nsig * sig) | (y < 0))
        totnbd += nbd
        if nbd > 0:
            flux[bd, o] = dln.poly(x[bd], coef) * medy
            err[bd, o] = 1e30
            mask[bd, o] = True

    # Flatten to 1D if norder=1
    if spec2.norder == 1:
        flux = flux.flatten()
        err = err.flatten()
        mask = mask.flatten()

    # Stuff back in
    spec2.flux = flux
    spec2.err = err
    spec2.mask = mask

    if verbose is True:
        print('Masked ' + str(totnbd) + ' outlier or negative pixels')

    return spec2

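# The two masking routines above are typically chained: clip gross outliers
# first, then (once a best-fit model is available) clip model-discrepant
# pixels.  A sketch, assuming `spec` and `model` are matching Spec1D objects:
#
#   spec2 = maskoutliers(spec, nsig=5, verbose=True)
#   spec3 = maskdiscrepant(spec2, model, nsig=10, verbose=True)
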
def readcal(calfile):
    """
    This reads all of the information from a master calibration index and
    returns it in a dictionary where each calibration type has a structured
    array that can be accessed by the calibration name (e.g. 'dark').
    """
    if os.path.exists(calfile) == False:
        raise ValueError(calfile + ' NOT FOUND')

    # Load the file
    lines = dln.readlines(calfile)
    lines = np.char.array(lines)
    # Get rid of comment and blank lines
    gd, ngd = dln.where((lines.find('#') != 0) & (lines != ''))
    if ngd == 0:
        raise ValueError('No good calibration lines')
    lines = lines[gd]

    # Initialize calibration dictionary
    caldict = OrderedDict()
    dtdict = OrderedDict()

    # -- Darks --
    # mjd1, mjd2, name, frames
    # dark    55600 56860 12910009 12910009-12910037
    # dark    56861 99999 15640003 15640003-15640021
    dtdict['dark'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                               ('frames', np.str, 100)])
    # -- Flats --
    # mjd1, mjd2, name, frames, nrep, dithered
    # flat    99999 55761 01380106 1380106-1380134 1 1
    # flat    99999 99999 02410013 2410013-2410022 1 0
    dtdict['flat'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                               ('frames', np.str, 100), ('nrep', int), ('dithered', int)])
    # -- Sparse --
    # mjd1, mjd2, name, frames, darkframes, dmax, maxread
    # sparse  55600 55761 01590015 1590015-1590024 0 21 30,30,20
    # sparse  55797 99999 02410059 2410059-2410068 2410058,2410069 21 30,30,20
    dtdict['sparse'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                                 ('frames', np.str, 100), ('darkframes', np.str, 100),
                                 ('dmax', int), ('maxread', np.str, 100)])
    # -- Fiber --
    # mjd1, mjd2, name
    # fiber   55600 55761 01970078
    # fiber   55797 56860 02410024
    dtdict['fiber'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50)])
    # -- Badfiber --
    # mjd1, mjd2, frames
    # badfiber 55600 57008 0
    # badfiber 57009 57177 195
    dtdict['badfiber'] = np.dtype([('mjd1', int), ('mjd2', int), ('frames', np.str, 100)])
    # -- Fixfiber --
    # mjd1, mjd2, name
    # fixfiber 56764 56773 1
    # fixfiber 58038 58046 2
    dtdict['fixfiber'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50)])
    # -- Wave --
    # mjd1, mjd2, name, frames, psfid
    # wave    55699 55699 01370096 1370096,1370099 1370098
    # wave    55700 55700 01380079 1380079 1380081
    dtdict['wave'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                               ('frames', np.str, 100), ('psfid', int)])
    # -- Multiwave --
    # mjd1, mjd2, name, frames
    # multiwave 55800 56130 2380000 02390007,02390008,02500007
    # multiwave 56130 56512 5680000 05870007,05870008,05870018,05870019
    dtdict['multiwave'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                                    ('frames', np.str, 500)])
    # -- LSF --
    # mjd1, mjd2, name, frames, psfid
    # lsf     55800 56130 03430016 03430016 03430020
    # lsf     56130 56512 07510018 07510018 07510022
    dtdict['lsf'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                              ('frames', np.str, 100), ('psfid', int)])
    # -- Det --
    # mjd1, mjd2, name, linid
    # det     99999 99999 55640 0
    # det     55600 56860 11870003 11870003
    dtdict['det'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                              ('linid', int)])
    # -- BPM --
    # mjd1, mjd2, name, darkid, flatid
    # bpm     99999 99999 05560001 5560001 4750009
    # bpm     55600 56860 12910009 12910009 4750009
    dtdict['bpm'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                              ('darkid', int), ('flatid', int)])
    # -- Littrow --
    # mjd1, mjd2, name, psfid
    # littrow 55600 56860 06670109 6670109
    # littrow 56861 99999 13400052 13400052
    dtdict['littrow'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                                  ('psfid', int)])
    # -- Persist --
    # mjd1, mjd2, name, darkid, flatid, thresh
    # persist 55600 56860 04680019 4680019 4680018 0.03
    # persist 56861 99999 13400061 13400061 13400060 0.03
    dtdict['persist'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                                  ('darkid', int), ('flatid', int), ('thresh', float)])
    # -- Persistmodel --
    # mjd1, mjd2, name
    # persistmodel 55600 56860 57184
    # persistmodel 56861 99999 0
    dtdict['persistmodel'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50)])
    # -- Response --
    # mjd1, mjd2, name, fluxid, psfid, temp
    # response 55600 99999 0 0 0 0
    dtdict['response'] = np.dtype([('mjd1', int), ('mjd2', int), ('name', np.str, 50),
                                   ('fluxid', int), ('psfid', int), ('temp', float)])

    # Readnoise
    # frame1, frame2
    # rn 1380094 1380095
    # rn 1380102 1380103
    #dtdict['rn'] = np.dtype([('frame1',int),('frame2',int)])
    # Gain
    # frame1, frame2
    #dtdict['gain'] = np.dtype([('frame1',int),('frame2',int)])
    # READNOISE and GAIN lines are NOT used

    # Load the data
    for caltype in dtdict.keys():
        cat = loadcaltype(lines, caltype, dtdict[caltype])
        caldict[caltype.strip()] = cat

    return caldict

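# Sketch tying readcal() and parsecaldict() (defined earlier) together; the
# master calibration filename here is hypothetical:
#
#   caldict = readcal('apogee-n-cal.par')
#   darkname = parsecaldict(caldict['dark'], 56000)   # dark valid at MJD 56000
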
def measurement_update(expdir): t0 = time.time() hostname = socket.gethostname() host = hostname.split('.')[0] # Get version number from exposure directory lo = expdir.find('nsc/instcal/') dum = expdir[lo + 12:] version = dum[0:dum.find('/')] cmbdir = '/net/dl2/dnidever/nsc/instcal/' + version + '/' edir = '/net/dl1/users/dnidever/nsc/instcal/' + version + '/' nside = 128 # Check if output file already exists base = os.path.basename(expdir) # Log file #------------------ # format is nsc_combine_main.DATETIME.log ltime = time.localtime() # time.struct_time(tm_year=2019, tm_mon=7, tm_mday=22, tm_hour=0, tm_min=30, tm_sec=20, tm_wday=0, tm_yday=203, tm_isdst=1) smonth = str(ltime[1]) if ltime[1] < 10: smonth = '0' + smonth sday = str(ltime[2]) if ltime[2] < 10: sday = '0' + sday syear = str(ltime[0])[2:] shour = str(ltime[3]) if ltime[3] < 10: shour = '0' + shour sminute = str(ltime[4]) if ltime[4] < 10: sminute = '0' + sminute ssecond = str(int(ltime[5])) if ltime[5] < 10: ssecond = '0' + ssecond logtime = smonth + sday + syear + shour + sminute + ssecond logfile = expdir + '/' + base + '_measure_update.' + logtime + '.log' if os.path.exists(logfile): os.remove(logfile) # Set up logging to screen and logfile logFormatter = logging.Formatter( "%(asctime)s [%(levelname)-5.5s] %(message)s") rootLogger = logging.getLogger() fileHandler = logging.FileHandler(logfile) fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) rootLogger.setLevel(logging.NOTSET) rootLogger.info( 'Adding objectID for measurement catalogs for exposure = ' + base) rootLogger.info("expdir = " + expdir) rootLogger.info("host = " + host) rootLogger.info(" ") # Load the exposure and metadata files metafile = expdir + '/' + base + '_meta.fits' meta = Table.read(metafile, 1) nmeta = len(meta) chstr = Table.read(metafile, 2) rootLogger.info('KLUDGE!!! 
Changing /dl1 filenames to /dl2 filenames') cols = ['EXPDIR', 'FILENAME', 'MEASFILE'] for c in cols: f = np.char.array(chstr[c]).decode() f = np.char.array(f).replace('/dl1/users/dnidever/', '/dl2/dnidever/') chstr[c] = f nchips = len(chstr) measdtype = np.dtype([('MEASID', 'S50'), ('OBJECTID', 'S50'), ('EXPOSURE', 'S50'), ('CCDNUM', '>i2'), ('FILTER', 'S2'), ('MJD', '>f8'), ('X', '>f4'), ('Y', '>f4'), ('RA', '>f8'), ('RAERR', '>f4'), ('DEC', '>f8'), ('DECERR', '>f4'), ('MAG_AUTO', '>f4'), ('MAGERR_AUTO', '>f4'), ('MAG_APER1', '>f4'), ('MAGERR_APER1', '>f4'), ('MAG_APER2', '>f4'), ('MAGERR_APER2', '>f4'), ('MAG_APER4', '>f4'), ('MAGERR_APER4', '>f4'), ('MAG_APER8', '>f4'), ('MAGERR_APER8', '>f4'), ('KRON_RADIUS', '>f4'), ('ASEMI', '>f4'), ('ASEMIERR', '>f4'), ('BSEMI', '>f4'), ('BSEMIERR', '>f4'), ('THETA', '>f4'), ('THETAERR', '>f4'), ('FWHM', '>f4'), ('FLAGS', '>i2'), ('CLASS_STAR', '>f4')]) # Load and concatenate the meas catalogs chstr['MEAS_INDEX'] = 0 # keep track of where each chip catalog starts count = 0 meas = Table(data=np.zeros(int(np.sum(chstr['NMEAS'])), dtype=measdtype)) rootLogger.info('Loading and concatenating the chip measurement catalogs') for i in range(nchips): meas1 = Table.read(chstr['MEASFILE'][i].strip(), 1) # load chip meas catalog nmeas1 = len(meas1) meas[count:count + nmeas1] = meas1 chstr['MEAS_INDEX'][i] = count count += nmeas1 measid = np.char.array(meas['MEASID']).strip().decode() nmeas = len(meas) rootLogger.info(str(nmeas) + ' measurements') # Get the OBJECTID from the combined healpix file IDSTR structure # remove any sources that weren't used # Figure out which healpix this figure overlaps pix = hp.ang2pix(nside, meas['RA'], meas['DEC'], lonlat=True) upix = np.unique(pix) npix = len(upix) rootLogger.info(str(npix) + ' HEALPix to query') # Loop over the HEALPix pixels ntotmatch = 0 idstr_dtype = np.dtype([('measid', np.str, 200), ('objectid', np.str, 200), ('pix', int)]) idstr = np.zeros(nmeas, dtype=idstr_dtype) cnt = 0 for i in range(npix): fitsfile = cmbdir + 'combine/' + str(int(upix[i]) // 1000) + '/' + str( upix[i]) + '.fits.gz' dbfile = cmbdir + 'combine/' + str(int(upix[i]) // 1000) + '/' + str( upix[i]) + '_idstr.db' if os.path.exists(dbfile): # Read meas id information from idstr database for this expoure idstr1 = readidstrdb(dbfile, where="exposure=='" + base + "'") nidstr1 = len(idstr1) if nidstr1 > 0: idstr['measid'][cnt:cnt + nidstr1] = idstr1['measid'] idstr['objectid'][cnt:cnt + nidstr1] = idstr1['objectid'] idstr['pix'][cnt:cnt + nidstr1] = upix[i] cnt += nidstr1 rootLogger.info( str(i + 1) + ' ' + str(upix[i]) + ' ' + str(nidstr1)) else: rootLogger.info( str(i + 1) + ' ' + dbfile + ' NOT FOUND. 
            # Check if there are high-resolution healpix idstr databases
            hidbfiles = glob(cmbdir + 'combine/' + str(int(upix[i]) // 1000) +
                             '/' + str(upix[i]) + '_n*_*_idstr.db')
            nhidbfiles = len(hidbfiles)
            if os.path.exists(fitsfile) & (nhidbfiles > 0):
                rootLogger.info('Found high-resolution HEALPix IDSTR files')
                for j in range(nhidbfiles):
                    dbfile1 = hidbfiles[j]
                    dbbase1 = os.path.basename(dbfile1)
                    idstr1 = readidstrdb(dbfile1,
                                         where="exposure=='" + base + "'")
                    nidstr1 = len(idstr1)
                    if nidstr1 > 0:
                        idstr['measid'][cnt:cnt + nidstr1] = idstr1['measid']
                        idstr['objectid'][cnt:cnt + nidstr1] = idstr1['objectid']
                        idstr['pix'][cnt:cnt + nidstr1] = upix[i]
                        cnt += nidstr1
                    rootLogger.info(' ' + str(j + 1) + ' ' + dbbase1 + ' ' +
                                    str(upix[i]) + ' ' + str(nidstr1))

    # Trim any leftover elements of IDSTR
    if cnt < nmeas:
        idstr = idstr[0:cnt]

    # Now match them all up
    rootLogger.info('Matching the measurements')
    idstr_measid = np.char.array(idstr['measid']).strip()
    idstr_objectid = np.char.array(idstr['objectid']).strip()
    ind1, ind2 = dln.match(idstr_measid, measid)
    nmatch = len(ind1)
    if nmatch > 0:
        meas['OBJECTID'][ind2] = idstr_objectid[ind1]

    # Only keep sources with an objectid
    ind, nind = dln.where(
        np.char.array(meas['OBJECTID']).strip().decode() == '')
    # There can be missing/orphaned measurements at healpix boundaries in
    # crowded regions when the DBSCAN eps is different.  But there should be
    # very few of these, so allow a small number to pass.
    if nind > 0:
        rootLogger.info('WARNING: ' + str(nind) +
                        ' measurements are missing OBJECTIDs')
    if ((nmeas >= 20000) & (nind > 20)) | ((nmeas < 20000) & (nind > 3)):
        rootLogger.info('More missing OBJECTIDs than currently allowed.')
        raise ValueError('More missing OBJECTIDs than currently allowed.')

    # Output the updated catalogs
    # (The old per-chip in-place update, with .bak backups and per-file
    #  .updated flag files, was replaced by the single-file output below.)

    # Output the updated measurement catalog
    # Writing a single FITS file is much faster than many small ones
    measfile = expdir + '/' + base + '_meas.fits'
    meas.write(measfile, overwrite=True)
    if os.path.exists(measfile + '.gz'):
        os.remove(measfile + '.gz')
    ret = subprocess.call(['gzip', measfile])  # compress final catalog

    # Update the meta file as well, it needs the /dl2 filenames
    rootLogger.info('Updating meta file')
    meta.write(metafile, overwrite=True)
    hdulist = fits.open(metafile)
    hdu = fits.table_to_hdu(chstr)
    hdulist.append(hdu)
    hdulist.writeto(metafile, overwrite=True)
    hdulist.close()

    # Create a file saying that the files were updated okay.
    dln.writelines(expdir + '/' + base + '_meas.updated', '')

    rootLogger.info('dt = ' + str(time.time() - t0) + ' sec.')
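# A minimal usage sketch for measurement_update() above; the exposure
# directory is hypothetical but follows the /net/dl2 layout assumed by the
# function (it must contain BASE_meta.fits and the chip meas catalogs).
def _example_measurement_update():
    expdir = '/net/dl2/dnidever/nsc/instcal/v3/c4d/20170701/c4d_170702_033958'  # hypothetical
    measurement_update(expdir)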
            #if file_test(file_dirname(outfile),/directory) eq 0 then file_mkdir,file_dirname(outfile)  ; make directory
            #if testlock eq 0 then touchzero,outfile+'.lock'  ; this is fast
            expstr['cmd'][i] = ('/home/dnidever/projects/noaosourcecatalog/python/nsc_instcal_measure.py ' +
                                fluxfile + ' ' + wtfile + ' ' + maskfile + ' ' + version)
            expstr['cmddir'][i] = tmpdir
            expstr['torun'][i] = True
        # Lock file exists
        else:
            expstr['locked'][i] = True
            expstr['torun'][i] = False
            if silent is False:
                rootLogger.info('Lock file exists ' + outfile + '.lock')

    # Parcel out the jobs
    nhosts = dln.size(hosts)
    torun, nalltorun = dln.where(expstr['torun'] == True)
    nperhost = int(np.ceil(nalltorun / nhosts))
    for i in range(nhosts):
        if host == hosts[i]:
            torun = torun[i * nperhost:(i + 1) * nperhost]
    ntorun = len(torun)
    if ntorun == 0:
        rootLogger.info('No exposures to process.')
        sys.exit()

    # Pick the jobs to run
    # MAXJOBS
    if ntorun > maxjobs:
        rootLogger.info('More jobs than MAXJOBS.  Cutting down to ' +
                        str(maxjobs) + ' jobs')
        expstr['submitted'][torun[0:maxjobs]] = True
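# The parceling logic above hands host i the slice [i*nperhost, (i+1)*nperhost)
# of the "torun" indices.  A standalone sketch of that arithmetic (the host
# names and counts here are hypothetical):
def _example_parcel_jobs():
    torun = np.arange(10)              # indices of exposures left to process
    hosts = ['hulk', 'thing', 'gp09']  # hypothetical host names
    nperhost = int(np.ceil(len(torun) / len(hosts)))
    for i, h in enumerate(hosts):
        # each host gets a contiguous, non-overlapping chunk
        print(h, torun[i * nperhost:(i + 1) * nperhost])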
def apstar(filename, badval=20735):
    """
    Read an SDSS APOGEE apStar spectrum.

    Parameters
    ----------
    filename : string
         The name of the spectrum file to load.
    badval : int, optional
         Bitmask value used to identify bad pixels.  Default is 20735.

    Returns
    -------
    spec : Spec1D object
       The spectrum as a Spec1D object.

    Examples
    --------

    spec = apstar('spec.fits')

    """
    base, ext = os.path.splitext(os.path.basename(filename))

    # APOGEE apStar, combined spectrum
    if (base.find("apStar") > -1) | (base.find("asStar") > -1):
        # HISTORY APSTAR: HDU0 = Header only
        # HISTORY APSTAR: All image extensions have:
        # HISTORY APSTAR:   row 1: combined spectrum with individual pixel weighting
        # HISTORY APSTAR:   row 2: combined spectrum with global weighting
        # HISTORY APSTAR:   row 3-nvisits+2: individual resampled visit spectra
        # HISTORY APSTAR:   unless nvisits=1, which only have a single row
        # HISTORY APSTAR: All spectra shifted to rest (vacuum) wavelength scale
        # HISTORY APSTAR: HDU1 - Flux (10^-17 ergs/s/cm^2/Ang)
        # HISTORY APSTAR: HDU2 - Error (10^-17 ergs/s/cm^2/Ang)
        # HISTORY APSTAR: HDU3 - Flag mask:
        # HISTORY APSTAR:   row 1: bitwise OR of all visits
        # HISTORY APSTAR:   row 2: bitwise AND of all visits
        # HISTORY APSTAR:   row 3-nvisits+2: individual visit masks
        # HISTORY APSTAR: HDU4 - Sky (10^-17 ergs/s/cm^2/Ang)
        # HISTORY APSTAR: HDU5 - Sky Error (10^-17 ergs/s/cm^2/Ang)
        # HISTORY APSTAR: HDU6 - Telluric
        # HISTORY APSTAR: HDU7 - Telluric Error
        # HISTORY APSTAR: HDU8 - LSF coefficients
        # HISTORY APSTAR: HDU9 - RV and CCF structure

        # Get number of extensions
        hdulist = fits.open(filename)
        nhdu = len(hdulist)
        hdulist.close()

        # Spectrum, error, sky, skyerr are in units of 1e-17
        #  these are 2D arrays with [Nvisit+2,Npix]
        #  the first two are combined and the rest are the individual spectra
        head1 = fits.getheader(filename, 1)
        w0 = np.float64(head1["CRVAL1"])
        dw = np.float64(head1["CDELT1"])
        nw = head1["NAXIS1"]
        wave = 10**(np.arange(nw) * dw + w0)

        # flux, err, sky, skyerr are in units of 1e-17
        flux = fits.getdata(filename, 1).T * 1e-17
        lsfcoef = fits.getdata(filename, 8).T
        spec = Spec1D(flux, wave=wave, lsfpars=lsfcoef,
                      lsftype='Gauss-Hermite', lsfxtype='Pixels')
        spec.filename = filename
        spec.sptype = "apStar"
        spec.waveregime = "NIR"
        spec.instrument = "APOGEE"
        spec.head = fits.getheader(filename, 0)
        spec.err = fits.getdata(filename, 2).T * 1e-17
        # transpose the bitmask as well so it matches the [Npix,Nspec] flux
        spec.bitmask = fits.getdata(filename, 3).T
        spec.sky = fits.getdata(filename, 4).T * 1e-17
        spec.skyerr = fits.getdata(filename, 5).T * 1e-17
        spec.telluric = fits.getdata(filename, 6).T
        spec.telerr = fits.getdata(filename, 7).T
        spec.lsf = fits.getdata(filename, 8).T

        # Create the bad pixel mask
        # "bad" pixels:
        #   flag = ['BADPIX','CRPIX','SATPIX','UNFIXABLE','BADDARK','BADFLAT','BADERR','NOSKY',
        #           'LITTROW_GHOST','PERSIST_HIGH','PERSIST_MED','PERSIST_LOW','SIG_SKYLINE','SIG_TELLURIC','NOT_ENOUGH_PSF','']
        #   badflag = [1,1,1,1,1,1,1,1,
        #              0,0,0,0,0,0,1,0]
        mask = (np.bitwise_and(spec.bitmask, badval) != 0) | \
               (np.isfinite(spec.flux) == False)
        # Extra masking for bright skylines, one spectrum/column at a time
        # (the arrays are assumed 2D, [Npix,Nspec])
        x = np.arange(spec.npix)
        nsky = 4
        for i in range(spec.flux.shape[1]):
            sky = spec.sky[:, i]
            medsky = median_filter(sky, 201, mode='reflect')
            medcoef = dln.poly_fit(x, medsky / np.median(medsky), 2)
            medsky2 = dln.poly(x, medcoef) * np.median(medsky)
            skymask1 = (sky > nsky * medsky2)  # pixels Nsig above median sky
            mask[:, i] = np.logical_or(mask[:, i], skymask1)  # OR combine
        spec.mask = mask
        # Fix NaN or bad pixels
        for i in range(spec.flux.shape[1]):
            bd, nbd = dln.where((np.isfinite(spec.flux[:, i]) == False) |
                                (spec.err[:, i] <= 0.0))
            if nbd > 0:
                spec.flux[bd, i] = 0.0
                spec.err[bd, i] = 1e30
                spec.mask[bd, i] = True
        # HDU9 (the tenth extension) holds the RV and CCF structure,
        # so there must be at least ten HDUs for it to be present
        if nhdu >= 10:
            spec.meta = fits.getdata(filename, 9)  # meta-data
        spec.snr = spec.head["SNR"]
        if base.find("apStar") > -1:
            spec.observatory = 'apo'
        else:
            spec.observatory = 'lco'
        spec.wavevac = True
        return spec
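# A minimal usage sketch for apstar(); the filename below is hypothetical.
def _example_apstar():
    spec = apstar('apStar-r12-2M00000002+7417074.fits')  # hypothetical file
    # inspect a few of the attributes set by the reader
    print(spec.observatory, spec.snr, spec.flux.shape)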
def measurement_info(pix):
    """ Write out the measid/objectid information for every exposure that
    overlaps a given HEALPix pixel."""

    t0 = time.time()
    hostname = socket.gethostname()
    host = hostname.split('.')[0]

    # Get version number from exposure directory
    #lo = expdir.find('nsc/instcal/')
    #dum = expdir[lo+12:]
    #version = dum[0:dum.find('/')]
    version = 'v3'
    cmbdir = '/net/dl2/dnidever/nsc/instcal/' + version + '/'
    #edir = '/net/dl1/users/dnidever/nsc/instcal/'+version+'/'
    #nside = 128

    #expstr = fits.getdata('/net/dl2/dnidever/nsc/instcal/'+version+'/lists/nsc_'+version+'_exposures.fits.gz',1)  # too many columns, just need full path and base
    metadb = '/net/dl2/dnidever/nsc/instcal/' + version + '/lists/nsc_meta.db'
    data = querydb(metadb, 'exposure', 'expdir')
    data = [a[0] for a in data]
    expdir = np.char.array(data)
    expdir = expdir.rstrip('/')
    base = [os.path.basename(e) for e in expdir]
    base = np.char.array(base)

    # If we put the output files in a PIX_idstr/ subdirectory then I wouldn't
    # need to know all of this exposure path information
    dbfile = cmbdir + 'combine/' + str(int(pix) // 1000) + '/' + str(pix) + '_idstr.db'
    print(dbfile)

    # Deal with sub-pixels!!

    # Get the row count
    db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES |
                         sqlite3.PARSE_COLNAMES)
    cur = db.cursor()
    cur.execute('select count(rowid) from idstr')
    data = cur.fetchall()
    db.close()
    nrows = data[0][0]
    print(str(nrows) + ' rows')

    print('Loading the data')
    idstr = readidstrdb(dbfile)
    # Need to do this in chunks if there are too many rows

    # Get unique exposures
    exposure = np.char.array(idstr['exposure'])
    expindex = dln.create_index(exposure)
    nexp = len(expindex['value'])
    print(str(nexp) + ' exposures')

    # Get absolute paths
    ind1, ind2 = dln.match(base, expindex['value'])
    expdirs = np.zeros(nexp, (np.str, 200))
    expdirs[ind2] = expdir[ind1]
    # Convert /dl1 to /dl2
    expdirs = np.char.array(expdirs).replace('/dl1/users/dnidever/',
                                             '/dl2/dnidever/')

    # Loop through the exposures and write out their information
    for e in range(nexp):
        exposure1 = expindex['value'][e]
        eind = expindex['index'][expindex['lo'][e]:expindex['hi'][e] + 1]
        idstr1 = idstr[eind]
        nidstr1 = len(idstr1)
        # Just need measid and objectid, and only the width that we need
        mlen = np.max([len(m) for m in idstr1['measid']])
        olen = np.max([len(o) for o in idstr1['objectid']])
        dt = np.dtype([('measid', np.str, mlen), ('objectid', np.str, olen)])
        new = np.zeros(nidstr1, dtype=dt)
        new['measid'] = idstr1['measid']
        new['objectid'] = idstr1['objectid']
        print(str(e + 1) + ' ' + exposure1 + ' ' + str(nidstr1))

        # Put these files in an expdir/idstr/ subdirectory!!

        # Write it out
        outfile = expdirs[e] + '/' + exposure1 + '_objectid_list.fits'
        #outfile = expdirs[e]+'/'+exposure1+'_objectid_list.npy'
        print(' Writing ' + outfile)
        #np.save(outfile,new)   # not any faster
        Table(new).write(outfile, overwrite=True)

    print('dt = ' + str(time.time() - t0) + ' sec.')
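# measurement_info() is driven by a single HEALPix (nside=128) pixel number;
# a minimal sketch with a hypothetical pixel:
def _example_measurement_info():
    measurement_info(181553)  # hypothetical HEALPix pixel number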
def __call__(self, labels, order=None, norm=True, fluxonly=False,
             wave=None, rv=None):
    # Default is to return all orders
    # order can also be a list or array of orders
    # Orders to loop over
    if order is None:
        orders = np.arange(self.norder)
    else:
        orders = list(np.atleast_1d(order))
    norders = dln.size(orders)

    # Get maximum number of pixels over all orders
    npix = 0
    for i in range(self.norder):
        npix = np.maximum(npix, self._data[i].dispersion.shape[0])

    # Wavelength array input
    if wave is not None:
        if wave.ndim == 1:
            wnorders = 1
        else:
            wnorders = wave.shape[1]
        if wnorders != norders:
            raise ValueError(
                "Number of orders in WAVE must match orders in the model")
        npix = wave.shape[0]

    # Initialize output arrays
    oflux = np.zeros((npix, norders), np.float32) + np.nan
    owave = np.zeros((npix, norders), np.float64)
    omask = np.ones((npix, norders), bool)

    # Order loop
    # "oi" indexes the output arrays, "i" is the model order
    for oi, i in enumerate(orders):
        if wave is None:
            owave1 = self._data[i].dispersion  # final wavelength array for this order
        else:
            if wave.ndim == 1:
                owave1 = wave.copy()
            else:
                owave1 = wave[:, oi]

        # Get model and add radial velocity if necessary
        if (rv is None) & (wave is None):
            m = self._data[i]
            f = m(labels)
            zfactor = 1
        else:
            m = self._data_nointerp[i]
            f0 = m(labels)
            if rv is not None:
                zfactor = 1 + rv / cspeed  # redshift factor
            else:
                zfactor = 1
            zwave = m.dispersion * zfactor  # redshift the wavelengths
            f = np.zeros(len(owave1), np.float32) + np.nan
            gind, ngind = dln.where((owave1 >= np.min(zwave)) &
                                    (owave1 <= np.max(zwave)))  # wavelengths we can cover
            if ngind > 0:
                f[gind] = dln.interp(zwave, f0, owave1[gind])

        # Get continuum
        if (norm is False):
            if hasattr(m, 'continuum'):
                contmodel = m.continuum
                smallcont = contmodel(labels)
                if contmodel._logflux is True:
                    smallcont = 10**smallcont
                # Interpolate to the full spectrum wavelength array
                #   with any redshift
                cont = dln.interp(contmodel.dispersion * zfactor,
                                  smallcont, owave1)
                # Now multiply the flux array by the continuum
                f *= cont
            else:
                raise ValueError("Model does not have continuum information")

        # Stuff in the array
        oflux[0:len(f), oi] = f
        owave[0:len(f), oi] = owave1
        omask[0:len(f), oi] = False

    # Only return the flux
    if fluxonly is True:
        return oflux

    # Change single order 2D arrays to 1D
    if norders == 1:
        oflux = oflux.flatten()
        owave = owave.flatten()
        omask = omask.flatten()

    # Create Spec1D object
    mspec = Spec1D(oflux, err=oflux * 0.0, wave=owave, mask=omask,
                   lsfsigma=None, instrument='Model')
    mspec.teff = labels[0]
    mspec.logg = labels[1]
    mspec.feh = labels[2]
    #mspec.rv = rv
    mspec.snr = np.inf
    return mspec
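# A usage sketch for the model __call__ above, assuming "model" is an
# instance of this class; the label values are hypothetical.
def _example_model_call(model):
    labels = [4500.0, 2.5, -0.5]  # Teff, logg, [Fe/H]
    spec = model(labels)          # Spec1D with all orders
    # single order, shifted by +35 km/s, flux array only
    flux = model(labels, order=0, rv=35.0, fluxonly=True)
    return spec, flux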
def exposure_update(exposure, redo=False):
    """ Update the measurement table using the broken up measid/objectid lists."""

    t00 = time.time()
    hostname = socket.gethostname()
    host = hostname.split('.')[0]

    iddir = '/data0/dnidever/nsc/instcal/v3/idstr/'
    version = 'v3'

    # Load the exposures table
    print('Loading exposure table')
    expcat = fits.getdata(
        '/net/dl2/dnidever/nsc/instcal/' + version +
        '/lists/nsc_v3_exposure_table.fits.gz', 1)

    # Make sure it's a list
    if type(exposure) is str:
        exposure = [exposure]

    # Match the input exposures to the exposure catalog
    eind1, eind2 = dln.match(expcat['EXPOSURE'], exposure)
    nmatch = len(eind1)
    print(str(nmatch) + ' matches for ' + str(len(exposure)) +
          ' input exposures')
    if len(eind1) == 0:
        print('No exposures matched to exposure table')
        sys.exit()

    print('Updating measid for ' + str(len(exposure)) + ' exposures')

    # Loop over files
    for i in range(len(exposure)):
        t0 = time.time()
        exp = expcat['EXPOSURE'][eind1[i]]
        print(str(i + 1) + ' ' + exp)

        instcode = expcat['INSTRUMENT'][eind1[i]]
        dateobs = expcat['DATEOBS'][eind1[i]]
        night = dateobs[0:4] + dateobs[5:7] + dateobs[8:10]
        expdir = '/net/dl2/dnidever/nsc/instcal/' + version + '/' + \
                 instcode + '/' + night + '/' + exp
        edir = iddir + instcode + '/' + night + '/' + exp + '/'  # local directory for ID files
        #outdir = edir
        outdir = expdir

        # Check that the directory exists
        if os.path.exists(expdir) is False:
            print(expdir + ' NOT FOUND')
            continue

        # Check output file
        measfile = outdir + '/' + exp + '_meas.fits'
        if (os.path.exists(measfile + '.gz')) & (redo is False):
            print(measfile + '.gz already exists.  Skipping')
            continue

        # Log file
        #------------------
        # format is EXPOSURE_measure_update.DATETIME.log
        logtime = time.strftime('%m%d%y%H%M%S', time.localtime())
        logfile = outdir + '/' + exp + '_measure_update.' + logtime + '.log'
        if os.path.exists(logfile):
            os.remove(logfile)

        # Set up logging to screen and logfile
        logFormatter = logging.Formatter(
            "%(asctime)s [%(levelname)-5.5s] %(message)s")
        rootLogger = logging.getLogger()
        # remove any handlers left over from a previous exposure
        if rootLogger.hasHandlers():
            rootLogger.handlers = []
        fileHandler = logging.FileHandler(logfile)
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)
        rootLogger.setLevel(logging.NOTSET)

        rootLogger.info(
            'Adding objectID for measurement catalogs for exposure = ' + exp)
        rootLogger.info("expdir = " + expdir)
        rootLogger.info("host = " + host)
        rootLogger.info(" ")

        # Load the exposure and metadata files
        metafile = expdir + '/' + exp + '_meta.fits'
        meta = Table.read(metafile, 1)
        nmeta = len(meta)
        chstr = Table.read(metafile, 2)
        rootLogger.info('KLUDGE!!!  Changing /dl1 filenames to /dl2 filenames')
        cols = ['EXPDIR', 'FILENAME', 'MEASFILE']
        for c in cols:
            f = np.char.array(chstr[c]).decode()
            f = np.char.array(f).replace('/dl1/users/dnidever/',
                                         '/dl2/dnidever/')
            chstr[c] = f
        nchips = len(chstr)

        # Get "good" chips, astrometrically calibrated
        astokay = np.zeros(nchips, bool)
        for k in range(nchips):
            # Check that this chip was astrometrically calibrated
            #   and falls in the HEALPix region
            # Also check for issues with my astrometric corrections
            if (chstr['NGAIAMATCH'][k] == 0) | \
               (np.max(np.abs(chstr['RACOEF'][k])) > 1) | \
               (np.max(np.abs(chstr['DECCOEF'][k])) > 1):
                astokay[k] = False
            else:
                astokay[k] = True
        #gdch,ngdch,bdch,nbdch = dln.where(chstr['NGAIAMATCH']>0,comp=True)
        gdch, ngdch, bdch, nbdch = dln.where(astokay == True, comp=True)
        if nbdch > 0:
            rootLogger.info(str(nbdch) +
                            ' chips were not astrometrically calibrated')

        measdtype = np.dtype([('MEASID', 'S50'), ('OBJECTID', 'S50'),
                              ('EXPOSURE', 'S50'), ('CCDNUM', '>i2'),
                              ('FILTER', 'S2'), ('MJD', '>f8'), ('X', '>f4'),
                              ('Y', '>f4'), ('RA', '>f8'), ('RAERR', '>f4'),
                              ('DEC', '>f8'), ('DECERR', '>f4'),
                              ('MAG_AUTO', '>f4'), ('MAGERR_AUTO', '>f4'),
                              ('MAG_APER1', '>f4'), ('MAGERR_APER1', '>f4'),
                              ('MAG_APER2', '>f4'), ('MAGERR_APER2', '>f4'),
                              ('MAG_APER4', '>f4'), ('MAGERR_APER4', '>f4'),
                              ('MAG_APER8', '>f4'), ('MAGERR_APER8', '>f4'),
                              ('KRON_RADIUS', '>f4'), ('ASEMI', '>f4'),
                              ('ASEMIERR', '>f4'), ('BSEMI', '>f4'),
                              ('BSEMIERR', '>f4'), ('THETA', '>f4'),
                              ('THETAERR', '>f4'), ('FWHM', '>f4'),
                              ('FLAGS', '>i2'), ('CLASS_STAR', '>f4')])

        # Load and concatenate the meas catalogs
        chstr['MEAS_INDEX'] = -1  # keep track of where each chip catalog starts
        count = 0
        meas = Table(data=np.zeros(int(np.sum(chstr['NMEAS'][gdch])),
                                   dtype=measdtype))
        rootLogger.info('Loading and concatenating the chip measurement catalogs')
        for j in range(ngdch):
            jch = gdch[j]
            chfile = chstr['MEASFILE'][jch].strip()
            if chfile == '':
                continue
            #print(str(j+1)+' Loading '+chfile)
            meas1 = Table.read(chfile, 1)  # load chip meas catalog
            nmeas1 = len(meas1)
            meas[count:count + nmeas1] = meas1
            chstr['MEAS_INDEX'][jch] = count
            count += nmeas1
        measid = np.char.array(meas['MEASID']).strip().decode()
        nmeas = len(meas)
        rootLogger.info(str(nmeas) + ' measurements')

        # Look for the id files
        allfiles = glob(edir + exp + '__*.npy')
        # check for duplicates, single and split into high-res healpix idstr files
        #   always use the split ones
        base = [os.path.splitext(os.path.basename(f))[0] for f in allfiles]
        hfile = [f.split('__')[-1] for f in base]
        hh = [f.split('_')[0] for f in hfile]  # the healpix portion
        hindex = dln.create_index(hh)
        files = []
        for j in range(len(hindex['value'])):
            hpix1 = hindex['value'][j]
            hind = hindex['index'][hindex['lo'][j]:hindex['hi'][j] + 1]
            files1 = np.array(allfiles)[hind]
            # duplicates, use the split/hires ones
            if hindex['num'][j] > 1:
                gd = dln.grep(files1, str(hpix1) + '_n', index=True)
                if len(gd) == 0:
                    raise ValueError(
                        'Something is wrong with the idstr files, duplicates')
                files += list(files1[gd])
            else:
                files += list(files1)
        nfiles = len(files)
        rootLogger.info(str(nfiles) + ' ID files to load')

        # Loop over the ID files and load them up
        df = np.dtype([('measid', np.str, 50), ('objectid', np.str, 50)])
        idcat = np.zeros(10000, dtype=df)
        count = 0
        for k in range(nfiles):
            idcat1 = np.load(files[k])
            nidcat1 = len(idcat1)
            # Add more elements
            if count + nidcat1 > len(idcat):
                idcat = dln.add_elements(idcat, np.maximum(100000, nidcat1))
            # Stuff in the data
            idcat[count:count + nidcat1] = idcat1
            count += nidcat1
        # Trim extra elements
        if len(idcat) > count:
            idcat = idcat[0:count]
        rootLogger.info('IDs for ' + str(len(idcat)) + ' measurements')

        # Match up with measid
        idcat_measid = np.char.array(idcat['measid']).strip()
        if isinstance(idcat_measid[0], bytes):
            idcat_measid = idcat_measid.decode()
        ind1, ind2 = dln.match(idcat_measid, measid)
        nmatch = len(ind1)
        rootLogger.info('Matches for ' + str(nmatch) + ' measurements')
        if nmatch > 0:
            meas['OBJECTID'][ind2] = idcat['objectid'][ind1]

        if (len(ind1) > len(measid)) | (len(idcat) > len(meas)):
            rootLogger.info('There are ' + str(len(idcat) - len(meas)) +
                            ' duplicates!!')

        # Check for missing objectids
        ind, nind = dln.where(
            np.char.array(meas['OBJECTID']).strip().decode() == '')
        # There can be missing/orphaned measurements at healpix boundaries in
        # crowded regions when the DBSCAN eps is different.  But there should
        # be very few of these, so allow them to pass for now.
        if nind > 0:
            rootLogger.info('WARNING: ' + str(nind) +
                            ' measurements are missing OBJECTIDs')
        # A stricter cut that aborted the exposure and wrote the HEALPix of
        # the missing measurements to an EXPOSURE_meas.ERROR file is
        # currently disabled:
        #if ((nmeas>=20000) & (nind>20)) | ((nmeas<20000) & (nind>3)):
        #    rootLogger.info('More missing OBJECTIDs than currently allowed.')
        #    ...

        # Output the updated measurement catalog
        # Writing a single FITS file is much faster than many small ones
        # could put it in /data0 but db01 won't be able to access that
        rootLogger.info('Writing final measurement catalog to ' + measfile)
        meas.write(measfile, overwrite=True)
        if os.path.exists(measfile + '.gz'):
            os.remove(measfile + '.gz')
        ret = subprocess.call(['gzip', measfile])  # compress final catalog

        # Update the meta file as well, it needs the /dl2 filenames
        metafile = outdir + '/' + exp + '_meta.fits'
        rootLogger.info('Updating meta file ' + metafile)
        meta.write(metafile, overwrite=True)
        hdulist = fits.open(metafile)
        hdu = fits.table_to_hdu(chstr)
        hdulist.append(hdu)
        hdulist.writeto(metafile, overwrite=True)
        hdulist.close()

        # Create a file saying that the files were updated okay.
        #dln.writelines(expdir+'/'+exp+'_meas.updated','')
        dln.writelines(outdir + '/' + exp + '_meas.updated', '')
        # Remove meas.ERROR, if it exists
        if os.path.exists(outdir + '/' + exp + '_meas.ERROR'):
            os.remove(outdir + '/' + exp + '_meas.ERROR')

        rootLogger.info('dt = ' + str(time.time() - t0) + ' sec.')

    print('dt = %6.1f sec.' % (time.time() - t00))
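# exposure_update() accepts a single exposure name or a list of them; a
# minimal usage sketch with hypothetical exposure names:
def _example_exposure_update():
    exposure_update('c4d_170702_033958')  # single exposure
    exposure_update(['c4d_170702_033958', 'k4m_160101_050758'], redo=True)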
def interp(self, x=None, xtype='wave', order=None):
    """ Interpolate onto a new wavelength scale and/or shift by a velocity."""
    # if x is 2D and has multiple dimensions and the spectrum does as well
    # (with the same number of dimensions), and order=None, then it is
    # assumed that the input and output orders are "matched".

    # Check input xtype
    if (xtype.lower().find('wave') == -1) & (xtype.lower().find('pix') == -1):
        raise ValueError(xtype + ' not supported.  Must be wave or pixel')

    # Convert pixels to wavelength
    if (xtype.lower().find('pix') > -1):
        wave = self.pix2wave(x, order=order)
    else:
        wave = x.copy()

    # How many orders in the output wavelength
    if (wave.ndim == 1):
        nwpix = len(wave)
        nworder = 1
    else:
        nwpix, nworder = wave.shape
    wave = wave.reshape(nwpix, nworder)  # make 2D for order indexing

    # Loop over orders in the final wavelength
    oflux = np.zeros((nwpix, nworder), float)
    oerr = np.zeros((nwpix, nworder), float)
    omask = np.zeros((nwpix, nworder), bool)
    osigma = np.zeros((nwpix, nworder), float)
    for i in range(nworder):
        # Interpolate onto the final wavelength scale
        wave1 = wave[:, i]
        wr1 = dln.minmax(wave1)

        # Make spectrum arrays 2D for order indexing, [Npix,Norder]
        swave = self.wave.reshape(self.npix, self.norder)
        sflux = self.flux.reshape(self.npix, self.norder)
        serr = self.err.reshape(self.npix, self.norder)
        # convert the mask to integer 0 or 1
        if hasattr(self, 'mask'):
            smask = np.zeros((self.npix, self.norder), int)
            smask_bool = self.mask.reshape(self.npix, self.norder)
            smask[smask_bool == True] = 1
        else:
            smask = np.zeros((self.npix, self.norder), int)

        # The orders are "matched", one input for one output order
        if (nworder == self.norder) & (order is None):
            swave1 = swave[:, i]
            sflux1 = sflux[:, i]
            serr1 = serr[:, i]
            ssigma1 = self.lsf.sigma(order=i)
            smask1 = smask[:, i]
            # Some overlap
            if (np.min(swave1) < wr1[1]) & (np.max(swave1) > wr1[0]):
                # Fix NaN pixels
                bd, nbd = dln.where(np.isfinite(sflux1) == False)
                if nbd > 0:
                    sflux1[bd] = 1.0
                    serr1[bd] = 1e30
                    smask1[bd] = 1
                ind, nind = dln.where((wave1 > np.min(swave1)) &
                                      (wave1 < np.max(swave1)))
                oflux[ind, i] = dln.interp(swave1, sflux1, wave1[ind],
                                           extrapolate=False,
                                           assume_sorted=False)
                oerr[ind, i] = dln.interp(swave1, serr1, wave1[ind],
                                          extrapolate=False,
                                          assume_sorted=False,
                                          kind='linear')
                osigma[ind, i] = dln.interp(swave1, ssigma1, wave1[ind],
                                            extrapolate=False,
                                            assume_sorted=False)
                # Gauss-Hermite, convert to wavelength units
                if self.lsf.lsftype == 'Gauss-Hermite':
                    sdw1 = np.abs(swave1[1:] - swave1[0:-1])
                    sdw1 = np.hstack((sdw1, sdw1[-1]))
                    dw = dln.interp(swave1, sdw1, wave1[ind],
                                    extrapolate=False, assume_sorted=False)
                    osigma[ind, i] *= dw  # in Ang
                mask_interp = dln.interp(swave1, smask1, wave1[ind],
                                         extrapolate=False,
                                         assume_sorted=False)
                mask_interp_bool = np.zeros(nind, bool)
                mask_interp_bool[mask_interp > 0.4] = True
                omask[ind, i] = mask_interp_bool
        # Loop over all spectrum orders
        else:
            # Loop over spectrum orders
            for j in range(self.norder):
                swave1 = swave[:, j]
                sflux1 = sflux[:, j]
                serr1 = serr[:, j]
                ssigma1 = self.lsf.sigma(order=j)
                smask1 = smask[:, j]
                # Some overlap
                if (np.min(swave1) < wr1[1]) & (np.max(swave1) > wr1[0]):
                    ind, nind = dln.where((wave1 > np.min(swave1)) &
                                          (wave1 < np.max(swave1)))
                    oflux[ind, i] = dln.interp(swave1, sflux1, wave1[ind],
                                               extrapolate=False,
                                               assume_sorted=False)
                    oerr[ind, i] = dln.interp(swave1, serr1, wave1[ind],
                                              extrapolate=False,
                                              assume_sorted=False,
                                              kind='linear')
                    osigma[ind, i] = dln.interp(swave1, ssigma1, wave1[ind],
                                                extrapolate=False,
                                                assume_sorted=False)
                    mask_interp = dln.interp(swave1, smask1,
wave1[ind], extrapolate=False, assume_sorted=False) mask_interp_bool = np.zeros(nind, bool) mask_interp_bool[mask_interp > 0.4] = True omask[ind, i] = mask_interp_bool # Currently this does NOT deal with the overlap of multiple orders (e.g. averaging) # Flatten if 1D if (x.ndim == 1): wave = wave.flatten() oflux = oflux.flatten() oerr = oerr.flatten() osigma = osigma.flatten() omask = omask.flatten() # Create output spectrum object if self.lsf.lsftype == 'Gauss-Hermite': # Can't interpolate Gauss-Hermite LSF yet # instead convert to a Gaussian approximation in wavelength units #print('Cannot interpolate Gauss-Hermite LSF yet') lsfxtype = 'wave' else: lsfxtype = self.lsf.xtype ospec = Spec1D(oflux, wave=wave, err=oerr, mask=omask, lsftype='Gaussian', lsfxtype=lsfxtype, lsfsigma=osigma) return ospec
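# A usage sketch for interp(), assuming "spec" is a Spec1D-like object with
# this method; it shows both wavelength and pixel input:
def _example_interp(spec):
    # interpolate onto a uniform grid spanning the spectrum's wavelengths
    newwave = np.linspace(np.min(spec.wave), np.max(spec.wave), 5000)
    newspec = spec.interp(newwave)                           # wavelength input
    newspec2 = spec.interp(np.arange(100.), xtype='pixel')   # pixel input
    return newspec, newspec2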
def sigma(self, x=None, xtype='pixels', order=0, extrapolate=True):
    """
    Return the Gaussian sigma at specified locations.  The sigma
    will be in units of lsf.xtype.

    Parameters
    ----------
    x : array, optional
      The x-values for which to return the Gaussian sigma values.
    xtype : string, optional
       The type of x-value input, either 'wave' or 'pixels'.  Default is 'pixels'.
    order : int, optional
        The order to use if there are multiple orders.
        The default is 0.
    extrapolate : bool, optional
       Extrapolate beyond the dispersion solution, if necessary.
       True by default.

    Returns
    -------
    sigma : array
       The array of Gaussian sigma values.

    Examples
    --------

    sigma = lsf.sigma([100,200])

    """
    # The sigma will be returned in units given in lsf.xtype
    if self._sigma is not None:
        _sigma = self._sigma
        if self.ndim == 2:
            _sigma = self._sigma[:, order]
        if x is None:
            return _sigma
        else:
            # Wavelength input
            if xtype.lower().find('wave') > -1:
                x0 = np.array(x).copy()          # backup
                x = self.wave2pix(x0, order=order)  # convert to pixels
            # Integer, just return the values
            if (type(x) == int) | (np.array(x).dtype.kind == 'i'):
                return _sigma[x]
            # Floats, interpolate
            else:
                sig = interp1d(np.arange(len(_sigma)), _sigma, kind='cubic',
                               bounds_error=False, fill_value=(np.nan, np.nan),
                               assume_sorted=True)(x)
                # Extrapolate
                npix = self.npix
                if ((np.min(x) < 0) | (np.max(x) > (npix - 1))) & (extrapolate is True):
                    xin = np.arange(npix)
                    # At the beginning
                    if (np.min(x) < 0):
                        coef1 = dln.poly_fit(xin[0:10], _sigma[0:10], 2)
                        bd1, nbd1 = dln.where(x < 0)
                        sig[bd1] = dln.poly(x[bd1], coef1)
                    # At the end
                    if (np.max(x) > (npix - 1)):
                        coef2 = dln.poly_fit(xin[npix - 10:], _sigma[npix - 10:], 2)
                        bd2, nbd2 = dln.where(x > (npix - 1))
                        sig[bd2] = dln.poly(x[bd2], coef2)
                return sig
    # Need to calculate
    else:
        if x is None:
            x = np.arange(self.npix)
        if self.pars is None:
            raise Exception("No LSF parameters")
        # Get parameters
        pars = self.pars
        if self.ndim == 2:
            pars = self.pars[:, order]
        # Pixels input
        if xtype.lower().find('pix') > -1:
            # Pixel LSF parameters
            if self.xtype.lower().find('pix') > -1:
                return np.polyval(pars[::-1], x)
            # Wave LSF parameters
            else:
                w = self.pix2wave(x, order=order)
                return np.polyval(pars[::-1], w)
        # Wavelengths input
        else:
            # Wavelength LSF parameters
            if self.xtype.lower().find('wave') > -1:
                return np.polyval(pars[::-1], x)
            # Pixel LSF parameters
            else:
                x0 = np.array(x).copy()
                x = self.wave2pix(x0, order=order)
                return np.polyval(pars[::-1], x)
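# A usage sketch for sigma(), assuming "lsf" is an instance of this LSF
# class; it shows the whole-array, fractional-pixel, and wavelength paths:
def _example_lsf_sigma(lsf):
    sig = lsf.sigma()                          # sigma at every pixel, order 0
    sig2 = lsf.sigma([100.5, 200.5])           # interpolated at fractional pixels
    sig3 = lsf.sigma([16000.0], xtype='wave')  # wavelength input (converted via wave2pix)
    return sig, sig2, sig3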