def query(self, table=None, cols='*', where=None, groupby=None, sql=None,
          fmt='numpy', verbose=False):
    """
    Query the APOGEE DRP database.

    Parameters
    ----------
    table : str, optional
        Name of the table to query.  Tables in the apogee_drp schema can be given
        by name alone; tables in other schemas must include the schema name
        (e.g. catalogdb.gaia_dr2_source).  Not needed if the SQL command is given
        directly with `sql`.
    cols : str, optional
        Comma-separated list of columns to return.  Default is "*", all columns.
    where : str, optional
        Constraints on the selection.
    groupby : str, optional
        Column to group the data by.
    sql : str, optional
        The SQL command to execute directly.
    fmt : str, optional
        The output format:
          -numpy: numpy structured array (default)
          -table: astropy table
          -list: list of tuples, first row has the column names
          -raw: raw output, list of tuples
    verbose : bool, optional
        Print verbose output to screen.  False by default.

    Returns
    -------
    cat : numpy structured array
        The data in catalog format.  If fmt='raw' or fmt='list', the data are
        returned as a list of tuples instead.

    Examples
    --------
    cat = db.query('visit',where="apogee_id='2M09241296+2723318'")

    cat = db.query(sql='select * from apogee_drp.visit as v join catalogdb.something as c on v.apogee_id=c.2mass_type')

    """

    cur = self.connection.cursor()

    # Simple table query
    if sql is None:
        # Schema
        if table.find('.') > -1:
            schema, tab = table.split('.')
        else:
            schema = 'apogee_drp'
            tab = table

        # Start the SELECT statement
        cmd = 'SELECT ' + cols + ' FROM ' + schema + '.' + tab

        # Add WHERE statement
        if where is not None:
            cmd += ' WHERE ' + where

        # Add GROUP BY statement
        if groupby is not None:
            cmd += ' GROUP BY ' + groupby

        # Execute the select command
        if verbose:
            print('CMD = ' + cmd)
        cur.execute(cmd)
        data = cur.fetchall()

        if len(data) == 0:
            cur.close()
            return np.array([])

        # Return the raw results
        if fmt == 'raw':
            cur.close()
            return data

        # Get table column names and data types
        cur.execute("select column_name,data_type from information_schema.columns "
                    "where table_schema='" + schema + "' and table_name='" + tab + "'")
        head = cur.fetchall()
        cur.close()
        colnames = [h[0] for h in head]

        # Return fmt="list" format
        if fmt == 'list':
            data = [tuple(colnames)] + data
            return data

        # Map the PostgreSQL data types to numpy data types
        #  (use the builtins; the np.int/np.str/np.bool aliases are deprecated)
        d2d = {'smallint': int, 'integer': int, 'bigint': int,
               'real': np.float32, 'double precision': np.float64,
               'text': (str, 200), 'char': (str, 5),
               'timestamp': (str, 50), 'timestamp with time zone': (str, 50),
               'timestamp without time zone': (str, 50), 'boolean': bool}
        dt = []
        for i, h in enumerate(head):
            if h[1] == 'ARRAY':
                # Get the number of elements and the type from the data itself
                shp = np.array(data[0][i]).shape
                type1 = np.array(data[0][i]).dtype.type
                dt.append((h[0], type1, shp))
            else:
                dt.append((h[0], d2d[h[1]]))
        dtype = np.dtype(dt)

        # Convert to numpy structured array
        cat = np.zeros(len(data), dtype=dtype)
        cat[...] = data
        del data

    # SQL command input
    else:
        # Execute the command
        if verbose:
            print('CMD = ' + sql)
        cur.execute(sql)
        data = cur.fetchall()

        if len(data) == 0:
            cur.close()
            return np.array([])

        # Return the raw results
        if fmt == 'raw':
            cur.close()
            return data

        # Return fmt="list" format
        if fmt == 'list':
            colnames = [desc[0] for desc in cur.description]
            data = [tuple(colnames)] + data
            cur.close()
            return data

        # Get table column names and data types
        colnames = [desc[0] for desc in cur.description]
        colnames = np.array(colnames)
        # Fix duplicate column names
        cindex = dln.create_index(colnames)
        bd, nbd = dln.where(cindex['num'] > 1)
        for i in range(nbd):
            ind = cindex['index'][cindex['lo'][bd[i]]:cindex['hi'][bd[i]]+1]
            ind.sort()
            nind = len(ind)
            for j in np.arange(1, nind):
                colnames[ind[j]] += str(j+1)

        # Use the data returned to get the type
        dt = []
        for i, c in enumerate(colnames):
            type1 = type(data[0][i])
            if type1 is str:
                dt.append((c, type(data[0][i]), 300))
            elif type1 is list:
                # convert list to array
                nlist = len(data[0][i])
                dtype1 = type(data[0][i][0])
                dt.append((c, dtype1, nlist))
            else:
                dt.append((c, type(data[0][i])))
        dtype = np.dtype(dt)

        # Convert to numpy structured array
        cat = np.zeros(len(data), dtype=dtype)
        cat[...] = data
        del data

        # For string columns change the size to the maximum length of that column
        dt2 = []
        names = dtype.names
        nplen = np.vectorize(len)
        needcopy = False
        for i in range(len(dtype)):
            type1 = type(cat[names[i]][0])
            if type1 is str or type1 is np.str_:
                maxlen = np.max(nplen(cat[names[i]]))
                dt2.append((names[i], str, maxlen+10))
                needcopy = True
            else:
                dt2.append(dt[i])   # reuse dt value
        # We need to copy
        if needcopy:
            dtype2 = np.dtype(dt2)
            cat2 = np.zeros(len(cat), dtype=dtype2)
            for n in names:
                cat2[n] = cat[n]
            cat = cat2
            del cat2

    # Convert to astropy table
    if fmt == 'table':
        cat = Table(cat)

    return cat
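
# Usage sketch for query() (illustrative only): it assumes `db` is an instance of the
# database class this method belongs to, with an open DB-API `connection`, and that
# the apogee_drp.visit table exists.  The column names and constraints below are
# examples, not guaranteed to match the actual schema.
#
#   vis = db.query('visit', cols='apogee_id,mjd,snr',
#                  where="apogee_id='2M09241296+2723318'", fmt='table')
#   gaia = db.query('catalogdb.gaia_dr2_source', cols='source_id,ra,dec',
#                   where='ra between 10 and 11', fmt='numpy')
#   nvis = db.query(sql='select count(*) from apogee_drp.visit', fmt='raw')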
def make_mjd5_yaml(mjd, apred, telescope, clobber=False, logger=None):
    """
    Make a MJD5 yaml file that can be used to create plan files.

    Parameters
    ----------
    mjd : int
        MJD number for this night.
    apred : str
        APOGEE reduction version.
    telescope : str
        APOGEE telescope: apo25m, apo1m, lco25m.
    clobber : bool, optional
        Overwrite any existing files.
    logger : logging object, optional
        Logger to use for output.  A basic logger is created by default.

    Returns
    -------
    out : list of dictionaries
        The list of dictionaries that contain the information needed to make
        the plan files.
    planfiles : list of str
        The names of the plan files that would be created.
    The yaml files are also written to disk in the APOGEEREDUCEPLAN product directory.

    Examples
    --------
    out,planfiles = mkplan.make_mjd5_yaml(57680,'t15','apo25m')

    By J.Holtzman, 2011
    translated/rewritten, D.Nidever  Oct 2020
    """

    # Logger
    if logger is None:
        logger = dln.basiclogger()

    logger.info('Making MJD5.yaml file for MJD='+str(mjd))

    load = apload.ApLoad(apred=apred, telescope=telescope)
    datadir = {'apo25m': os.environ['APOGEE_DATA_N'],
               'apo1m': os.environ['APOGEE_DATA_N'],
               'lco25m': os.environ['APOGEE_DATA_S']}[telescope]
    observatory = telescope[0:3]

    # Output file/directory
    outfile = (os.environ['APOGEEREDUCEPLAN_DIR'] + '/yaml/' + telescope + '/' +
               telescope + '_' + str(mjd) + 'auto.yaml')
    if os.path.exists(os.path.dirname(outfile)) == False:
        os.makedirs(os.path.dirname(outfile))

    # File already exists and clobber not set
    if os.path.exists(outfile) and clobber == False:
        logger.info(outfile+' already EXISTS and clobber==False')
        return

    # Get the exposures and info about them
    info = getexpinfo(observatory, mjd)
    nfiles = len(info)
    logger.info(str(nfiles)+' exposures found')

    # Print summary information about the data
    expindex = dln.create_index(info['exptype'])
    for i in range(len(expindex['value'])):
        logger.info('  '+expindex['value'][i]+': '+str(expindex['num'][i]))
    objind, = np.where(info['exptype'] == 'OBJECT')
    if len(objind) > 0:
        plates = np.unique(info['plateid'][objind])
        logger.info('Observations of '+str(len(plates))+' plates')
        plateindex = dln.create_index(info['plateid'][objind])
        for i in range(len(plateindex['value'])):
            logger.info('  '+plateindex['value'][i]+': '+str(plateindex['num'][i]))

    # Scan through all files, accumulate IDs of the various types
    dark, cal, exp, sky, dome, calpsfid = [], [], [], [], None, None
    out, planfiles = [], []
    for i in range(nfiles):
        # Load the image number into the appropriate list according to exptype and nread
        #  discard images with nread<3
        if info['nread'][i] < 3:
            logger.info(str(info['num'][i])+' has less than the required 3 reads')
        # Dark
        if (info['exptype'][i] == 'DARK') and info['nread'][i] >= 3:
            dark.append(int(info['num'][i]))
        # Internal flat
        #   reduced only to 2D, hence treated like darks
        if (info['exptype'][i] == 'INTERNALFLAT') and info['nread'][i] >= 3:
            dark.append(int(info['num'][i]))
        # Quartz flat
        if (info['exptype'][i] == 'QUARTZFLAT') and info['nread'][i] >= 3:
            cal.append(int(info['num'][i]))
            calpsfid = int(info['num'][i])
        # Arc lamps
        if (info['exptype'][i] == 'ARCLAMP') and info['nread'][i] >= 3:
            cal.append(int(info['num'][i]))
        # Sky frame
        #   identify sky frames as object frames with 10<nread<15
        if (info['exptype'][i] == 'OBJECT') and (info['nread'][i] < 15 and info['nread'][i] > 10):
            sky.append(int(info['num'][i]))
        # Object exposure
        if (info['exptype'][i] == 'OBJECT') and info['nread'][i] > 15:
            exp.append(int(info['num'][i]))
        # Dome flat
        if (info['exptype'][i] == 'DOMEFLAT') and info['nread'][i] > 3:
            dome = int(info['num'][i])

        # End of this plate block
        #   if the plateid changed or this is the last exposure
        platechange = info['plateid'][i] != info['plateid'][np.minimum(i+1, nfiles-1)]
        if (platechange or i == nfiles-1) and len(exp) > 0 and dome is not None:
            # Object plate visit
            objplan = {'apred': str(apred), 'telescope': str(load.telescope),
                       'mjd': int(mjd), 'plate': int(info['plateid'][i]),
                       'psfid': dome, 'fluxid': dome, 'ims': exp}
            out.append(objplan)
            planfile = load.filename('Plan', plate=int(info['plateid'][i]), mjd=mjd)
            planfiles.append(planfile)
            exp = []

            # Sky exposures
            #   use the same cals as for the object exposures
            if len(sky) > 0:
                skyplan = {'apred': str(apred), 'telescope': str(load.telescope),
                           'mjd': int(mjd), 'plate': int(info['plateid'][i]),
                           'psfid': dome, 'fluxid': dome, 'ims': sky, 'sky': True}
                out.append(skyplan)
                skyplanfile = planfile.replace('.yaml', 'sky.yaml')
                planfiles.append(skyplanfile)
                sky = []

    # Dark frame information
    cplate = '0000'
    if len(dark) > 0:
        darkplan = {'apred': str(apred), 'telescope': str(load.telescope),
                    'mjd': int(mjd), 'plate': 0, 'psfid': 0, 'fluxid': 0,
                    'ims': dark, 'dark': True}
        out.append(darkplan)
        planfile = load.filename('DarkPlan', mjd=mjd)
        planfiles.append(planfile)

    # Calibration frame information
    if len(cal) > 0 and calpsfid is not None:
        calplan = {'apred': str(apred), 'telescope': str(load.telescope),
                   'mjd': int(mjd), 'plate': 0, 'psfid': calpsfid,
                   'fluxid': calpsfid, 'ims': cal, 'cal': True}
        out.append(calplan)
        planfile = load.filename('CalPlan', mjd=mjd)
        planfiles.append(planfile)

    # Write out the MJD5 file
    if os.path.exists(outfile):
        os.remove(outfile)
    logger.info('Writing MJD5.yaml file to '+outfile)
    with open(outfile, 'w') as file:
        dum = yaml.dump(out, file, default_flow_style=False, sort_keys=False)
    # Copy it to the non-"auto" version
    outfile2 = outfile.replace('auto', '')
    if os.path.exists(outfile2):
        os.remove(outfile2)
    shutil.copyfile(outfile, outfile2)

    return out, planfiles
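
# Minimal sketch of the plan-dictionary format that make_mjd5_yaml() accumulates in
# `out` and dumps to the MJD5 yaml file.  The numeric values below are made-up
# examples; the keys mirror the objplan dictionary built above.
def _example_mjd5_yaml_entry():
    import yaml
    objplan = {'apred': 't15', 'telescope': 'apo25m', 'mjd': 57680,
               'plate': 9684, 'psfid': 12345678, 'fluxid': 12345678,
               'ims': [12345679, 12345680]}
    # make_mjd5_yaml() writes a list of such dictionaries with yaml.dump
    return yaml.dump([objplan], default_flow_style=False, sort_keys=False)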
def varmetric(inpmeas):
    """ Compute photometric variability metrics."""
    # meas is a catalog of measurements for a single object

    nmeas = len(inpmeas)

    # The catalog needs to be a numpy structured array
    if isinstance(inpmeas, np.ndarray):
        meas = inpmeas
    else:
        meas = np.array(inpmeas)

    filtcol = 'FILTER'
    if filtcol not in meas.dtype.names:
        filtcol = 'filter'
        if filtcol not in meas.dtype.names:
            raise ValueError('No filter column')
    magcol = 'MAG_AUTO'
    if magcol not in meas.dtype.names:
        magcol = 'mag_auto'
        if magcol not in meas.dtype.names:
            raise ValueError('No mag_auto column')
    errcol = 'MAGERR_AUTO'
    if errcol not in meas.dtype.names:
        errcol = 'magerr_auto'
        if errcol not in meas.dtype.names:
            raise ValueError('No magerr_auto column')
    mjdcol = 'MJD'
    if mjdcol not in meas.dtype.names:
        mjdcol = 'mjd'
        if mjdcol not in meas.dtype.names:
            raise ValueError('No mjd column')

    # OBJ schema
    dtype_obj = np.dtype([('deltamjd', np.float32), ('ndet', np.int16), ('nphot', np.int16),
                          ('ndetu', np.int16), ('nphotu', np.int16), ('umag', np.float32),
                          ('urms', np.float32), ('uerr', np.float32),
                          ('ndetg', np.int16), ('nphotg', np.int16), ('gmag', np.float32),
                          ('grms', np.float32), ('gerr', np.float32),
                          ('ndetr', np.int16), ('nphotr', np.int16), ('rmag', np.float32),
                          ('rrms', np.float32), ('rerr', np.float32),
                          ('ndeti', np.int16), ('nphoti', np.int16), ('imag', np.float32),
                          ('irms', np.float32), ('ierr', np.float32),
                          ('ndetz', np.int16), ('nphotz', np.int16), ('zmag', np.float32),
                          ('zrms', np.float32), ('zerr', np.float32),
                          ('ndety', np.int16), ('nphoty', np.int16), ('ymag', np.float32),
                          ('yrms', np.float32), ('yerr', np.float32),
                          ('ndetvr', np.int16), ('nphotvr', np.int16), ('vrmag', np.float32),
                          ('vrrms', np.float32), ('vrerr', np.float32),
                          ('rmsvar', np.float32), ('madvar', np.float32), ('iqrvar', np.float32),
                          ('etavar', np.float32), ('jvar', np.float32), ('kvar', np.float32),
                          ('chivar', np.float32), ('romsvar', np.float32),
                          ('variable10sig', np.int16), ('nsigvar', np.float32)])

    # Initialize the OBJ structured array, all bad to start
    obj = np.zeros(1, dtype=dtype_obj)
    for f in ['rmsvar', 'madvar', 'iqrvar', 'etavar', 'jvar', 'kvar', 'chivar', 'romsvar']:
        obj[f] = np.nan
    for f in ['u', 'g', 'r', 'i', 'z', 'y', 'vr']:
        obj[f+'mag'] = 99.99
        obj[f+'err'] = 9.99
        obj[f+'rms'] = np.nan

    obj['ndet'] = nmeas
    obj['deltamjd'] = np.max(meas[mjdcol]) - np.min(meas[mjdcol])

    # Mean magnitudes
    # Convert totalwt and totalfluxwt to MAG and ERR
    #  and average the morphology parameters PER FILTER
    filtindex = dln.create_index(meas[filtcol].astype(str))
    nfilters = len(filtindex['value'])
    resid = np.zeros(nmeas)+np.nan     # residual mag
    relresid = np.zeros(nmeas)+np.nan  # residual mag relative to the uncertainty
    for f in range(nfilters):
        filt = filtindex['value'][f].lower()
        findx = filtindex['index'][filtindex['lo'][f]:filtindex['hi'][f]+1]
        obj['ndet'+filt] = filtindex['num'][f]
        gph, ngph = dln.where(meas[magcol][findx] < 50)
        obj['nphot'+filt] = ngph
        if ngph == 1:
            obj[filt+'mag'] = meas[magcol][findx[gph]]
            obj[filt+'err'] = meas[errcol][findx[gph]]
        if ngph > 1:
            newmag, newerr = dln.wtmean(meas[magcol][findx[gph]], meas[errcol][findx[gph]],
                                        magnitude=True, reweight=True, error=True)
            obj[filt+'mag'] = newmag
            obj[filt+'err'] = newerr
            # Calculate RMS
            obj[filt+'rms'] = np.sqrt(np.mean((meas[magcol][findx[gph]]-newmag)**2))
            # Residual mag
            resid[findx[gph]] = meas[magcol][findx[gph]]-newmag
            # Residual mag relative to the uncertainty
            #  set a lower threshold of 0.02 in the uncertainty
            relresid[findx[gph]] = np.sqrt(ngph/(ngph-1)) * (meas[magcol][findx[gph]]-newmag) / np.maximum(meas[errcol][findx[gph]], 0.02)

    # Calculate variability indices
    gdresid = np.isfinite(resid)
    ngdresid = np.sum(gdresid)
    if ngdresid > 0:
        resid2 = resid[gdresid]
        sumresidsq = np.sum(resid2**2)
        tsi = np.argsort(meas[mjdcol][gdresid])
        resid2tsi = resid2[tsi]
        quartiles = np.percentile(resid2, [25, 50, 75])
        # RMS
        rms = np.sqrt(sumresidsq/ngdresid)
        # MAD
        madvar = 1.4826*np.median(np.abs(resid2-quartiles[1]))
        # IQR
        iqrvar = 0.741289*(quartiles[2]-quartiles[0])
        # 1/eta
        etavar = sumresidsq / np.sum((resid2tsi[1:]-resid2tsi[0:-1])**2)
        obj['rmsvar'] = rms
        obj['madvar'] = madvar
        obj['iqrvar'] = iqrvar
        obj['etavar'] = etavar

    # Calculate variability indices with respect to the uncertainties
    gdrelresid = np.isfinite(relresid)
    ngdrelresid = np.sum(gdrelresid)
    if ngdrelresid > 0:
        relresid2 = relresid[gdrelresid]
        pk = relresid2**2-1
        jvar = np.sum(np.sign(pk)*np.sqrt(np.abs(pk)))/ngdrelresid
        #avgrelvar = np.mean(np.abs(relresid2))    # average of absolute relative residuals
        chivar = np.sqrt(np.sum(relresid2**2))/ngdrelresid
        kdenom = np.sqrt(np.sum(relresid2**2)/ngdrelresid)
        if kdenom != 0:
            kvar = (np.sum(np.abs(relresid2))/ngdrelresid) / kdenom
        else:
            kvar = np.nan
        # RoMS
        romsvar = np.sum(np.abs(relresid2))/(ngdrelresid-1)
        obj['jvar'] = jvar
        obj['kvar'] = kvar
        #obj['avgrelvar'] = avgrelvar
        obj['chivar'] = chivar
        obj['romsvar'] = romsvar
        #if chivar>50: import pdb; pdb.set_trace()

    # Make NPHOT from NPHOTX
    obj['nphot'] = (obj['nphotu']+obj['nphotg']+obj['nphotr']+obj['nphoti'] +
                    obj['nphotz']+obj['nphoty']+obj['nphotvr'])

    # Fiducial magnitude, used to select variables below
    #  order of priority: r,g,i,z,Y,VR,u
    if obj['nphot'] > 0:
        magarr = np.zeros(7, float)
        for ii, nn in enumerate(['rmag', 'gmag', 'imag', 'zmag', 'ymag', 'vrmag', 'umag']):
            magarr[ii] = obj[nn]
        gfid, ngfid = dln.where(magarr < 50)
        if ngfid > 0:
            fidmag = magarr[gfid[0]]

    return obj
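
# Self-contained sketch of the scatter metrics used in varmetric() (RMS, MAD, IQR),
# evaluated on synthetic residuals.  The 1.4826 and 0.741289 factors convert the MAD
# and IQR to a Gaussian-equivalent sigma, as in the code above.
def _example_scatter_metrics():
    import numpy as np
    rng = np.random.default_rng(1)
    resid = rng.normal(0.0, 0.05, 200)          # synthetic residual mags
    quartiles = np.percentile(resid, [25, 50, 75])
    rms = np.sqrt(np.mean(resid**2))
    madvar = 1.4826 * np.median(np.abs(resid - quartiles[1]))
    iqrvar = 0.741289 * (quartiles[2] - quartiles[0])
    return rms, madvar, iqrvar                  # all should be close to 0.05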
def fit(psf,image,cat,method='qr',fitradius=None,recenter=True,maxiter=10,minpercdiff=0.5, reskyiter=2,nofreeze=False,skyfit=True,verbose=False): """ Fit PSF to all stars in an image. To pre-group the stars, add a "group_id" in the input catalog. Parameters ---------- psf : PSF object PSF object with initial parameters to use. image : CCDData object Image to use to fit PSF model to stars. cat : table Catalog with initial amp/x/y values for the stars to use to fit the PSF. To pre-group the stars, add a "group_id" in the catalog. method : str, optional Method to use for solving the non-linear least squares problem: "cholesky", "qr", "svd", and "curve_fit". Default is "cholesky". fitradius : float, optional The fitting radius in pixels. By default the PSF FWHM is used. recenter : boolean, optional Allow the centroids to be fit. Default is True. maxiter : int, optional Maximum number of iterations to allow. Only for methods "qr" or "svd". Default is 10. minpercdiff : float, optional Minimum percent change in the parameters to allow until the solution is considered converged and the iteration loop is stopped. Only for methods "qr" and "svd". Default is 0.5. reskyiter : int, optional After how many iterations to re-calculate the sky background. Default is 2. nofreeze : boolean, optional Do not freeze any parameters even if they have converged. Default is False. skyfit : boolean, optional Fit a constant sky offset with the stellar parameters. Default is True. verbose : boolean, optional Verbose output. Returns ------- out : table Table of best-fitting parameters for each star. id, amp, amp_error, x, x_err, y, y_err, sky model : numpy array Best-fitting model of the stars and sky background. Example ------- outcat,model = fit(psf,image,cat,groups) """ print = utils.getprintfunc() # Get print function to be used locally, allows for easy logging start = time.time() # Check input catalog for n in ['x','y']: if n not in cat.keys(): raise ValueError('Cat must have x and y columns') # Check the method method = str(method).lower() if method not in ['cholesky','svd','qr','sparse','htcen','curve_fit']: raise ValueError('Only cholesky, svd, qr, sparse, htcen or curve_fit methods currently supported') nstars = np.array(cat).size ny,nx = image.data.shape # Groups if 'group_id' not in cat.keys(): daogroup = DAOGroup(crit_separation=2.5*psf.fwhm()) starlist = cat.copy() starlist['x_0'] = cat['x'] starlist['y_0'] = cat['y'] # THIS TAKES ~4 SECONDS!!!!!! WAY TOO LONG!!!! 
star_groups = daogroup(starlist) cat['group_id'] = star_groups['group_id'] # Star index starindex = dln.create_index(np.array(cat['group_id'])) groups = starindex['value'] ngroups = len(groups) if verbose: print(str(ngroups)+' star groups') # Initialize catalog dt = np.dtype([('id',int),('amp',float),('amp_error',float),('x',float), ('x_error',float),('y',float),('y_error',float),('sky',float), ('flux',float),('flux_error',float),('mag',float),('mag_error',float), ('niter',int),('group_id',int),('ngroup',int),('rms',float),('chisq',float)]) outcat = np.zeros(nstars,dtype=dt) outcat = Table(outcat) if 'id' in cat.keys(): outcat['id'] = cat['id'] else: outcat['id'] = np.arange(nstars)+1 # Group Loop #--------------- resid = image.copy() outmodel = CCDData(np.zeros(image.shape),bbox=image.bbox,unit=image.unit) outsky = CCDData(np.zeros(image.shape),bbox=image.bbox,unit=image.unit) for g,grp in enumerate(groups): ind = starindex['index'][starindex['lo'][g]:starindex['hi'][g]+1] nind = len(ind) inpcat = cat[ind].copy() if 'amp' not in inpcat.columns: # Estimate amp from flux and fwhm # area under 2D Gaussian is 2*pi*A*sigx*sigy if 'fwhm' in inpcat.columns: amp = inpcat['flux']/(2*np.pi*(inpcat['fwhm']/2.35)**2) else: amp = inpcat['flux']/(2*np.pi*(psf.fwhm()/2.35)**2) staramp = np.maximum(amp,0) # make sure it's positive inpcat['amp'] = staramp if verbose: print('-- Group '+str(grp)+'/'+str(len(groups))+' : '+str(nind)+' star(s) --') # Single Star if nind==1: inpcat = [inpcat['amp'][0],inpcat['x'][0],inpcat['y'][0]] out,model = psf.fit(resid,inpcat,niter=3,verbose=verbose,retfullmodel=True,recenter=recenter) model.data -= out['sky'] # remove sky outmodel.data[model.bbox.slices] += model.data outsky.data[model.bbox.slices] = out['sky'] # Group else: bbox = cutoutbbox(image,psf,inpcat) out,model,sky = groupfit.fit(psf,resid[bbox.slices],inpcat,method=method,fitradius=fitradius, recenter=recenter,maxiter=maxiter,minpercdiff=minpercdiff, reskyiter=reskyiter,nofreeze=nofreeze,verbose=verbose, skyfit=skyfit,absolute=True) outmodel.data[model.bbox.slices] += model.data outsky.data[model.bbox.slices] = sky # Subtract the best model for the group/star resid[model.bbox.slices].data -= model.data # Put in catalog cols = ['amp','amp_error','x','x_error','y','y_error', 'sky','flux','flux_error','mag','mag_error','niter','rms','chisq'] for c in cols: outcat[c][ind] = out[c] outcat['group_id'][ind] = grp outcat['ngroup'][ind] = nind outcat = Table(outcat) if verbose: print('dt = %.2f sec' % (time.time()-start)) return outcat,outmodel,outsky
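
# Usage sketch for fit() (illustrative only): `psf` is a PSF object, `image` a CCDData
# image, and `cat` an astropy Table with at least x/y columns (optionally amp, flux,
# fwhm, and group_id); none of these objects are constructed here.  Note that fit()
# returns three values: the output catalog, the model image, and the sky image.
#
#   outcat, model, skyim = fit(psf, image, cat, method='qr', recenter=True, verbose=True)
#   residim = image.data - model.data        # residual image after subtracting the stars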
def exposure_update(exposure, redo=False): """ Update the measurement table using the broken up measid/objectid lists.""" t00 = time.time() hostname = socket.gethostname() host = hostname.split('.')[0] iddir = '/data0/dnidever/nsc/instcal/v3/idstr/' version = 'v3' # Load the exposures table print('Loading exposure table') expcat = fits.getdata( '/net/dl2/dnidever/nsc/instcal/' + version + '/lists/nsc_v3_exposure_table.fits.gz', 1) # Make sure it's a list if type(exposure) is str: exposure = [exposure] # Match exposures to exposure catalog eind1, eind2 = dln.match(expcat['EXPOSURE'], exposure) nmatch = len(eind1) print( str(nmatch) + ' matches for ' + str(len(exposure)) + ' input exposures') if len(eind1) == 0: print('No exposures matched to exposure table') sys.exit() print('Updating measid for ' + str(len(exposure)) + ' exposures') # Loop over files for i in range(len(exposure)): t0 = time.time() exp = expcat['EXPOSURE'][eind1[i]] print(str(i + 1) + ' ' + exp) instcode = expcat['INSTRUMENT'][eind1[i]] dateobs = expcat['DATEOBS'][eind1[i]] night = dateobs[0:4] + dateobs[5:7] + dateobs[8:10] expdir = '/net/dl2/dnidever/nsc/instcal/' + version + '/' + instcode + '/' + night + '/' + exp edir = iddir + instcode + '/' + night + '/' + exp + '/' # local directory for ID files #outdir = edir outdir = expdir # Check that the directory exists if os.path.exists(expdir) is False: print(expdir + ' NOT FOUND') continue # Check output file measfile = outdir + '/' + exp + '_meas.fits' if (os.path.exists(measfile + '.gz')) & (redo is False): print(measfile + '.gz already exists. Skipping') continue # Log file #------------------ # format is EXPOSURE_measure_update.DATETIME.log ltime = time.localtime() # time.struct_time(tm_year=2019, tm_mon=7, tm_mday=22, tm_hour=0, tm_min=30, tm_sec=20, tm_wday=0, tm_yday=203, tm_isdst=1) smonth = str(ltime[1]) if ltime[1] < 10: smonth = '0' + smonth sday = str(ltime[2]) if ltime[2] < 10: sday = '0' + sday syear = str(ltime[0])[2:] shour = str(ltime[3]) if ltime[3] < 10: shour = '0' + shour sminute = str(ltime[4]) if ltime[4] < 10: sminute = '0' + sminute ssecond = str(int(ltime[5])) if ltime[5] < 10: ssecond = '0' + ssecond logtime = smonth + sday + syear + shour + sminute + ssecond logfile = outdir + '/' + exp + '_measure_update.' + logtime + '.log' if os.path.exists(logfile): os.remove(logfile) # Set up logging to screen and logfile logFormatter = logging.Formatter( "%(asctime)s [%(levelname)-5.5s] %(message)s") if logging.getLogger().hasHandlers() is True: rootLogger.handlers = [] # remove all handlers rootLogger = logging.getLogger() fileHandler = logging.FileHandler(logfile) fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) rootLogger.setLevel(logging.NOTSET) rootLogger.info( 'Adding objectID for measurement catalogs for exposure = ' + exp) rootLogger.info("expdir = " + expdir) rootLogger.info("host = " + host) rootLogger.info(" ") # Load the exposure and metadata files metafile = expdir + '/' + exp + '_meta.fits' meta = Table.read(metafile, 1) nmeta = len(meta) chstr = Table.read(metafile, 2) rootLogger.info('KLUDGE!!! 
Changing /dl1 filenames to /dl2 filenames') cols = ['EXPDIR', 'FILENAME', 'MEASFILE'] for c in cols: f = np.char.array(chstr[c]).decode() f = np.char.array(f).replace('/dl1/users/dnidever/', '/dl2/dnidever/') chstr[c] = f nchips = len(chstr) # Get "good" chips, astrometrically calibrated astokay = np.zeros(nchips, bool) for k in range(nchips): # Check that this chip was astrometrically calibrated # and falls in to HEALPix region # Also check for issues with my astrometric corrections if (chstr['NGAIAMATCH'][k] == 0) | (np.max(np.abs(chstr['RACOEF'][k])) > 1) | (np.max( np.abs(chstr['DECCOEF'][k])) > 1): astokay[k] = False else: astokay[k] = True #gdch,ngdch,bdch,nbdch = dln.where(chstr['NGAIAMATCH']>0,comp=True) gdch, ngdch, bdch, nbdch = dln.where(astokay == True, comp=True) if nbdch > 0: rootLogger.info( str(nbdch) + ' chips were not astrometrically calibrated') measdtype = np.dtype([('MEASID', 'S50'), ('OBJECTID', 'S50'), ('EXPOSURE', 'S50'), ('CCDNUM', '>i2'), ('FILTER', 'S2'), ('MJD', '>f8'), ('X', '>f4'), ('Y', '>f4'), ('RA', '>f8'), ('RAERR', '>f4'), ('DEC', '>f8'), ('DECERR', '>f4'), ('MAG_AUTO', '>f4'), ('MAGERR_AUTO', '>f4'), ('MAG_APER1', '>f4'), ('MAGERR_APER1', '>f4'), ('MAG_APER2', '>f4'), ('MAGERR_APER2', '>f4'), ('MAG_APER4', '>f4'), ('MAGERR_APER4', '>f4'), ('MAG_APER8', '>f4'), ('MAGERR_APER8', '>f4'), ('KRON_RADIUS', '>f4'), ('ASEMI', '>f4'), ('ASEMIERR', '>f4'), ('BSEMI', '>f4'), ('BSEMIERR', '>f4'), ('THETA', '>f4'), ('THETAERR', '>f4'), ('FWHM', '>f4'), ('FLAGS', '>i2'), ('CLASS_STAR', '>f4')]) # Load and concatenate the meas catalogs chstr[ 'MEAS_INDEX'] = -1 # keep track of where each chip catalog starts count = 0 meas = Table( data=np.zeros(int(np.sum(chstr['NMEAS'][gdch])), dtype=measdtype)) rootLogger.info( 'Loading and concatenating the chip measurement catalogs') for j in range(ngdch): jch = gdch[j] chfile = chstr['MEASFILE'][jch].strip() if chfile == '': continue #print(str(j+1)+' Loading '+chfile) meas1 = Table.read(chfile, 1) # load chip meas catalog nmeas1 = len(meas1) meas[count:count + nmeas1] = meas1 chstr['MEAS_INDEX'][jch] = count count += nmeas1 measid = np.char.array(meas['MEASID']).strip().decode() nmeas = len(meas) rootLogger.info(str(nmeas) + ' measurements') # Look for the id files allfiles = glob(edir + exp + '__*.npy') # check for duplicates, single and split into high-res healpix idstr files # always use the split ones base = [os.path.splitext(os.path.basename(f))[0] for f in allfiles] hfile = [f.split('__')[-1] for f in base] hh = [f.split('_')[0] for f in hfile] # the healpix portion hindex = dln.create_index(hh) files = [] for j in range(len(hindex['value'])): hpix1 = hindex['value'][j] hind = hindex['index'][hindex['lo'][j]:hindex['hi'][j] + 1] files1 = np.array(allfiles)[hind] # duplicates, use the split/hires ones if hindex['num'][j] > 1: gd = dln.grep(files1, str(hpix1) + '_n', index=True) if len(gd) == 0: raise ValueError( 'Something is wrong with the idstr files, duplicates') files += list(files1[gd]) else: files += list(files1) nfiles = len(files) rootLogger.info(str(nfiles) + ' ID files to load') # Loop over ID files and load them up df = np.dtype([('measid', np.str, 50), ('objectid', np.str, 50)]) idcat = np.zeros(10000, dtype=df) count = 0 for k in range(nfiles): idcat1 = np.load(files[k]) nidcat1 = len(idcat1) # Add more elements if count + nidcat1 > len(idcat): idcat = dln.add_elements(idcat, np.maximum(100000, nidcat1)) # Stuff in the data idcat[count:count + nidcat1] = idcat1 count += nidcat1 # Trim extra elements if 
len(idcat) > count: idcat = idcat[0:count] rootLogger.info('IDs for ' + str(len(idcat)) + ' measurements') # Match up with measid idcat_measid = np.char.array(idcat['measid']).strip() if isinstance(idcat_measid[0], bytes): idcat_measid = idcat_measid.decode() ind1, ind2 = dln.match(idcat_measid, measid) nmatch = len(ind1) rootLogger.info('Matches for ' + str(nmatch) + ' measurements') if nmatch > 0: meas['OBJECTID'][ind2] = idcat['objectid'][ind1] if (len(ind1) > len(measid)) | (len(idcat) > len(meas)): rootLogger.info('There are ' + str(len(idcat) - len(meas)) + ' duplicates!!') # Checking for missing objectid ind, nind = dln.where( np.char.array(meas['OBJECTID']).strip().decode() == '') # There can be missing/orphaned measurements at healpix boundaries in crowded # regions when the DBSCAN eps is different. But there should be very few of these. # At this point, let's allow this to pass if nind > 0: rootLogger.info('WARNING: ' + str(nind) + ' measurements are missing OBJECTIDs') #if ((nmeas>=20000) & (nind>20)) | ((nmeas<20000) & (nind>3)): # rootLogger.info('More missing OBJECTIDs than currently allowed.') # hpix = hp.ang2pix(128,meas['RA'][ind],meas['DEC'][ind],lonlat=True) # hindex = dln.create_index(hpix) # out = [] # for i in range(len(hindex['value'])): # out.append(str(hindex['value'][i])+' ('+str(hindex['num'][i])+')') # rootLogger.info('healpix of missing measurements: '+', '.join(out)) # outtxt = [str(nind)+' missing IDs','healpix of missing measurements: '+', '.join(out)] # dln.writelines(outdir+'/'+exp+'_meas.ERROR',outtxt) # continue # Output the updated measurement catalog # Writing a single FITS file is much faster than many small ones # could put it in /data0 but db01 won't be able to access that rootLogger.info('Writing final measurement catalog to ' + measfile) meas.write(measfile, overwrite=True) if os.path.exists(measfile + '.gz'): os.remove(measfile + '.gz') ret = subprocess.call(['gzip', measfile]) # compress final catalog # Update the meta file as well, need to update the /dl2 filenames metafile = outdir + '/' + exp + '_meta.fits' rootLogger.info('Updating meta file ' + metafile) meta.write(metafile, overwrite=True) hdulist = fits.open(metafile) hdu = fits.table_to_hdu(chstr) hdulist.append(hdu) hdulist.writeto(metafile, overwrite=True) hdulist.close() # Create a file saying that the files were updated okay. #dln.writelines(expdir+'/'+exp+'_meas.updated','') dln.writelines(outdir + '/' + exp + '_meas.updated', '') # Remove meas.ERROR, if it exists if os.path.exists(outdir + '/' + exp + '_meas.ERROR'): os.remove(outdir + '/' + exp + '_meas.ERROR') rootLogger.info('dt = ' + str(time.time() - t0) + ' sec.') print('dt = %6.1f sec.' % (time.time() - t00))
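
# A compact alternative sketch for the MMDDYYHHMMSS log timestamp that
# exposure_update() builds above by zero-padding each time.localtime() field by hand;
# time.strftime produces the same string.
def _example_logtime():
    import time
    return time.strftime('%m%d%y%H%M%S', time.localtime())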
def breakup_idstr(dbfile):
    """ Break up an idstr file into separate measid/objectid lists per exposure on /data0."""

    t00 = time.time()

    outdir = '/data0/dnidever/nsc/instcal/v3/idstr/'

    # Load the exposures table
    expcat = fits.getdata('/net/dl2/dnidever/nsc/instcal/v3/lists/nsc_v3_exposure_table.fits.gz', 1)

    # Make sure it's a list
    if type(dbfile) is str:
        dbfile = [dbfile]

    print('Breaking up '+str(len(dbfile))+' database files')

    # Loop over files
    for i, dbfile1 in enumerate(dbfile):
        print(str(i+1)+' '+dbfile1)
        if os.path.exists(dbfile1):
            t0 = time.time()
            dbbase1 = os.path.basename(dbfile1)[0:-9]   # remove _idstr.db ending
            # Connect to the database and read all of the idstr rows
            d = sqlite3.connect(dbfile1, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
            cur = d.cursor()
            cmd = 'select measid,exposure,objectid from idstr'
            t1 = time.time()
            data = cur.execute(cmd).fetchall()
            print('  '+str(len(data))+' rows read in %5.1f sec. ' % (time.time()-t1))
            # Break up the data into lists
            measid, exposure, objectid = list(zip(*data))
            measid = np.array(measid)
            objectid = np.array(objectid)
            exposure = np.array(exposure)
            eindex = dln.create_index(exposure)
            # Match exposures to the exposure catalog
            ind1, ind2 = dln.match(expcat['EXPOSURE'], eindex['value'])
            # Loop over exposures and write the output files
            nexp = len(eindex['value'])
            print('  '+str(nexp)+' exposures')
            measid_maxlen = np.max(dln.strlen(measid))
            objectid_maxlen = np.max(dln.strlen(objectid))
            df = np.dtype([('measid', str, measid_maxlen+1), ('objectid', str, objectid_maxlen+1)])
            # Loop over the exposures and write out the files
            for k in range(nexp):
                if nexp > 100:
                    if k % 100 == 0:
                        print('  '+str(k+1))
                ind = eindex['index'][eindex['lo'][k]:eindex['hi'][k]+1]
                cat = np.zeros(len(ind), dtype=df)
                cat['measid'] = measid[ind]
                cat['objectid'] = objectid[ind]
                instcode = expcat['INSTRUMENT'][ind1[k]]
                dateobs = expcat['DATEOBS'][ind1[k]]
                night = dateobs[0:4]+dateobs[5:7]+dateobs[8:10]
                if os.path.exists(outdir+instcode+'/'+night+'/'+eindex['value'][k]) is False:
                    # Sometimes this crashes because another process is making the directory at the same time
                    try:
                        os.makedirs(outdir+instcode+'/'+night+'/'+eindex['value'][k])
                    except:
                        pass
                outfile = (outdir+instcode+'/'+night+'/'+eindex['value'][k]+'/' +
                           eindex['value'][k]+'__'+dbbase1+'.npy')
                np.save(outfile, cat)
            print('  dt = %6.1f sec. ' % (time.time()-t0))
        else:
            print('  '+dbfile1+' NOT FOUND')

    print('dt = %6.1f sec.' % (time.time()-t00))
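
# Self-contained sketch of the per-exposure grouping that breakup_idstr() does with
# dln.create_index: np.unique(..., return_inverse=True) yields the same exposure groups.
def _example_group_by_exposure():
    import numpy as np
    exposure = np.array(['expA', 'expB', 'expA', 'expC', 'expB'])   # made-up names
    measid = np.array(['m1', 'm2', 'm3', 'm4', 'm5'])
    groups = {}
    uexp, inv = np.unique(exposure, return_inverse=True)
    for k, e in enumerate(uexp):
        groups[e] = measid[inv == k]          # measids belonging to this exposure
    return groups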
        hdu = fits.table_to_hdu(Table(obj1))        # second, catalog
        hdulist.append(hdu)
        hdulist.writeto(outfile1fits, overwrite=True)
        hdulist.close()
        if os.path.exists(outfile1):
            os.remove(outfile1)
        ret = subprocess.call(['gzip', outfile1fits])    # compress final catalog

        if allobj is None:
            allobj = obj1.copy()
        else:
            allobj = np.hstack((allobj, obj1.copy()))
        nobjects.append(nobj1)
        totobjects += nobj1

    # Deal with duplicate metas
    metaindex = dln.create_index(allmeta['base'])
    for i in range(len(metaindex['value'])):
        indx = metaindex['index'][metaindex['lo'][i]:metaindex['hi'][i]+1]
        meta1 = allmeta[indx[0]].copy()
        if len(indx) > 1:
            meta1['nobjects'] = np.sum(allmeta['nobjects'][indx])
        if i == 0:
            sumstr = meta1
        else:
            sumstr = np.hstack((sumstr, meta1))
    sumstr = Table(sumstr)

    # Write the output file
    print('Writing combined catalog to '+outfile)
    if os.path.exists(outfile):
        os.remove(outfile)
    sumstr.write(outfile)         # first, summary table
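
# Minimal sketch of the write-then-compress pattern used above and throughout this
# module: write the FITS file, remove any stale .gz, then gzip the new file.
# `table` and `outfile` are placeholders, not objects defined in this module.
def _example_write_and_gzip(table, outfile):
    import os
    import subprocess
    if os.path.exists(outfile):
        os.remove(outfile)
    table.write(outfile)                          # astropy Table
    if os.path.exists(outfile+'.gz'):
        os.remove(outfile+'.gz')
    subprocess.call(['gzip', outfile])            # compress the final catalog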
def measurement_info(pix): t0 = time.time() hostname = socket.gethostname() host = hostname.split('.')[0] # Get version number from exposure directory #lo = expdir.find('nsc/instcal/') #dum = expdir[lo+12:] #version = dum[0:dum.find('/')] version = 'v3' cmbdir = '/net/dl2/dnidever/nsc/instcal/' + version + '/' #edir = '/net/dl1/users/dnidever/nsc/instcal/'+version+'/' #nside = 128 #expstr = fits.getdata('/net/dl2/dnidever/nsc/instcal/'+version+'/lists/nsc_'+version+'_exposures.fits.gz',1) # too much many columns, just need full path and base metadb = '/net/dl2/dnidever/nsc/instcal/' + version + '/lists/nsc_meta.db' data = querydb(metadb, 'exposure', 'expdir') data = [a[0] for a in data] expdir = np.char.array(data) expdir = expdir.rstrip('/') base = [os.path.basename(e) for e in expdir] base = np.char.array(base) # If we put the output files in a PIX_idstr/ subdirectory then I wouldn't need to # know all of this exposure path information dbfile = cmbdir + 'combine/' + str( int(pix) // 1000) + '/' + str(pix) + '_idstr.db' print(dbfile) # Deal with sub-pixels!! # Get the row count db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES) cur = db.cursor() cur.execute('select count(rowid) from idstr') data = cur.fetchall() db.close() nrows = data[0][0] print(str(nrows) + ' rows') print('Loading the data') idstr = readidstrdb(dbfile) # Need to do this in chunks if there are too many rows # Get unique exposures exposure = np.char.array(idstr['exposure']) expindex = dln.create_index(exposure) nexp = len(expindex['value']) print(str(nexp) + ' exposures') # Get absolute paths ind1, ind2 = dln.match(base, expindex['value']) expdirs = np.zeros(nexp, (np.str, 200)) expdirs[ind2] = expdir[ind1] # Convert /dl1 to /dl2 expdirs = np.char.array(expdirs).replace('/dl1/users/dnidever/', '/dl2/dnidever/') # Loop through the exposures and write out their information for e in range(nexp): exposure1 = expindex['value'][e] eind = expindex['index'][expindex['lo'][e]:expindex['hi'][e] + 1] idstr1 = idstr[eind] nidstr1 = len(idstr1) # Just need measid,objectid, and only the width that we need mlen = np.max([len(m) for m in idstr1['measid']]) olen = np.max([len(o) for o in idstr1['objectid']]) dt = np.dtype([('measid', np.str, mlen), ('objectid', np.str, olen)]) new = np.zeros(nidstr1, dtype=dt) new['measid'] = idstr1['measid'] new['objectid'] = idstr1['objectid'] print(str(e + 1) + ' ' + exposure1 + ' ' + str(nidstr1)) # Put these files in expdir/idstr/ subdirectory!! 
# Write it out outfile = expdirs[e] + '/' + exposure1 + '_objectid_list.fits' #outfile = expdirs[e]+'/'+exposure1+'_objectid_list.npy' print(' Writing ' + outfile) #if os.path.exists(outfile): os.remove(outfile) #np.save(outfile,new) # not any faster Table(new).write(outfile, overwrite=True) print('dt = ' + str(time.time() - t0) + ' sec.') import pdb pdb.set_trace() # Check if output file already exists #base = os.path.basename(expdir) ## Log file ##------------------ ## format is nsc_combine_main.DATETIME.log #ltime = time.localtime() ## time.struct_time(tm_year=2019, tm_mon=7, tm_mday=22, tm_hour=0, tm_min=30, tm_sec=20, tm_wday=0, tm_yday=203, tm_isdst=1) #smonth = str(ltime[1]) #if ltime[1]<10: smonth = '0'+smonth #sday = str(ltime[2]) #if ltime[2]<10: sday = '0'+sday #syear = str(ltime[0])[2:] #shour = str(ltime[3]) #if ltime[3]<10: shour='0'+shour #sminute = str(ltime[4]) #if ltime[4]<10: sminute='0'+sminute #ssecond = str(int(ltime[5])) #if ltime[5]<10: ssecond='0'+ssecond #logtime = smonth+sday+syear+shour+sminute+ssecond #logfile = expdir+'/'+base+'_measure_update.'+logtime+'.log' #if os.path.exists(logfile): os.remove(logfile) ## Set up logging to screen and logfile #logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s") #rootLogger = logging.getLogger() #fileHandler = logging.FileHandler(logfile) #fileHandler.setFormatter(logFormatter) #rootLogger.addHandler(fileHandler) #consoleHandler = logging.StreamHandler() #consoleHandler.setFormatter(logFormatter) #rootLogger.addHandler(consoleHandler) #rootLogger.setLevel(logging.NOTSET) #rootLogger.info('Adding objectID for measurement catalogs for exposure = '+base) #rootLogger.info("expdir = "+expdir) #rootLogger.info("host = "+host) #rootLogger.info(" ") # Load the exposure and metadata files metafile = expdir + '/' + base + '_meta.fits' meta = Table.read(metafile, 1) nmeta = len(meta) chstr = Table.read(metafile, 2) rootLogger.info('KLUDGE!!! 
Changing /dl1 filenames to /dl2 filenames') cols = ['EXPDIR', 'FILENAME', 'MEASFILE'] for c in cols: f = np.char.array(chstr[c]).decode() f = np.char.array(f).replace('/dl1/users/dnidever/', '/dl2/dnidever/') chstr[c] = f nchips = len(chstr) measdtype = np.dtype([('MEASID', 'S50'), ('OBJECTID', 'S50'), ('EXPOSURE', 'S50'), ('CCDNUM', '>i2'), ('FILTER', 'S2'), ('MJD', '>f8'), ('X', '>f4'), ('Y', '>f4'), ('RA', '>f8'), ('RAERR', '>f4'), ('DEC', '>f8'), ('DECERR', '>f4'), ('MAG_AUTO', '>f4'), ('MAGERR_AUTO', '>f4'), ('MAG_APER1', '>f4'), ('MAGERR_APER1', '>f4'), ('MAG_APER2', '>f4'), ('MAGERR_APER2', '>f4'), ('MAG_APER4', '>f4'), ('MAGERR_APER4', '>f4'), ('MAG_APER8', '>f4'), ('MAGERR_APER8', '>f4'), ('KRON_RADIUS', '>f4'), ('ASEMI', '>f4'), ('ASEMIERR', '>f4'), ('BSEMI', '>f4'), ('BSEMIERR', '>f4'), ('THETA', '>f4'), ('THETAERR', '>f4'), ('FWHM', '>f4'), ('FLAGS', '>i2'), ('CLASS_STAR', '>f4')]) # Load and concatenate the meas catalogs chstr['MEAS_INDEX'] = 0 # keep track of where each chip catalog starts count = 0 meas = Table(data=np.zeros(int(np.sum(chstr['NMEAS'])), dtype=measdtype)) rootLogger.info('Loading and concatenating the chip measurement catalogs') for i in range(nchips): meas1 = Table.read(chstr['MEASFILE'][i].strip(), 1) # load chip meas catalog nmeas1 = len(meas1) meas[count:count + nmeas1] = meas1 chstr['MEAS_INDEX'][i] = count count += nmeas1 measid = np.char.array(meas['MEASID']).strip().decode() nmeas = len(meas) rootLogger.info(str(nmeas) + ' measurements') # Get the OBJECTID from the combined healpix file IDSTR structure # remove any sources that weren't used # Figure out which healpix this figure overlaps pix = hp.ang2pix(nside, meas['RA'], meas['DEC'], lonlat=True) upix = np.unique(pix) npix = len(upix) rootLogger.info(str(npix) + ' HEALPix to query') # Loop over the HEALPix pixels ntotmatch = 0 idstr_dtype = np.dtype([('measid', np.str, 200), ('objectid', np.str, 200), ('pix', int)]) idstr = np.zeros(nmeas, dtype=idstr_dtype) cnt = 0 for i in range(npix): fitsfile = cmbdir + 'combine/' + str(int(upix[i]) // 1000) + '/' + str( upix[i]) + '.fits.gz' dbfile = cmbdir + 'combine/' + str(int(upix[i]) // 1000) + '/' + str( upix[i]) + '_idstr.db' if os.path.exists(dbfile): # Read meas id information from idstr database for this expoure #data = querydb(dbfile,table='idstr',cols='measid,objectid',where="exposure=='"+base+"'") idstr1 = readidstrdb(dbfile, where="exposure=='" + base + "'") nidstr1 = len(idstr1) if nidstr1 > 0: idstr['measid'][cnt:cnt + nidstr1] = idstr1['measid'] idstr['objectid'][cnt:cnt + nidstr1] = idstr1['objectid'] idstr['pix'][cnt:cnt + nidstr1] = upix[i] cnt += nidstr1 rootLogger.info( str(i + 1) + ' ' + str(upix[i]) + ' ' + str(nidstr1)) #nmatch = 0 #if nidstr>0: # idstr_measid = np.char.array(idstr['measid']).strip() # idstr_objectid = np.char.array(idstr['objectid']).strip() # #ind1,ind2 = dln.match(idstr_measid,measid) # nmatch = len(ind1) # if nmatch>0: # meas['OBJECTID'][ind2] = idstr_objectid[ind1] # ntotmatch += nmatch #rootLogger.info(str(i+1)+' '+str(upix[i])+' '+str(nmatch)) else: rootLogger.info( str(i + 1) + ' ' + dbfile + ' NOT FOUND. 
Checking for high-resolution database files.') # Check if there are high-resolution healpix idstr databases hidbfiles = glob(cmbdir + 'combine/' + str(int(upix[i]) // 1000) + '/' + str(upix[i]) + '_n*_*_idstr.db') nhidbfiles = len(hidbfiles) if os.path.exists(fitsfile) & (nhidbfiles > 0): rootLogger.info('Found high-resolution HEALPix IDSTR files') for j in range(nhidbfiles): dbfile1 = hidbfiles[j] dbbase1 = os.path.basename(dbfile1) idstr1 = readidstrdb(dbfile1, where="exposure=='" + base + "'") nidstr1 = len(idstr1) if nidstr1 > 0: idstr['measid'][cnt:cnt + nidstr1] = idstr1['measid'] idstr['objectid'][cnt:cnt + nidstr1] = idstr1['objectid'] idstr['pix'][cnt:cnt + nidstr1] = upix[i] cnt += nidstr1 rootLogger.info(' ' + str(j + 1) + ' ' + dbbase1 + ' ' + str(upix[i]) + ' ' + str(nidstr1)) #idstr_measid = np.char.array(idstr['measid']).strip() #idstr_objectid = np.char.array(idstr['objectid']).strip() #ind1,ind2 = dln.match(idstr_measid,measid) #nmatch = len(ind1) #if nmatch>0: # meas['OBJECTID'][ind2] = idstr_objectid[ind1] # ntotmatch += nmatch #rootLogger.info(' '+str(j+1)+' '+dbbase1+' '+str(upix[i])+' '+str(nmatch)) # Trim any leftover elements of IDSTR if cnt < nmeas: idstr = idstr[0:cnt] # Now match them all up rootLogger.info('Matching the measurements') idstr_measid = np.char.array(idstr['measid']).strip() idstr_objectid = np.char.array(idstr['objectid']).strip() ind1, ind2 = dln.match(idstr_measid, measid) nmatch = len(ind1) if nmatch > 0: meas['OBJECTID'][ind2] = idstr_objectid[ind1] # Only keep sources with an objectid ind, nind = dln.where( np.char.array(meas['OBJECTID']).strip().decode() == '') # There can be missing/orphaned measurements at healpix boundaries in crowded # regions when the DBSCAN eps is different. But there should be very few of these. # At this point, let's allow this to pass if nind > 0: rootLogger.info('WARNING: ' + str(nind) + ' measurements are missing OBJECTIDs') if ((nmeas >= 20000) & (nind > 20)) | ((nmeas < 20000) & (nind > 3)): rootLogger.info('More missing OBJECTIDs than currently allowed.') raise ValueError('More missing OBJECTIDs than currently allowed.') # Output the updated catalogs #rootLogger.info('Updating measurement catalogs') #for i in range(nchips): # measfile1 = chstr['MEASFILE'][i].strip() # lo = chstr['MEAS_INDEX'][i] # hi = lo+chstr['NMEAS'][i] # meas1 = meas[lo:hi] # meta1 = Table.read(measfile1,2) # load the meta extensions # # 'KLUDGE!!! Changing /dl1 filenames to /dl2 filenames') # cols = ['EXPDIR','FILENAME','MEASFILE'] # for c in cols: # f = np.char.array(meta1[c]).decode() # f = np.char.array(f).replace('/dl1/users/dnidever/','/dl2/dnidever/') # meta1[c] = f # # Copy as a backup # if os.path.exists(measfile1+'.bak'): os.remove(measfile1+'.bak') # dum = shutil.move(measfile1,measfile1+'.bak') # # Write new catalog # #meas1.write(measfile1,overwrite=True) # first, measurement table # # append other fits binary tabl # #hdulist = fits.open(measfile1) # rootLogger.info('Writing '+measfile1) # hdulist = fits.HDUList() # hdulist.append(fits.table_to_hdu(meas1)) # first, meas catalog # hdulist.append(fits.table_to_hdu(meta1)) # second, meta # hdulist.writeto(measfile1,overwrite=True) # hdulist.close() # # Create a file saying that the file was successfully updated. 
# dln.writelines(measfile1+'.updated','') # # Delete backups # if os.path.exists(measfile1+'.bak'): os.remove(measfile1+'.bak') measfile = expdir + '/' + base + '_meas.fits' meas.write(measfile, overwrite=True) if os.path.exists(measfile + '.gz'): os.remove(measfile + '.gz') ret = subprocess.call(['gzip', measfile]) # compress final catalog # Update the meta file as well, need to the /dl2 filenames rootLogger.info('Updating meta file') meta.write(metafile, overwrite=True) hdulist = fits.open(metafile) hdu = fits.table_to_hdu(chstr) hdulist.append(hdu) hdulist.writeto(metafile, overwrite=True) hdulist.close() # Create a file saying that the files were updated okay. dln.writelines(expdir + '/' + base + '_meas.updated', '') rootLogger.info('dt = ' + str(time.time() - t0) + ' sec.')
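
# Sketch of the HEALPix lookup used in measurement_info(): map each measurement's
# RA/Dec to a pixel and then query only the unique pixels.  Requires healpy;
# nside=128 is the value used elsewhere in this module.
def _example_healpix_lookup(ra, dec, nside=128):
    import numpy as np
    import healpy as hp
    pix = hp.ang2pix(nside, ra, dec, lonlat=True)
    return np.unique(pix)                         # pixels whose idstr databases to read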
def fix_pms(pix): """ Correct the proper motions in the healpix object catalog.""" t00 = time.time() hostname = socket.gethostname() host = hostname.split('.')[0] version = 'v3' nside = 128 radeg = np.float64(180.00) / np.pi hdir = '/net/dl2/dnidever/nsc/instcal/'+version+'/combine/'+str(int(pix)//1000)+'/' objfile = hdir+str(pix)+'.fits.gz' outfile = hdir+str(pix)+'_pmcorr.fits' print('Correcting proper motions for '+str(pix)) # Check that the object file exists if os.path.exists(objfile) is False: print(objfile+' NOT FOUND') return # Check fixed file if os.path.exists(outfile+'.gz') == True: print(str(pix)+' already fixed') return # Load the object file #meta = fits.getdata(objfile,1) #obj = fits.getdata(objfile,2) meta = Table.read(objfile,1) obj = Table.read(objfile,2) nobj = len(obj) print(str(nobj)+' objects with '+str(np.sum(obj['ndet']))+' measurements') #print('KLUDGE!!! MAKING COPY OF OBJ!!!') #orig = obj.copy() #v = psutil.virtual_memory() #process = psutil.Process(os.getpid()) #print('%6.1f Percent of memory used. %6.1f GB available. Process is using %6.2f GB of memory.' % (v.percent,v.available/1e9,process.memory_info()[0]/1e9)) # Break up into subregions totmeas = np.sum(obj['ndet']) nsub,bestind = dln.closest([1,4,16,64],int(np.ceil(totmeas/500000))) hinside = [128,256,512,1024][bestind] vecbound = hp.boundaries(nside,int(pix)) allpix = hp.query_polygon(hinside,np.transpose(vecbound)) allra,alldec = hp.pix2ang(hinside,allpix,lonlat=True) print(str(nsub)+' sub regions') # Get the objects within this subpixel objpix = hp.ang2pix(hinside,obj['ra'],obj['dec'],lonlat=True) ndet = np.zeros(nobj,int) #allpmra_old = np.zeros(nobj,float) #allpmdec_old = np.zeros(nobj,float) #allpmra_linefit = np.zeros(nobj,float) # Loop over subpixels for i in range(nsub): pix1 = allpix[i] print(str(i+1)+' '+str(pix1)) # Get the measurements meas = get_meas(pix1,nside=hinside) nmeas = len(meas) if nmeas==0: print('No measurements in this subregion') continue #v = psutil.virtual_memory() #process = psutil.Process(os.getpid()) #print('%6.1f Percent of memory used. %6.1f GB available. Process is using %6.2f GB of memory.' 
% (v.percent,v.available/1e9,process.memory_info()[0]/1e9)) # Get the objects within this subpixel objind, = np.where(objpix==pix1) obj1 = obj[objind] nobj1 = len(obj1) print(' '+str(nobj1)+' objects in this subregion') idindex = dln.create_index(meas['objectid']) ## Not all matched #if len(idindex['value']) != nobj: # print('Number of unique OBJECTIDs in object and meas catalogs do not match') # return ind1,ind2 = dln.match(obj1['objectid'],idindex['value']) # Not all matched if len(ind1) != nobj1: print(str(len(obj1))+' objects in this sub healpix but only measurements for '+str(len(ind1))) #print('Some objects are missing measurements') #return # Ensure they are arrays ind1 = np.atleast_1d(ind1) ind2 = np.atleast_1d(ind2) # sort by object index si = np.argsort(ind1) if len(ind1)>1: ind1 = ind1[si] ind2 = ind2[si] # Loop over ndet1 = np.zeros(nobj1,int) #allpmra_old1 = np.zeros(nobj1,float) #allpmdec_old1 = np.zeros(nobj1,float) #allpmra_linefit1 = np.zeros(nobj1,float) for j in range(len(ind1)): if (j % 1000)==0: print(' '+str(j)) k = ind1[j] # object index # Calculate the proper motions mind = idindex['index'][idindex['lo'][ind2[j]]:idindex['hi'][ind2[j]]+1] cat1 = meas[mind] ncat1 = len(cat1) ndet1[k] = ncat1 if ncat1>1: raerr = np.array(cat1['raerr']*1e3,np.float64) # milli arcsec ra = np.array(cat1['ra'],np.float64) ra -= np.mean(ra) ra *= 3600*1e3 * np.cos(obj1['dec'][k]/radeg) # convert to true angle, milli arcsec t = cat1['mjd'].copy() t -= np.mean(t) t /= 365.2425 # convert to year # Calculate robust slope try: pmra, pmraerr = dln.robust_slope(t,ra,raerr,reweight=True) #pmra_old, pmraerr_old = dln.robust_slope_old(t,ra,raerr,reweight=True) #pmra_linefit = dln.poly_fit(t,ra,2,robust=True,sigma=raerr,initpar=pmra) except: print('problem') import pdb; pdb.set_trace() obj1['pmra'][k] = pmra # mas/yr obj1['pmraerr'][k] = pmraerr # mas/yr #allpmra_old1[k] = pmra_old #allpmra_linefit1[k] = pmra_linefit decerr = np.array(cat1['decerr']*1e3,np.float64) # milli arcsec dec = np.array(cat1['dec'],np.float64) dec -= np.mean(dec) dec *= 3600*1e3 # convert to milli arcsec # Calculate robust slope try: pmdec, pmdecerr = dln.robust_slope(t,dec,decerr,reweight=True) #pmdec_old, pmdecerr_old = dln.robust_slope_old(t,dec,decerr,reweight=True) except: print('problem') import pdb; pdb.set_trace() obj1['pmdec'][k] = pmdec # mas/yr obj1['pmdecerr'][k] = pmdecerr # mas/yr #allpmdec_old1[k] = pmdec_old # Stuff subregion object back into big one obj[objind] = obj1 ndet[objind] = ndet1 #allpmra_old[objind] = allpmra_old1 #allpmdec_old[objind] = allpmdec_old1 #allpmra_linefit[objind] = allpmra_linefit1 #import pdb; pdb.set_trace() #np.save(hdir+str(pix)+'_pmraold.npy',allpmra_old) #np.save(hdir+str(pix)+'_pmdecold.npy',allpmdec_old) #np.save(hdir+str(pix)+'_pmralinefit.npy',allpmra_linefit) #import pdb; pdb.set_trace() # Save the new version of obj # Write the output file print('Writing combined catalog to '+outfile) if os.path.exists(outfile): os.remove(outfile) #Table(meta).write(outfile) # first, summary table meta.write(outfile) # first, summary table # append other fits binary tables hdulist = fits.open(outfile) #hdu = fits.table_to_hdu(Table(obj)) # second, catalog hdu = fits.table_to_hdu(obj) # second, catalog hdulist.append(hdu) hdulist.writeto(outfile,overwrite=True) hdulist.close() if os.path.exists(outfile+'.gz'): os.remove(outfile+'.gz') ret = subprocess.call(['gzip',outfile]) # compress final catalog print('dt = %6.1f sec.' % (time.time()-t00))
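
# Simplified, self-contained sketch of the per-object proper-motion fit done in
# fix_pms(): offsets in milliarcseconds (RA scaled by cos(dec)) versus time in years,
# fit with a straight line.  The real code uses dln.robust_slope with reweighting;
# np.polyfit here is a plain least-squares stand-in.
def _example_pm_slope(mjd, ra, dec):
    import numpy as np
    radeg = 180.0/np.pi
    t = (mjd - np.mean(mjd))/365.2425                               # years
    dra = (ra - np.mean(ra))*3600*1e3*np.cos(np.mean(dec)/radeg)    # mas, true angle
    ddec = (dec - np.mean(dec))*3600*1e3                            # mas
    pmra = np.polyfit(t, dra, 1)[0]                                 # mas/yr
    pmdec = np.polyfit(t, ddec, 1)[0]                               # mas/yr
    return pmra, pmdec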