def test_bundle_bricks(self):
    """
    Test that the bundle_bricks scripting code executes without errors
    """
    blat = geomask.bundle_bricks(1, 1, 1, surveydirs=[self.surveydir])
    self.assertTrue(blat is None)

    foo = geomask.bundle_bricks(
        1, 1, 1, surveydirs=[self.surveydir, self.surveydir2])
    self.assertTrue(foo is None)

def select_randoms(drdir, density=100000, numproc=32, nside=4, pixlist=None,
                   bundlebricks=None, brickspersec=2.5, dustdir=None):
    """NOBS, GALDEPTH, PSFDEPTH (per-band) for random points in a DR of the Legacy Surveys.

    Parameters
    ----------
    drdir : :class:`str`
       The root directory pointing to a Data Release from the Legacy Surveys
       e.g. /global/project/projectdirs/cosmo/data/legacysurvey/dr7.
    density : :class:`int`, optional, defaults to 100,000
       The number of random points to return per sq. deg. As a typical brick is
       ~0.25 x 0.25 sq. deg., about (0.0625*density) points will be returned.
    numproc : :class:`int`, optional, defaults to 32
       The number of processes over which to parallelize.
    nside : :class:`int`, optional, defaults to nside=4 (214.86 sq. deg.)
       The (NESTED) HEALPixel nside to be used with the `pixlist` and `bundlebricks`
       inputs.
    pixlist : :class:`list` or `int`, optional, defaults to None
       Bricks will only be processed if the CENTER of the brick lies within the bounds
       of pixels that are in this list of integers, at the supplied HEALPixel `nside`.
       Uses the HEALPix NESTED scheme. Useful for parallelizing. If pixlist is None
       then all bricks in the Data Release at `drdir` will be processed.
    bundlebricks : :class:`int`, defaults to None
       If not None, then instead of selecting the randoms, print, to screen, the slurm
       script that will approximately balance the brick distribution at `bundlebricks`
       bricks per node. So, for instance, if bundlebricks is 14000 (which as of
       the latest git push works well to fit on the interactive nodes on Cori and run
       in about an hour), then commands would be returned with the correct pixlist
       values to pass to the code to pack at about 14000 bricks per node across all
       of the bricks in the Data Release.
    brickspersec : :class:`float`, optional, defaults to 2.5
       The rough number of bricks processed per second by the code (parallelized across
       a chosen number of nodes). Used in conjunction with `bundlebricks` for the code
       to estimate time to completion when parallelizing across pixels.
    dustdir : :class:`str`, optional, defaults to $DUST_DIR+'maps'
       The root directory pointing to SFD dust maps. If not sent, the code will try
       to use $DUST_DIR+'maps' before failing.

    Returns
    -------
    :class:`~numpy.ndarray`
        a numpy structured array with the following columns:
            RA: Right Ascension of a random point
            DEC: Declination of a random point
            BRICKNAME: Passed brick name
            NOBS_G: Number of observations at this location in the g-band
            NOBS_R: Number of observations at this location in the r-band
            NOBS_Z: Number of observations at this location in the z-band
            PSFDEPTH_G: PSF depth at this location in the g-band
            PSFDEPTH_R: PSF depth at this location in the r-band
            PSFDEPTH_Z: PSF depth at this location in the z-band
            GALDEPTH_G: Galaxy depth at this location in the g-band
            GALDEPTH_R: Galaxy depth at this location in the r-band
            GALDEPTH_Z: Galaxy depth at this location in the z-band
            MASKBITS: Extra mask bits info as stored in the header of e.g.,
                dr7dir + 'coadd/111/1116p210/legacysurvey-1116p210-maskbits.fits.gz'
            EBV: E(B-V) at this location from the SFD dust maps
    """
    # ADM read in the survey bricks file, which lists the bricks of interest for this DR.
    # ADM if this is pre-or-post-DR8 we need to find the correct directory or directories.
    drdirs = _pre_or_post_dr8(drdir)
    bricknames = []
    brickinfo = []
    for dd in drdirs:
        sbfile = glob(dd + '/*bricks-dr*')
        if len(sbfile) > 0:
            sbfile = sbfile[0]
            hdu = fits.open(sbfile)
            brickinfo.append(hdu[1].data)
            bricknames.append(hdu[1].data['BRICKNAME'])
        else:
            # ADM this is a hack for test bricks where we didn't always generate the
            # ADM bricks file. It's probably safe to remove it at some point.
            from desitarget.io import brickname_from_filename
            fns = glob(os.path.join(dd, 'tractor', '*', '*fits'))
            bricknames.append([brickname_from_filename(fn) for fn in fns])
            brickinfo.append([])
            if pixlist is not None or bundlebricks is not None:
                msg = 'DR-specific bricks file not found '
                msg += 'and pixlist or bundlebricks passed!!!'
                log.critical(msg)
                raise ValueError(msg)
    bricknames = np.concatenate(bricknames)
    brickinfo = np.concatenate(brickinfo)

    # ADM if the pixlist or bundlebricks option was sent, we'll need the HEALPixel
    # ADM information for each brick.
    if pixlist is not None or bundlebricks is not None:
        theta, phi = np.radians(90 - brickinfo["dec"]), np.radians(brickinfo["ra"])
        pixnum = hp.ang2pix(nside, theta, phi, nest=True)

    # ADM if the bundlebricks option was sent, call the packing code.
    if bundlebricks is not None:
        bundle_bricks(pixnum, bundlebricks, nside, brickspersec=brickspersec,
                      prefix='randoms', surveydir=drdir)
        return

    # ADM restrict to only bricks in a set of HEALPixels, if requested.
    if pixlist is not None:
        # ADM if an integer was passed, turn it into a list.
        if isinstance(pixlist, int):
            pixlist = [pixlist]
        wbricks = np.where([pix in pixlist for pix in pixnum])[0]
        bricknames = bricknames[wbricks]
        if len(wbricks) == 0:
            log.warning('ZERO bricks in passed pixel list!!!')
        log.info("Processing bricks in (nside={}, pixel numbers={}) HEALPixels"
                 .format(nside, pixlist))

    nbricks = len(bricknames)
    log.info('Processing {} bricks from DR at {} at density {:.1e} per sq. deg...t = {:.1f}s'
             .format(nbricks, drdir, density, time() - start))

    # ADM a little more information if we're slurming across nodes.
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM initialize the bricks class, and retrieve the brick information look-up table
    # ADM so it can be used in a common fashion.
    from desiutil import brick
    bricktable = brick.Bricks(bricksize=0.25).to_table()

    # ADM the critical function to run on every brick.
    def _get_quantities(brickname):
        '''wrapper on get_quantities_in_a_brick() given a brick name'''
        # ADM retrieve the edges for the brick that we're working on.
        wbrick = np.where(bricktable["BRICKNAME"] == brickname)[0]
        ramin, ramax, decmin, decmax = np.array(
            bricktable[wbrick]["RA1", "RA2", "DEC1", "DEC2"])[0]

        # ADM populate the brick with random points, and retrieve the quantities
        # ADM of interest at those points.
        return get_quantities_in_a_brick(ramin, ramax, decmin, decmax, brickname,
                                         drdir, density=density, dustdir=dustdir)

    # ADM this is just to count bricks in _update_status.
    nbrick = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        '''wrapper function for the critical reduction operation,
        that occurs on the main parallel process'''
        if nbrick % 50 == 0 and nbrick > 0:
            rate = nbrick / (time() - t0)
            log.info('{}/{} bricks; {:.1f} bricks/sec'.format(nbrick, nbricks, rate))
            # ADM if we're going to exceed 4 hours, warn the user.
            if nbricks / rate > 4 * 3600.:
                log.error("May take > 4 hours to run. Try running with bundlebricks instead.")

        nbrick[...] += 1  # this is an in-place modification
        return result

    # - Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            qinfo = pool.map(_get_quantities, bricknames, reduce=_update_status)
    else:
        qinfo = list()
        for brickname in bricknames:
            qinfo.append(_update_status(_get_quantities(brickname)))

    # ADM concatenate the randoms into a single long list and resolve whether
    # ADM they are officially in the north or the south.
    qinfo = np.concatenate(qinfo)
    qinfo = resolve(qinfo)

    # ADM one last shuffle to randomize across brick boundaries.
    np.random.seed(616)
    np.random.shuffle(qinfo)

    return qinfo
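
# Usage sketch for select_randoms: a minimal, illustrative wrapper (not part of the
# original module and never called automatically). The Data Release path and pixel
# number are placeholder assumptions; any Legacy Surveys DR root would work.
def _example_select_randoms(drdir="/global/project/projectdirs/cosmo/data/legacysurvey/dr7"):
    """Illustrative only: low-density random points for one nside=4 HEALPixel."""
    # To balance work across nodes instead, pass bundlebricks (e.g. bundlebricks=14000),
    # which prints a slurm script rather than returning an array.
    return select_randoms(drdir, density=1000, numproc=8, nside=4, pixlist=[8])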

def select_gfas(infiles, maglim=18, numproc=4, nside=None, pixlist=None,
                bundlefiles=None, extra=None, mindec=-30, mingalb=10,
                addurat=True):
    """Create a set of GFA locations using Gaia and matching to sweeps.

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input filenames (sweep files) OR a single filename.
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    nside : :class:`int`, optional, defaults to `None`
        (NESTED) HEALPix `nside` to use with `pixlist` and `bundlefiles`.
    pixlist : :class:`list` or `int`, optional, defaults to `None`
        Only return targets in a set of (NESTED) HEALpixels at the supplied
        `nside`. Useful for parallelizing.
    bundlefiles : :class:`int`, defaults to `None`
        If not `None`, then, instead of selecting gfas, print the slurm
        script to run in pixels at `nside`. Is an integer rather than
        a boolean for historical reasons.
    extra : :class:`str`, optional
        Extra command line flags to be passed to the executable lines in
        the output slurm script. Used in conjunction with `bundlefiles`.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (degrees) for output sources that do NOT match
        an object in the passed `infiles`.
    mingalb : :class:`float`, optional, defaults to 10
        Closest latitude to the Galactic plane for output sources that do NOT
        match an object in the passed `infiles` (e.g. send 10 to limit to
        regions beyond -10° < b < 10°).
    addurat : :class:`bool`, optional, defaults to ``True``
        If ``True`` then substitute proper motions from the URAT catalog
        where Gaia is missing proper motions. Requires that :envvar:`URAT_DIR`
        is set and points to data downloaded and formatted by, e.g.,
        :func:`~desitarget.uratmatch.make_urat_files`.

    Returns
    -------
    :class:`~numpy.ndarray`
        GFA objects from Gaia with the passed geometric constraints limited
        to the passed maglim and matched to the passed input files,
        formatted according to `desitarget.gfa.gfadatamodel`.

    Notes
    -----
        - If numproc==1, use the serial code instead of the parallel code.
        - If numproc > 4, then numproc=4 is enforced for (just those) parts
          of the code that are I/O limited.
    """
    # ADM the code can have memory issues for nside=2 with large numproc.
    if nside is not None and nside < 4 and numproc > 8:
        msg = 'Memory may be an issue near Plane for nside < 4 and numproc > 8'
        log.warning(msg)

    # ADM force to no more than numproc=4 for I/O limited processes.
    numproc4 = numproc
    if numproc4 > 4:
        log.info('Forcing numproc to 4 for I/O limited parts of code')
        numproc4 = 4

    # ADM convert a single file, if passed, to a list of files.
    if isinstance(infiles, str):
        infiles = [infiles, ]

    # ADM check that files exist before proceeding.
    for filename in infiles:
        if not os.path.exists(filename):
            msg = "{} doesn't exist".format(filename)
            log.critical(msg)
            raise ValueError(msg)

    # ADM if the pixlist option was sent, we'll need to
    # ADM know which HEALPixels touch each file.
    if pixlist is not None:
        filesperpixel, _, _ = sweep_files_touch_hp(nside, pixlist, infiles)

    # ADM if the bundlefiles option was sent, call the packing code.
    if bundlefiles is not None:
        # ADM were files from one or two input directories passed?
        surveydirs = list(set([os.path.dirname(fn) for fn in infiles]))
        bundle_bricks([0], bundlefiles, nside, gather=False,
                      prefix='gfas', surveydirs=surveydirs, extra=extra)
        return

    # ADM restrict to input files in a set of HEALPixels, if requested.
    if pixlist is not None:
        infiles = list(set(np.hstack([filesperpixel[pix] for pix in pixlist])))
        if len(infiles) == 0:
            log.info('ZERO sweep files in passed pixel list!!!')
        log.info("Processing files in (nside={}, pixel numbers={}) HEALPixels"
                 .format(nside, pixlist))
    nfiles = len(infiles)

    # ADM a little more information if we're slurming across nodes.
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM the critical function to run on every file.
    def _get_gfas(fn):
        '''wrapper on gaia_gfas_from_sweep() given a file name'''
        return gaia_gfas_from_sweep(fn, maglim=maglim)

    # ADM this is just to count sweeps files in _update_status.
    t0 = time()
    nfile = np.zeros((), dtype='i8')

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 20 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'
                     .format(nfile, nfiles, rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files.
    if len(infiles) > 0:
        if numproc4 > 1:
            pool = sharedmem.MapReduce(np=numproc4)
            with pool:
                gfas = pool.map(_get_gfas, infiles, reduce=_update_status)
        else:
            gfas = list()
            for file in infiles:
                gfas.append(_update_status(_get_gfas(file)))
        gfas = np.concatenate(gfas)
        # ADM resolve any duplicates between imaging data releases.
        gfas = resolve(gfas)

    # ADM retrieve Gaia objects in the DESI footprint or passed tiles.
    log.info('Retrieving additional Gaia objects...t = {:.1f} mins'
             .format((time() - t0) / 60))
    gaia = all_gaia_in_tiles(maglim=maglim, numproc=numproc4, allsky=True,
                             mindec=mindec, mingalb=mingalb,
                             nside=nside, pixlist=pixlist)

    # ADM remove any duplicates. Order is important here, as np.unique
    # ADM keeps the first occurrence, and we want to retain sweeps
    # ADM information as much as possible.
    if len(infiles) > 0:
        gfas = np.concatenate([gfas, gaia])
        _, ind = np.unique(gfas["REF_ID"], return_index=True)
        gfas = gfas[ind]
    else:
        gfas = gaia

    # ADM for zero/NaN proper motion objects, add URAT proper motions.
    if addurat:
        ii = ((np.isnan(gfas["PMRA"]) | (gfas["PMRA"] == 0)) &
              (np.isnan(gfas["PMDEC"]) | (gfas["PMDEC"] == 0)))
        log.info('Adding URAT for {} objects with no PMs...t = {:.1f} mins'
                 .format(np.sum(ii), (time() - t0) / 60))
        urat = add_urat_pms(gfas[ii], numproc=numproc)
        log.info('Found an additional {} URAT objects...t = {:.1f} mins'
                 .format(np.sum(urat["URAT_ID"] != -1), (time() - t0) / 60))
        for col in "PMRA", "PMDEC", "URAT_ID", "URAT_SEP":
            gfas[col][ii] = urat[col]

    # ADM restrict to only GFAs in a set of HEALPixels, if requested.
    if pixlist is not None:
        ii = is_in_hp(gfas, nside, pixlist)
        gfas = gfas[ii]

    return gfas
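
# Usage sketch for select_gfas: an illustrative wrapper (not part of the original module
# and never invoked automatically). The sweep directory is a placeholder assumption; any
# directory of Legacy Surveys sweep files would do.
def _example_select_gfas(sweepdir="/global/project/projectdirs/cosmo/data/legacysurvey/dr8/south/sweep/8.0"):
    """Illustrative only: GFAs to G < 18, restricted to two nside=2 HEALPixels."""
    from glob import glob
    infiles = sorted(glob(os.path.join(sweepdir, "sweep-*.fits")))
    # pixlist restricts both the sweep matching and the all-sky Gaia component.
    return select_gfas(infiles, maglim=18, numproc=4, nside=2, pixlist=[0, 1])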

def select_skies(survey, numproc=16, nskiespersqdeg=None, bands=['g', 'r', 'z'],
                 apertures_arcsec=[0.75], nside=2, pixlist=None,
                 writebricks=False, bundlebricks=None, brickspersec=1.6):
    """Generate skies in parallel for all bricks in a Legacy Surveys Data Release.

    Parameters
    ----------
    survey : :class:`object`
        `LegacySurveyData` object for a given Data Release of the Legacy Surveys; see
        :func:`~desitarget.skyutilities.legacypipe.util.LegacySurveyData` for details.
    numproc : :class:`int`, optional, defaults to 16
        The number of processes over which to parallelize.
    nskiespersqdeg : :class:`float`, optional
        The minimum DENSITY of sky fibers to generate. Defaults to reading from
        :func:`~desimodel.io` with a margin of 4x.
    bands : :class:`list`, optional, defaults to ['g', 'r', 'z']
        List of bands to be used to define good sky locations.
    apertures_arcsec : :class:`list`, optional, defaults to [0.75]
        Radii in arcsec of apertures for which to derive flux at a sky location.
    nside : :class:`int`, optional, defaults to nside=2 (859.4 sq. deg.)
        The HEALPixel nside number to be used with the `pixlist` input.
    pixlist : :class:`list` or `int`, optional, defaults to None
        Bricks will only be processed if the CENTER of the brick lies within the bounds
        of pixels that are in this list of integers, at the supplied HEALPixel `nside`.
        Uses the HEALPix NESTED scheme. Useful for parallelizing. If pixlist is ``None``
        then all bricks in the passed `survey` will be processed.
    writebricks : :class:`boolean`, defaults to False
        If `True`, write the skyfibers object for EACH brick (in the format of the
        output from :func:`sky_fibers_for_brick()`) to file. The file name is derived
        from the input `survey` object and is in the form:
        `%(survey.survey_dir)/metrics/%(brick).3s/skies-%(brick)s.fits.gz`
        which is returned by `survey.find_file('skies')`.
    bundlebricks : :class:`int`, defaults to None
        If not None, then instead of selecting the skies, print, to screen, the slurm
        script that will approximately balance the brick distribution at `bundlebricks`
        bricks per node. So, for instance, if bundlebricks is 14000 (which as of
        the latest git push works well to fit on the interactive nodes on Cori), then
        commands would be returned with the correct pixlist values to pass to the code
        to pack at about 14000 bricks per node across all of the bricks in `survey`.
    brickspersec : :class:`float`, optional, defaults to 1.6
        The rough number of bricks processed per second by the code (parallelized across
        a chosen number of nodes). Used in conjunction with `bundlebricks` for the code
        to estimate time to completion when parallelizing across pixels.

    Returns
    -------
    :class:`~numpy.ndarray`
        a structured array of sky positions in the DESI sky target format for all
        bricks in a Legacy Surveys Data Release.

    Notes
    -----
        - Some core code in this module was initially written by Dustin Lang (@dstndstn).
        - Returns nothing if bundlebricks is passed (and is not ``None``).
""" # ADM these comments were for debugging photutils/astropy dependencies # ADM and they can be removed at any time # import astropy # print(astropy.version) # print(astropy.version.version) # print(photutils.version) # print(photutils.version.version) # ADM read in the survey bricks file, which lists the bricks of interest for this DR from glob import glob sbfile = glob(survey.survey_dir+'/*bricks-dr*')[0] brickinfo = fitsio.read(sbfile) # ADM remember that fitsio reads things in as bytes, so convert to unicode bricknames = brickinfo['brickname'].astype('U') # ADM if the pixlist or bundlebricks option was sent, we'll need the HEALPpixel # ADM information for each brick if pixlist is not None or bundlebricks is not None: theta, phi = np.radians(90-brickinfo["dec"]), np.radians(brickinfo["ra"]) pixnum = hp.ang2pix(nside, theta, phi, nest=True) # ADM if the bundlebricks option was sent, call the packing code if bundlebricks is not None: bundle_bricks(pixnum, bundlebricks, nside, prefix='skies', surveydir=survey.survey_dir, brickspersec=brickspersec) return # ADM restrict to only bricks in a set of HEALPixels, if requested if pixlist is not None: # ADM if an integer was passed, turn it into a list if isinstance(pixlist, int): pixlist = [pixlist] wbricks = np.where([pix in pixlist for pix in pixnum])[0] bricknames = bricknames[wbricks] if len(wbricks) == 0: log.warning('ZERO bricks in passed pixel list!!!') log.info("Processing bricks in (nside={}, pixel numbers={}) HEALPixels" .format(nside, pixlist)) nbricks = len(bricknames) log.info('Processing {} bricks that have observations from DR at {}...t = {:.1f}s' .format(nbricks, survey.survey_dir, time()-start)) # ADM a little more information if we're slurming across nodes if os.getenv('SLURMD_NODENAME') is not None: log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME'))) # ADM the critical function to run on every brick def _get_skies(brickname): '''wrapper on make_skies_for_a_brick() given a brick name''' return make_skies_for_a_brick(survey, brickname, nskiespersqdeg=nskiespersqdeg, bands=bands, apertures_arcsec=apertures_arcsec, write=writebricks) # ADM this is just in order to count bricks in _update_status nbrick = np.zeros((), dtype='i8') t0 = time() def _update_status(result): """wrapper function for the critical reduction operation, that occurs on the main parallel process""" if nbrick % 500 == 0 and nbrick > 0: rate = nbrick / (time() - t0) log.info('{}/{} bricks; {:.1f} bricks/sec'.format(nbrick, nbricks, rate)) nbrick[...] += 1 # this is an in-place modification return result # - Parallel process input files if numproc > 1: pool = sharedmem.MapReduce(np=numproc) with pool: skies = pool.map(_get_skies, bricknames, reduce=_update_status) else: skies = list() for brickname in bricknames: skies.append(_update_status(_get_skies(brickname))) # ADM Concatenate the parallelized results into one rec array of sky information skies = np.concatenate(skies) log.info('Done...t={:.1f}s'.format(time()-start)) return skies