Example #1
def urat_csv_to_fits(numproc=5):
    """Convert files in $URAT_DIR/csv to files in $URAT_DIR/fits.

    Parameters
    ----------
    numproc : :class:`int`, optional, defaults to 5
        The number of parallel processes to use.

    Returns
    -------
    Nothing
        But the archived URAT CSV files in $URAT_DIR/csv are converted
        to FITS files in the directory $URAT_DIR/fits. Also, a look-up
        table is written to $URAT_DIR/fits/hpx-to-files.pickle for which
        each index is an nside=_get_urat_nside(), nested scheme HEALPixel
        and each entry is a list of the FITS files that touch that HEALPixel.

    Notes
    -----
        - The environment variable $URAT_DIR must be set.
        - if numproc==1, use the serial code instead of the parallel code.
        - Runs in about 10 minutes with numproc=25 for 575 files.
    """
    # ADM the resolution at which the URAT HEALPix files should be stored.
    nside = _get_urat_nside()

    # ADM check that the URAT_DIR is set.
    uratdir = _get_urat_dir()
    log.info("running on {} processors".format(numproc))

    # ADM construct the directories for reading/writing files.
    csvdir = os.path.join(uratdir, 'csv')
    fitsdir = os.path.join(uratdir, 'fits')

    # ADM make sure the output directory is empty.
    if os.path.exists(fitsdir):
        if len(os.listdir(fitsdir)) > 0:
            msg = "{} should be empty to make URAT FITS files!".format(fitsdir)
            log.critical(msg)
            raise ValueError(msg)
    # ADM make the output directory, if needed.
    else:
        log.info('Making URAT directory for storing FITS files')
        os.makedirs(fitsdir)

    # ADM construct the list of input files.
    infiles = glob("{}/*csv*".format(csvdir))
    nfiles = len(infiles)

    # ADM the critical function to run on every file.
    def _write_urat_fits(infile):
        """read an input name for a csv file and write it to FITS"""
        outbase = os.path.basename(infile)
        outfilename = "{}.fits".format(outbase.split(".")[0])
        outfile = os.path.join(fitsdir, outfilename)
        # ADM astropy's ascii.read infers the CSV format, so format='csv' isn't needed.
        fitstable = ascii.read(infile)

        # ADM map the ascii-read csv to typical DESI quantities.
        nobjs = len(fitstable)
        done = np.zeros(nobjs, dtype=uratdatamodel.dtype)
        # ADM have to do this one-by-one, given the format.
        done["RA"] = fitstable['col1'] / 1000. / 3600.
        done["DEC"] = fitstable['col2'] / 1000. / 3600. - 90.
        done["PMRA"] = fitstable['col16'] / 10.
        done["PMDEC"] = fitstable['col17'] / 10.
        done["PM_ERROR"] = fitstable['col18'] / 10.
        done["APASS_G_MAG"] = fitstable['col36'] / 1000.
        done["APASS_R_MAG"] = fitstable['col37'] / 1000.
        done["APASS_I_MAG"] = fitstable['col38'] / 1000.
        done["APASS_G_MAG_ERROR"] = fitstable['col41'] / 1000.
        done["APASS_R_MAG_ERROR"] = fitstable['col42'] / 1000.
        done["APASS_I_MAG_ERROR"] = fitstable['col43'] / 1000.
        done["URAT_ID"] = fitstable['col46']

        fitsio.write(outfile, done, extname='URATFITS')

        # ADM return the HEALPixels that this file touches.
        pix = set(radec2pix(nside, done["RA"], done["DEC"]))
        return [pix, os.path.basename(outfile)]

    # ADM this is just to count processed files in _update_status.
    nfile = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 25 == 0 and nfile > 0:
            rate = nfile / (time() - t0)
            elapsed = time() - t0
            log.info(
                '{}/{} files; {:.1f} files/sec; {:.1f} total mins elapsed'.
                format(nfile, nfiles, rate, elapsed / 60.))
        nfile[...] += 1  # this is an in-place modification
        return result

    # - Parallel process input files...
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            pixinfile = pool.map(_write_urat_fits,
                                 infiles,
                                 reduce=_update_status)
    # ADM ...or run in serial.
    else:
        pixinfile = list()
        for file in infiles:
            pixinfile.append(_update_status(_write_urat_fits(file)))

    # ADM create a list for which each index is a HEALPixel and each
    # ADM entry is a list of files that touch that HEALPixel.
    npix = hp.nside2npix(nside)
    pixlist = [[] for i in range(npix)]
    for pixels, file in pixinfile:
        for pix in pixels:
            pixlist[pix].append(file)

    # ADM write out the HEALPixel->files look-up table.
    outfilename = os.path.join(fitsdir, "hpx-to-files.pickle")
    outfile = open(outfilename, "wb")
    pickle.dump(pixlist, outfile)
    outfile.close()

    log.info('Done...t={:.1f}s'.format(time() - t0))

    return
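
A minimal usage sketch for the example above (not part of the original source); the import path and the $URAT_DIR location are assumptions:

import os
from desitarget.uratmatch import urat_csv_to_fits  # assumed module path

# $URAT_DIR must contain a csv/ subdirectory of archived URAT files and
# fits/ must be empty or absent; the path here is hypothetical.
os.environ["URAT_DIR"] = "/path/to/urat"
urat_csv_to_fits(numproc=8)
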
Example #2
def supplement_skies(nskiespersqdeg=None,
                     numproc=16,
                     gaiadir=None,
                     mindec=-30.,
                     mingalb=10.,
                     radius=2.,
                     minobjid=0):
    """Generate supplemental sky locations using Gaia-G-band avoidance.

    Parameters
    ----------
    nskiespersqdeg : :class:`float`, optional
        The minimum DENSITY of sky fibers to generate. Defaults to
        reading from :func:`~desimodel.io` with a margin of 4x.
    numproc : :class:`int`, optional, defaults to 16
        The number of processes over which to parallelize.
    gaiadir : :class:`str`, optional, defaults to $GAIA_DIR
        The GAIA_DIR environment variable is set to this directory.
        If ``None`` is passed, the $GAIA_DIR environment variable is assumed to already be set.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (degrees) to include for output sky locations.
    mingalb : :class:`float`, optional, defaults to 10
        Closest Galactic latitude (degrees) to the plane for output sky
        locations (e.g. send 10 to restrict to areas with |b| > 10 degrees).
    radius : :class:`float`, optional, defaults to 2
        Radius at which to avoid (all) Gaia sources (arcseconds).
    minobjid : :class:`int`, optional, defaults to 0
        The minimum OBJID to start counting from in a brick. Used
        to make sure supplemental skies have different OBJIDs from
        regular skies.

    Returns
    -------
    :class:`~numpy.ndarray`
        a structured array of supplemental sky positions in the DESI sky
        target format within the passed `mindec` and `mingalb` limits.

    Notes
    -----
        - The environment variable $GAIA_DIR must be set, or `gaiadir`
          must be passed.
    """
    log.info("running on {} processors".format(numproc))

    # ADM if the GAIA directory was passed, set it.
    if gaiadir is not None:
        os.environ["GAIA_DIR"] = gaiadir

    # ADM if needed, determine the density of sky fibers to generate.
    if nskiespersqdeg is None:
        nskiespersqdeg = density_of_sky_fibers(margin=4)

    # ADM determine the HEALPixel nside of the standard Gaia files.
    anyfiles = find_gaia_files([0, 0], radec=True)
    hdr = fitsio.read_header(anyfiles[0], "GAIAHPX")
    nside = hdr["HPXNSIDE"]

    # ADM create a set of random locations accounting for mindec.
    log.info("Generating supplemental sky locations at Dec > {}o...t={:.1f}s".
             format(mindec,
                    time() - start))
    from desitarget.randoms import randoms_in_a_brick_from_edges
    ras, decs = randoms_in_a_brick_from_edges(0.,
                                              360.,
                                              mindec,
                                              90.,
                                              density=nskiespersqdeg,
                                              wrap=False)

    # ADM limit randoms by mingalb.
    log.info(
        "Generated {} sky locations. Limiting to |b| > {}o...t={:.1f}s".format(
            len(ras), mingalb,
            time() - start))
    bnorth = is_in_gal_box([ras, decs], [0, 360, mingalb, 90], radec=True)
    bsouth = is_in_gal_box([ras, decs], [0, 360, -90, -mingalb], radec=True)
    ras, decs = ras[bnorth | bsouth], decs[bnorth | bsouth]

    # ADM find HEALPixels for the random points.
    log.info(
        "Cut to {} sky locations. Finding their HEALPixels...t={:.1f}s".format(
            len(ras),
            time() - start))
    theta, phi = np.radians(90 - decs), np.radians(ras)
    pixels = hp.ang2pix(nside, theta, phi, nest=True)
    upixels = np.unique(pixels)
    npixels = len(upixels)
    log.info("Running across {} HEALPixels.".format(npixels))

    # ADM parallelize across pixels. The function to run on every pixel.
    def _get_supp(pix):
        """wrapper on get_supp_skies() given a HEALPixel"""
        ii = (pixels == pix)
        return get_supp_skies(ras[ii], decs[ii], radius=radius)

    # ADM this is just to count pixels in _update_status.
    npix = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if npix % 500 == 0 and npix > 0:
            rate = npix / (time() - t0)
            log.info('{}/{} HEALPixels; {:.1f} pixels/sec'.format(
                npix, npixels, rate))
        npix[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process across the unique pixels.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            supp = pool.map(_get_supp, upixels, reduce=_update_status)
    else:
        supp = []
        for upix in upixels:
            supp.append(_update_status(_get_supp(upix)))

    # ADM Concatenate the parallelized results into one rec array.
    supp = np.concatenate(supp)

    # ADM build the OBJIDs from the number of sources per brick.
    # ADM the for loop doesn't seem the smartest way, but it is O(n).
    log.info("Begin assigning OBJIDs to bricks...t={:.1f}s".format(time() -
                                                                   start))
    brxid = supp["BRICKID"]
    # ADM start each brick counting from minobjid.
    cntr = np.zeros(np.max(brxid) + 1, dtype=int) + minobjid
    objid = []
    for ibrx in brxid:
        cntr[ibrx] += 1
        objid.append(cntr[ibrx])
    # ADM ensure the number of sky positions that were generated doesn't exceed
    # ADM the largest possible OBJID (which is unlikely).
    if np.any(cntr > 2**targetid_mask.OBJID.nbits):
        msg = 'OBJID would reach {} in a brick, but OBJID cannot exceed {}'.format(
            np.max(cntr), 2**targetid_mask.OBJID.nbits)
        log.fatal(msg)
        raise ValueError(msg)
    supp["OBJID"] = np.array(objid)
    log.info("Assigned OBJIDs to bricks...t={:.1f}s".format(time() - start))

    # ADM add the TARGETID, DESITARGET bits etc.
    nskies = len(supp)
    desi_target = np.zeros(nskies, dtype='>i8')
    desi_target |= desi_mask.SKY
    desi_target |= desi_mask.SUPP_SKY
    dum = np.zeros_like(desi_target)
    supp = finalize(supp, desi_target, dum, dum, sky=1)

    log.info('Done...t={:.1f}s'.format(time() - start))

    return supp
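
A brief usage sketch (not from the original source); the module path and the Gaia directory below are assumptions:

from desitarget.skyfibers import supplement_skies  # assumed module path

# Generate supplemental skies north of Dec=-30 and away from the Galactic
# plane, avoiding Gaia sources within 2 arcsec; gaiadir is hypothetical.
supp = supplement_skies(numproc=8, gaiadir="/path/to/gaia",
                        mindec=-30., mingalb=10., radius=2.)
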
Example #3
def select_skies(survey,
                 numproc=16,
                 nskiespersqdeg=None,
                 bands=['g', 'r', 'z'],
                 apertures_arcsec=[0.75],
                 nside=2,
                 pixlist=None,
                 writebricks=False):
    """Generate skies in parallel for bricks in a Legacy Surveys DR.

    Parameters
    ----------
    survey : :class:`object`
        `LegacySurveyData` object for a given Data Release of the Legacy Surveys; see
        :func:`~desitarget.skyutilities.legacypipe.util.LegacySurveyData` for details.
    numproc : :class:`int`, optional, defaults to 16
        The number of processes over which to parallelize.
    nskiespersqdeg : :class:`float`, optional
        The minimum DENSITY of sky fibers to generate. Defaults to reading from
        :func:`~desimodel.io` with a margin of 4x.
    bands : :class:`list`, optional, defaults to ['g', 'r', 'z']
        List of bands to be used to define good sky locations.
    apertures_arcsec : :class:`list`, optional, defaults to [0.75]
        Radii in arcsec of apertures for which to derive flux at a sky location.
    nside : :class:`int`, optional, defaults to nside=2 (859.4 sq. deg.)
        The HEALPixel nside number to be used with the `pixlist` input.
    pixlist : :class:`list` or `int`, optional, defaults to None
        Bricks will only be processed if the CENTER of the brick lies within the bounds of
        pixels that are in this list of integers, at the supplied HEALPixel `nside`.
        Uses the HEALPix NESTED scheme. Useful for parallelizing. If pixlist is ``None``
        then all bricks in the passed `survey` will be processed.
    writebricks : :class:`boolean`, defaults to False
        If `True`, write the skyfibers object for EACH brick (in the format of the
        output from :func:`sky_fibers_for_brick()`) to file. The file name is derived
        from the input `survey` object and is in the form:
        `%(survey.survey_dir)/metrics/%(brick).3s/skies-%(brick)s.fits.gz`
        which is returned by `survey.find_file('skies')`.

    Returns
    -------
    :class:`~numpy.ndarray`
        a structured array of sky positions in the DESI sky target format for all
        bricks in a Legacy Surveys Data Release.

    Notes
    -----
        - Some core code in this module was initially written by Dustin Lang (@dstndstn).
    """
    # ADM retrieve the bricks of interest for this DR.
    brickdict = get_brick_info([survey.survey_dir])
    bricknames = np.array(list(brickdict.keys()))

    # ADM restrict to only bricks in a set of HEALPixels, if requested.
    if pixlist is not None:
        bra, bdec, _, _, _, _ = np.vstack(brickdict.values()).T
        theta, phi = np.radians(90 - bdec), np.radians(bra)
        pixnum = hp.ang2pix(nside, theta, phi, nest=True)
        # ADM if an integer was passed, turn it into a list.
        if isinstance(pixlist, int):
            pixlist = [pixlist]
        ii = [pix in pixlist for pix in pixnum]
        bricknames = bricknames[ii]
        # ADM if there are no bricks to process, then die immediately.
        if len(bricknames) == 0:
            log.warning(
                'NO bricks found (nside={}, HEALPixels={}, DRdir={})!'.format(
                    nside, pixlist, survey.survey_dir))
            return
        log.info(
            "Processing bricks (nside={}, HEALPixels={}, DRdir={})".format(
                nside, pixlist, survey.survey_dir))
    nbricks = len(bricknames)
    log.info(
        'Processing {} bricks that have observations from DR at {}...t = {:.1f}s'
        .format(nbricks, survey.survey_dir,
                time() - start))

    # ADM a little more information if we're slurming across nodes.
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM the critical function to run on every brick.
    def _get_skies(brickname):
        '''wrapper on make_skies_for_a_brick() given a brick name'''

        return make_skies_for_a_brick(survey,
                                      brickname,
                                      nskiespersqdeg=nskiespersqdeg,
                                      bands=bands,
                                      apertures_arcsec=apertures_arcsec,
                                      write=writebricks)

    # ADM this is just in order to count bricks in _update_status.
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nbrick % 500 == 0 and nbrick > 0:
            elapsed = time() - t0
            rate = nbrick / elapsed
            log.info(
                '{}/{} bricks; {:.1f} bricks/sec; {:.1f} total mins elapsed'.
                format(nbrick, nbricks, rate, elapsed / 60.))

        nbrick[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            skies = pool.map(_get_skies, bricknames, reduce=_update_status)
    else:
        skies = list()
        for brickname in bricknames:
            skies.append(_update_status(_get_skies(brickname)))

    # ADM some missing blobs may have contaminated the array.
    skies = [sk for sk in skies if sk is not None]
    # ADM Concatenate the parallelized results into one rec array.
    skies = np.concatenate(skies)

    log.info(
        'Done with (nside={}, HEALPixels={}, DRdir={})...t={:.1f}s'.format(
            nside, pixlist, survey.survey_dir,
            time() - start))

    return skies
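
A brief usage sketch (not from the original source); the module paths, Data Release directory and pixel list are assumptions:

from desitarget.skyutilities.legacypipe.util import LegacySurveyData  # assumed path
from desitarget.skyfibers import select_skies  # assumed module path

# Process only bricks whose centers fall in one nside=2 HEALPixel.
survey = LegacySurveyData(survey_dir="/path/to/legacysurvey/dr8")  # hypothetical DR
skies = select_skies(survey, numproc=8, nside=2, pixlist=[7])
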
Example #4
def select_targets(infiles, numproc=4, verbose=False):
    """
    Process input files in parallel to select targets
    
    Args:
        infiles: list of input filenames (tractor or sweep files),
            OR a single filename
        
    Optional:
        numproc: number of parallel processes to use
        verbose: if True, print progress messages
        
    Returns:
        targets numpy structured array: the subset of input targets which
            pass the cuts, including extra columns for DESI_TARGET,
            BGS_TARGET, and MWS_TARGET target selection bitmasks. 
            
    Notes:
        if numproc==1, use serial code instead of parallel
    """
    #- Convert single file to list of files
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    #- Sanity check that files exist before going further
    for filename in infiles:
        if not os.path.exists(filename):
            raise ValueError("{} doesn't exist".format(filename))

    #- function to run on every brick/sweep file
    def _select_targets_file(filename):
        '''Returns targets in filename that pass the cuts'''
        from desitarget import io
        objects = io.read_tractor(filename)
        keep = apply_cuts(objects)

        return io.fix_tractor_dr1_dtype(objects[keep])

    # Counter for number of bricks processed;
    # a numpy scalar allows updating nbrick in python 2
    # c.f https://www.python.org/dev/peps/pep-3104/
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        ''' wrapper function for the critical reduction operation,
            that occurs on the main parallel process '''
        if verbose and nbrick % 50 == 0 and nbrick > 0:
            rate = nbrick / (time() - t0)
            print('{} files; {:.1f} files/sec'.format(nbrick, rate))

        nbrick[...] += 1  # this is an in-place modification
        return result

    #- Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            targets = pool.map(_select_targets_file,
                               infiles,
                               reduce=_update_status)
    else:
        targets = list()
        for x in infiles:
            targets.append(_update_status(_select_targets_file(x)))

    targets = np.concatenate(targets)

    return targets
Example #5
def select_targets(infiles,
                   numproc=4,
                   verbose=False,
                   qso_selection='randomforest',
                   sandbox=False,
                   FoMthresh=None,
                   Method=None):
    """Process input files in parallel to select targets

    Args:
        infiles: list of input filenames (tractor or sweep files),
            OR a single filename
        numproc (optional): number of parallel processes to use
        verbose (optional): if True, print progress messages
        qso_selection (optional): algorithm to use for QSO selection; valid options
            are 'colorcuts' and 'randomforest'
        sandbox (optional): if True, use the sample selection cuts in
            :mod:`desitarget.sandbox.cuts`.
        FoMthresh (optional): if a value is passed then run apply_XD_globalerror for ELGs in
            the sandbox. This will write out an "FoM.fits" file for every ELG target
            in the sandbox directory.
        Method (optional): method passed to the sandbox selection cuts.

    Returns:
        targets numpy structured array
            the subset of input targets which pass the cuts, including extra
            columns for DESI_TARGET, BGS_TARGET, and MWS_TARGET target
            selection bitmasks.

    Notes:
        if numproc==1, use serial code instead of parallel

    """
    #- Convert single file to list of files
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    #- Sanity check that files exist before going further
    for filename in infiles:
        if not os.path.exists(filename):
            raise ValueError("{} doesn't exist".format(filename))

    def _finalize_targets(objects, desi_target, bgs_target, mws_target):
        #- desi_target includes BGS_ANY and MWS_ANY, so we can filter just
        #- on desi_target != 0
        keep = (desi_target != 0)
        objects = objects[keep]
        desi_target = desi_target[keep]
        bgs_target = bgs_target[keep]
        mws_target = mws_target[keep]

        #- Add *_target mask columns
        targets = desitarget.targets.finalize(objects, desi_target, bgs_target,
                                              mws_target)

        return io.fix_tractor_dr1_dtype(targets)

    #- functions to run on every brick/sweep file
    def _select_targets_file(filename):
        '''Returns targets in filename that pass the cuts'''
        objects = io.read_tractor(filename)
        desi_target, bgs_target, mws_target = apply_cuts(
            objects, qso_selection)

        return _finalize_targets(objects, desi_target, bgs_target, mws_target)

    def _select_sandbox_targets_file(filename):
        '''Returns targets in filename that pass the sandbox cuts'''
        from desitarget.sandbox.cuts import apply_sandbox_cuts
        objects = io.read_tractor(filename)
        desi_target, bgs_target, mws_target = apply_sandbox_cuts(
            objects, FoMthresh, Method)

        return _finalize_targets(objects, desi_target, bgs_target, mws_target)

    # Counter for number of bricks processed;
    # a numpy scalar allows updating nbrick in python 2
    # c.f https://www.python.org/dev/peps/pep-3104/
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        ''' wrapper function for the critical reduction operation,
            that occurs on the main parallel process '''
        if verbose and nbrick % 50 == 0 and nbrick > 0:
            rate = nbrick / (time() - t0)
            print('{} files; {:.1f} files/sec'.format(nbrick, rate))

        nbrick[...] += 1  # this is an in-place modification
        return result

    #- Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            if sandbox:
                if verbose:
                    print("You're in the sandbox...")
                targets = pool.map(_select_sandbox_targets_file,
                                   infiles,
                                   reduce=_update_status)
            else:
                targets = pool.map(_select_targets_file,
                                   infiles,
                                   reduce=_update_status)
    else:
        targets = list()
        if sandbox:
            if verbose:
                print("You're in the sandbox...")
            for x in infiles:
                targets.append(_update_status(_select_sandbox_targets_file(x)))
        else:
            for x in infiles:
                targets.append(_update_status(_select_targets_file(x)))

    targets = np.concatenate(targets)

    return targets
Example #6
def select_targets(infiles, numproc=4, cmxdir=None, noqso=False):
    """Process input files in parallel to select commissioning (cmx) targets

    Parameters
    ----------
    infiles : :class:`list` or `str`
        List of input filenames (tractor/sweep files) OR one filename.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    cmxdir : :class:`str`, optional, defaults to :envvar:`CMX_DIR`
        Directory to find commissioning files to which to match, such
        as the CALSPEC stars. If not specified, the cmx directory is
        taken to be the value of :envvar:`CMX_DIR`.
    noqso : :class:`boolean`, optional, defaults to ``False``
        If passed, do not run the quasar selection. All QSO bits will be
        set to zero. Intended use is to speed unit tests.

    Returns
    -------
    :class:`~numpy.ndarray`
        The subset of input targets which pass the cmx cuts, including an extra
        column for `CMX_TARGET`.

    Notes
    -----
        - if numproc==1, use serial code instead of parallel.
    """
    from desiutil.log import get_logger
    log = get_logger()

    # -Convert single file to list of files.
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    # -Sanity check that files exist before going further.
    for filename in infiles:
        if not os.path.exists(filename):
            raise ValueError("{} doesn't exist".format(filename))

    # ADM retrieve/check the cmxdir.
    cmxdir = _get_cmxdir(cmxdir)

    def _finalize_targets(objects, cmx_target, priority_shift):
        # -cmx_target is non-zero for anything that passed a cmx cut,
        # -so we can filter just on cmx_target != 0
        keep = (cmx_target != 0)
        objects = objects[keep]
        cmx_target = cmx_target[keep]
        priority_shift = priority_shift[keep]

        # -Add *_target mask columns
        # ADM note that only cmx_target is defined for commissioning
        # ADM so just pass that around
        targets = finalize(objects,
                           cmx_target,
                           cmx_target,
                           cmx_target,
                           survey='cmx')
        # ADM shift the priorities of targets with functional priorities.
        targets["PRIORITY_INIT"] += priority_shift

        return targets

    # -functions to run on every brick/sweep file
    def _select_targets_file(filename):
        '''Returns targets in filename that pass the cuts'''
        objects = io.read_tractor(filename)
        cmx_target, priority_shift = apply_cuts(objects,
                                                cmxdir=cmxdir,
                                                noqso=noqso)

        return _finalize_targets(objects, cmx_target, priority_shift)

    # Counter for number of bricks processed;
    # a numpy scalar allows updating nbrick in python 2
    # c.f https://www.python.org/dev/peps/pep-3104/
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        ''' wrapper function for the critical reduction operation,
            that occurs on the main parallel process '''
        if nbrick % 20 == 0 and nbrick > 0:
            elapsed = time() - t0
            rate = elapsed / nbrick
            log.info(
                '{} files; {:.1f} secs/file; {:.1f} total mins elapsed'.format(
                    nbrick, rate, elapsed / 60.))
        nbrick[...] += 1  # this is an in-place modification
        return result

    # -Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            targets = pool.map(_select_targets_file,
                               infiles,
                               reduce=_update_status)
    else:
        targets = list()
        for x in infiles:
            targets.append(_update_status(_select_targets_file(x)))

    targets = np.concatenate(targets)

    return targets
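
A brief usage sketch (not from the original source); the module path, sweep locations and cmx directory are assumptions:

from glob import glob
from desitarget.cmx.cmx_cuts import select_targets  # assumed module path

# Hypothetical sweep files; noqso=True skips the (slow) quasar selection.
sweeps = glob("/path/to/sweep/*.fits")
cmxtargets = select_targets(sweeps, numproc=4, cmxdir="/path/to/cmx", noqso=True)
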
Example #7
def add_urat_pms(objs, numproc=4):
    """Add proper motions from URAT to a set of objects.

    Parameters
    ----------
    objs : :class:`~numpy.ndarray`
        Array of objects to update. Must include the columns "PMRA",
        "PMDEC", "REF_ID" (unique per object), "URAT_ID" and "URAT_SEP".
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.

    Returns
    -------
    :class:`~numpy.ndarray`
        The input array with the "PMRA", "PMDEC", "URAT_ID" and "URAT_SEP"
        columns updated to include URAT information.

    Notes
    -----
       - Order is retained using "REF_ID": The input and output
         arrays should have the same order.
    """
    # ADM check REF_ID is indeed unique for each object.
    assert len(objs["REF_ID"]) == len(np.unique(objs["REF_ID"]))

    # ADM record the original REF_IDs so we can match back to them.
    origids = objs["REF_ID"]

    # ADM loosely group the input objects on the sky. NSIDE=16 seems
    # ADM to nicely balance sample sizes for matching, with the code
    # ADM being quicker for clumped objects because of file I/O.
    theta, phi = np.radians(90 - objs["DEC"]), np.radians(objs["RA"])
    pixels = hp.ang2pix(16, theta, phi, nest=True)

    # ADM reorder objects (and pixels themselves) based on pixel number.
    ii = np.argsort(pixels)
    objs, pixels = objs[ii], pixels[ii]

    # ADM create pixel-split sub-lists of the objects.
    # ADM here, np.diff marks the transition to the next pixel number.
    splitobjs = np.split(objs, np.where(np.diff(pixels))[0] + 1)
    nallpix = len(splitobjs)

    # ADM function to run on each of the HEALPix-split input objs.
    def _get_urat_matches(splitobj):
        '''wrapper on match_to_urat() for rec array (matchrad=0.5")'''
        # ADM also return the REF_ID to track the objects.
        return [match_to_urat(splitobj, matchrad=0.5), splitobj["REF_ID"]]

    # ADM this is just to count pixels in _update_status.
    npix = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if npix % 200 == 0 and npix > 0:
            elapsed = (time() - t0) / 60.
            rate = npix / elapsed / 60.
            log.info('{}/{} pixels; {:.1f} pix/sec...t = {:.1f} mins'.format(
                npix, nallpix, rate, elapsed))
        npix[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process pixels.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            urats = pool.map(_get_urat_matches,
                             splitobjs,
                             reduce=_update_status)
    else:
        urats = []
        for splitobj in splitobjs:
            urats.append(_update_status(_get_urat_matches(splitobj)))

    # ADM remember to grab the REFIDs as well as the URAT matches.
    refids = np.concatenate(np.array(urats)[:, 1])
    urats = np.concatenate(np.array(urats)[:, 0])

    # ADM sort the output to match the input, on REF_ID.
    ii = np.zeros_like(refids)
    ii[np.argsort(origids)] = np.argsort(refids)
    assert np.all(refids[ii] == origids)

    return urats[ii]
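
A brief usage sketch (not from the original source); the module path and input catalog are assumptions:

import fitsio
from desitarget.uratmatch import add_urat_pms  # assumed module path

# Hypothetical input: a catalog that already contains the required columns
# ("RA", "DEC", "PMRA", "PMDEC", "REF_ID", "URAT_ID", "URAT_SEP").
objs = fitsio.read("/path/to/gaia-objects.fits")
updated = add_urat_pms(objs, numproc=4)
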
Example #8
def write_gaia_matches(infiles, numproc=4, outdir="."):
    """Match sweeps files to Gaia and rewrite with the Gaia columns added

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input filenames (sweep files) OR a single filename.
        Arrays in the files must contain at least the columns "RA" and "DEC".
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    outdir : :class:`str`, optional, defaults to the current directory
        The directory to write the files.

    Returns
    -------
    Nothing
        But the original sweeps files, with the columns in `gaiadatamodel`
        added (except for the columns `GAIA_RA` and `GAIA_DEC`), are
        written to file. The filename is the same as the input
        filename with the ".fits" replaced by "-gaia$DRmatch.fits"
        where $DR is extracted from the $GAIA_DIR environment variable.

    Notes
    -----
        - if numproc==1, use the serial code instead of the parallel code.
        - The environment variable $GAIA_DIR must be set.
    """
    # ADM check that the GAIA_DIR is set and retrieve it.
    gaiadir = _get_gaia_dir()

    # ADM convert a single file, if passed to a list of files.
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    # ADM check that files exist before proceeding.
    for filename in infiles:
        if not os.path.exists(filename):
            raise ValueError("{} doesn't exist".format(filename))

    nfiles = len(infiles)

    # ADM extract a reasonable name for output files from the Gaia directory.
    drloc = gaiadir.find("dr")
    # ADM if we didn't find the substring "dr" go generic.
    if drloc == -1:
        ender = '-gaiamatch.fits'
    else:
        ender = '-gaia{}match.fits'.format(gaiadir[drloc:drloc + 3])

    # ADM the critical function to run on every file.
    def _get_gaia_matches(fnwdir):
        '''wrapper on match_gaia_to_primary() given a file name'''
        # ADM extract the output file name.
        fn = os.path.basename(fnwdir)
        outfile = '{}/{}'.format(outdir, fn.replace(".fits", ender))

        # ADM read in the objects.
        objs, hdr = io.read_tractor(fnwdir, header=True)

        # ADM match to Gaia sources.
        gaiainfo = match_gaia_to_primary(objs)
        log.info(
            'Done with Gaia match for {} primary objects...t = {:.1f}s'.format(
                len(objs),
                time() - start))

        # ADM remove the GAIA_RA, GAIA_DEC columns as they aren't
        # ADM in the imaging surveys data model.
        gaiainfo = pop_gaia_coords(gaiainfo)

        # ADM add the Gaia column information to the sweeps array.
        for col in gaiainfo.dtype.names:
            objs[col] = gaiainfo[col]

        fitsio.write(outfile, objs, extname='SWEEP', header=hdr, clobber=True)
        return True

    # ADM this is just to count sweeps files in _update_status.
    nfile = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 50 == 0 and nfile > 0:
            rate = nfile / (time() - t0)
            log.info('{}/{} files; {:.1f} files/sec'.format(
                nfile, nfiles, rate))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            _ = pool.map(_get_gaia_matches, infiles, reduce=_update_status)
    else:
        for file in infiles:
            _ = _update_status(_get_gaia_matches(file))

    return
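
A brief usage sketch (not from the original source); the module path, sweep files and output directory are assumptions:

from glob import glob
from desitarget.gaiamatch import write_gaia_matches  # assumed module path

# Requires $GAIA_DIR to be set; the sweep paths below are hypothetical.
sweeps = glob("/path/to/sweep/8.0/sweep-*.fits")
write_gaia_matches(sweeps, numproc=8, outdir="/path/to/output")
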
Example #9
def make_bright_star_mask(maglim=12., matchrad=1., numproc=32,
                          maskepoch=2023.0, gaiaepoch=2015.5,
                          nside=None, pixels=None):
    """Make an all-sky bright star mask using Tycho, Gaia and URAT.

    Parameters
    ----------
    maglim : :class:`float`, optional, defaults to 12.
        Faintest magnitude at which to make the mask. This magnitude is
        interpreted as G-band for Gaia and, in order of preference, VT
        then HP then BT for Tycho (not every Tycho source has each band).
    matchrad : :class:`float`, optional, defaults to 1.
        Tycho sources that match a Gaia source at this separation in
        ARCSECONDS are NOT included in the output mask. The matching is
        performed rigorously, accounting for Gaia proper motions.
    numproc : :class:`int`, optional, defaults to 32.
        Number of processes over which to parallelize.
    maskepoch : :class:`float`, optional, defaults to 2023.0
        The mask is built at this epoch. Not all sources have proper
        motions from every survey, so proper motions are used, in order
        of preference, from Gaia, URAT, then Tycho.
    gaiaepoch : :class:`float`, optional, defaults to Gaia DR2 (2015.5)
        The epoch of the Gaia observations. Should be 2015.5 unless we
        move beyond Gaia DR2.
    nside : :class:`int`, optional, defaults to ``None``
        If passed, create a mask only in nested HEALPixels in `pixels`
        at this `nside`. Otherwise, run for the whole sky. If `nside`
        is passed then `pixels` must be passed too.
    pixels : :class:`list`, optional, defaults to ``None``
        If passed, create a mask only in nested HEALPixels at `nside` for
        pixel integers in `pixels`. Otherwise, run for the whole sky. If
        `pixels` is passed then `nside` must be passed too.

    Returns
    -------
    :class:`recarray`
        - The bright star mask in the form of `maskdatamodel.dtype`:
        - `REF_CAT` is `"T2"` for Tycho and `"G2"` for Gaia.
        - `REF_ID` is `Tyc1`*1,000,000+`Tyc2`*10+`Tyc3` for Tycho2;
          `"sourceid"` for Gaia-DR2 and Gaia-DR2 with URAT.
        - `REF_MAG` is, in order of preference, G-band for Gaia, VT
          then HP then BT for Tycho.
        - `URAT_ID` contains the URAT reference number for Gaia objects
          that use the URAT proper motion, or -1 otherwise.
        - The radii are in ARCSECONDS.
        - `E1` and `E2` are placeholders for ellipticity components, and
          are set to 0 for Gaia and Tycho sources.
        - `TYPE` is always `PSF` for star-like objects.
        - Note that the mask is based on objects in the pixel AT THEIR
          NATIVE EPOCH *NOT* AT THE INPUT `maskepoch`. It is therefore
          possible for locations in the output mask to be just beyond
          the boundaries of the input pixel.

    Notes
    -----
        - Runs (all-sky) in ~20 minutes for `numproc=32` and `maglim=12`.
        - `IN_RADIUS` (`NEAR_RADIUS`) corresponds to `IN_BRIGHT_OBJECT`
          (`NEAR_BRIGHT_OBJECT`) in `data/targetmask.yaml`. These radii
          are set in the function `desitarget.brightmask.radius()`.
        - The correct mask size for DESI is an open question.
        - The `GAIA_DIR`, `URAT_DIR` and `TYCHO_DIR` environment
          variables must be set.
    """
    log.info("running on {} processors".format(numproc))

    # ADM check if HEALPixel parameters have been correctly sent.
    io.check_both_set(pixels, nside)

    # ADM grab the nside of the Tycho files, which is a reasonable
    # ADM resolution for bright stars.
    if nside is None:
        nside = get_tycho_nside()
        # ADM array of HEALPixels over which to parallelize...
        pixels = np.arange(hp.nside2npix(nside))
    # ADM the number of pixels is needed to log progress, below.
    npixels = len(pixels)
    # ADM shuffle the pixels for better balance across nodes (as there
    # ADM are more stars in regions of the sky where pixels adjoin).
    np.random.shuffle(pixels)

    # ADM the common function that is actually parallelized across.
    def _make_bright_star_mx(pixnum):
        """returns bright star mask in one HEALPixel"""
        return make_bright_star_mask_in_hp(
            nside, pixnum, maglim=maglim, matchrad=matchrad,
            gaiaepoch=gaiaepoch, maskepoch=maskepoch, verbose=False)

    # ADM this is just to count pixels in _update_status.
    npix = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrap key reduction operation on the main parallel process"""
        if npix % 10 == 0 and npix > 0:
            rate = (time() - t0) / npix
            log.info('{}/{} HEALPixels; {:.1f} secs/pixel...t = {:.1f} mins'.
                     format(npix, npixels, rate, (time()-t0)/60.))
        npix[...] += 1
        return result

    # ADM Parallel process across HEALPixels.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            mask = pool.map(_make_bright_star_mx, pixels, reduce=_update_status)
    else:
        mask = list()
        for pixel in pixels:
            mask.append(_update_status(_make_bright_star_mx(pixel)))

    mask = np.concatenate(mask)

    log.info("Done making mask...t = {:.1f} mins".format((time()-t0)/60.))

    return mask
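
A brief usage sketch (not from the original source); the module path and pixel choices are assumptions:

from desitarget.brightmask import make_bright_star_mask  # assumed module path

# Requires $GAIA_DIR, $URAT_DIR and $TYCHO_DIR to be set. Build the mask
# in two illustrative nside=4 HEALPixels rather than running all-sky.
mask = make_bright_star_mask(maglim=12., numproc=8, nside=4, pixels=[10, 11])
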
Example #10
def select_secondary(infiles, numproc=4, sep=1., scxdir=None, scnd_mask=None):
    """Process secondary targets and update relevant bits.

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input primary target file names OR a single file name.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    sep : :class:`float`, defaults to 1 arcsecond
        The separation at which to match in ARCSECONDS.
    scxdir : :class:`str`, optional, defaults to :envvar:`SCND_DIR`
        The name of the directory that hosts secondary targets.
    scnd_mask : :class:`desiutil.bitmask.BitMask`, optional
        A mask corresponding to a set of secondary targets, e.g, could
        be ``from desitarget.targetmask import scnd_mask`` for the
        main survey mask. Defaults to the main survey mask.

    Returns
    -------
    :class:`~numpy.ndarray`
        All secondary targets from `scxdir` with columns ``TARGETID``,
        ``SCND_TARGET``, ``PRIORITY_INIT``, ``SUBPRIORITY`` and
        ``NUMOBS_INIT`` added. These columns are also populated,
        excepting ``SUBPRIORITY``.

    Notes
    -----
        - In addition, the primary target `infiles` are written back to
          their original path with `.fits` changed to `-wscnd.fits` and
          the ``SCND_TARGET`` and ``SCND_ANY`` columns
          populated for matching targets.
    """
    # ADM import the default (main survey) mask.
    if scnd_mask is None:
        from desitarget.targetmask import scnd_mask

    # ADM if a single primary file was passed, convert it to a list.
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]
    nfiles = len(infiles)

    # - Sanity check that files exist before going further.
    for filename in infiles:
        if not os.path.exists(filename):
            msg = "{} doesn't exist".format(filename)
            log.critical(msg)
            raise ValueError(msg)

    # ADM retrieve the scxdir, check its structure and fidelity...
    scxdir = _get_scxdir(scxdir)
    _check_files(scxdir, scnd_mask)
    # ADM ...and read in all of the secondary targets.
    scxtargs = read_files(scxdir, scnd_mask)

    # ADM split off any scx targets that have requested an OVERRIDE.
    scxover = scxtargs[scxtargs["OVERRIDE"]]
    scxtargs = scxtargs[~scxtargs["OVERRIDE"]]

    # ADM function to run on every input file.
    def _match_scx_file(fn):
        """wrapper on match_secondary() given a file name"""
        # ADM for one of the input primary target files, match to the
        # ADM non-override scx targets and update bits and TARGETID.
        return match_secondary(fn, scxtargs, sep=sep, scxdir=scxdir)

    # ADM this is just to count files in _update_status.
    nfile = np.array(1)
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 1 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} sec/file...t = {:.1f} mins'.format(
                nfile, nfiles, 1. / rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            scxall = pool.map(_match_scx_file, infiles, reduce=_update_status)
        # ADM if we ran with numproc==1, then the TARGETID in the view of
        # ADM scxtargs will have naturally updated during the loop. This could
        # ADM be solved with an expensive copy, if it was necessary. For the
        # ADM numproc > 1 case, though, we need to find TARGETIDs that have
        # ADM been set across the scxall outputs.
        targetids = np.max(np.vstack([scxt['TARGETID'] for scxt in scxall]),
                           axis=0)
        scxtargs = scxall[-1]
        scxtargs["TARGETID"] = targetids
    else:
        scxall = []
        for infile in infiles:
            scxall.append(_update_status(_match_scx_file(infile)))
        scxtargs = scxall[-1]

    # ADM now we're done matching, bring the override targets back...
    scxout = np.concatenate([scxtargs, scxover])

    # ADM ...and assign TARGETIDs to non-matching secondary targets.
    scxout = finalize_secondary(scxout, scnd_mask, sep=sep)

    return scxout
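
A brief usage sketch (not from the original source); the module path and file locations are assumptions:

from glob import glob
from desitarget.secondary import select_secondary  # assumed module path

# Hypothetical primary target files and secondary-target directory.
primaries = glob("/path/to/targets/targets-*.fits")
scx = select_secondary(primaries, numproc=4, sep=1., scxdir="/path/to/scnd")
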
Example #11
def gaia_csv_to_fits(numproc=4):
    """Convert files in $GAIA_DIR/csv to files in $GAIA_DIR/fits.

    Parameters
    ----------
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.

    Returns
    -------
    Nothing
        But the archived Gaia CSV files in $GAIA_DIR/csv are converted
        to FITS files in the directory $GAIA_DIR/fits. Also, a look-up
        table is written to $GAIA_DIR/fits/hpx-to-files.pickle for which
        each index is an nside=_get_gaia_nside(), nested scheme HEALPixel
        and each entry is a list of the FITS files that touch that HEALPixel.

    Notes
    -----
        - The environment variable $GAIA_DIR must be set.
        - if numproc==1, use the serial code instead of the parallel code.
        - Runs in 1-3 hours (depending on node) with numproc=32 for 60,000 files.
    """
    # ADM the resolution at which the Gaia HEALPix files should be stored.
    nside = _get_gaia_nside()

    # ADM check that the GAIA_DIR is set.
    gaiadir = _get_gaia_dir()
    log.info("running on {} processors".format(numproc))

    # ADM construct the directories for reading/writing files.
    csvdir = os.path.join(gaiadir, 'csv')
    fitsdir = os.path.join(gaiadir, 'fits')

    # ADM make sure the output directory is empty.
    if os.path.exists(fitsdir):
        if len(os.listdir(fitsdir)) > 0:
            msg = "{} should be empty to make Gaia FITS files!".format(fitsdir)
            log.critical(msg)
            raise ValueError(msg)
    # ADM make the output directory, if needed.
    else:
        log.info('Making Gaia directory for storing FITS files')
        os.makedirs(fitsdir)

    # ADM construct the list of input files.
    infiles = glob("{}/*csv*".format(csvdir))
    nfiles = len(infiles)

    # ADM the critical function to run on every file.
    def _write_gaia_fits(infile):
        """read an input name for a csv file and write it to FITS"""
        outbase = os.path.basename(infile)
        outfilename = "{}.fits".format(outbase.split(".")[0])
        outfile = os.path.join(fitsdir, outfilename)
        fitstable = ascii.read(infile, format='csv')

        # ADM convert columns read as 5-character strings ('true'/'false') to boolean.
        cols = np.array(fitstable.dtype.names)
        boolcols = cols[np.hstack(fitstable.dtype.descr)[1::2] == '<U5']
        for col in boolcols:
            fitstable[col] = fitstable[col] == 'true'

        # ADM only write out the columns we need for targeting.
        nobjs = len(fitstable)
        done = np.zeros(nobjs, dtype=ingaiadatamodel.dtype)
        for col in done.dtype.names:
            if col == 'REF_CAT':
                done[col] = 'G2'
            else:
                done[col] = fitstable[col.lower()]
        fitsio.write(outfile, done, extname='GAIAFITS')

        # ADM return the HEALPixels that this file touches.
        pix = set(radec2pix(nside, fitstable["ra"], fitstable["dec"]))
        return [pix, os.path.basename(outfile)]

    # ADM this is just to count processed files in _update_status.
    nfile = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 100 == 0 and nfile > 0:
            rate = nfile / (time() - t0)
            elapsed = time() - t0
            log.info(
                '{}/{} files; {:.1f} files/sec; {:.1f} total mins elapsed'.
                format(nfile, nfiles, rate, elapsed / 60.))
        nfile[...] += 1  # this is an in-place modification
        return result

    # - Parallel process input files...
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            pixinfile = pool.map(_write_gaia_fits,
                                 infiles,
                                 reduce=_update_status)
    # ADM ...or run in serial.
    else:
        pixinfile = list()
        for file in infiles:
            pixinfile.append(_update_status(_write_gaia_fits(file)))

    # ADM create a list for which each index is a HEALPixel and each
    # ADM entry is a list of files that touch that HEALPixel.
    npix = hp.nside2npix(nside)
    pixlist = [[] for i in range(npix)]
    for pixels, file in pixinfile:
        for pix in pixels:
            pixlist[pix].append(file)

    # ADM write out the HEALPixel->files look-up table.
    outfilename = os.path.join(fitsdir, "hpx-to-files.pickle")
    outfile = open(outfilename, "wb")
    pickle.dump(pixlist, outfile)
    outfile.close()

    log.info('Done...t={:.1f}s'.format(time() - t0))

    return
Example #12
def make_ledger(hpdirname, outdirname, obscon="DARK", numproc=1):
    """
    Make initial MTL ledger files for all HEALPixels.

    Parameters
    ----------
    hpdirname : :class:`str`
        Full path to either a directory containing targets that
        have been partitioned by HEALPixel (i.e. as made by
        `select_targets` with the `bundle_files` option). Or the
        name of a single file of targets.
    outdirname : :class:`str`
        Output directory to which to write the MTL (the file name is
        constructed on the fly).
    obscon : :class:`str`, optional, defaults to "DARK"
        A string matching ONE obscondition in the desitarget bitmask yaml
        file (i.e. in `desitarget.targetmask.obsconditions`), e.g. "GRAY".
        Governs how priorities are set based on "obsconditions". Also
        governs the sub-directory to which the ledger is written.
    numproc : :class:`int`, optional, defaults to 1 for serial
        Number of processes to parallelize across.

    Returns
    -------
    Nothing, but writes the full HEALPixel-split ledger to `outdirname`.

    Notes
    -----
    - For _get_mtl_nside()=32, takes about 25 minutes with `numproc=12`.
      `numproc>12` can run into memory issues.
    - For _get_mtl_nside()=16, takes about 50 minutes with `numproc=8`.
      `numproc>8` can run into memory issues.
    """
    # ADM grab information regarding how the targets were constructed.
    hdr, dt = io.read_targets_header(hpdirname, dtype=True)
    # ADM check the obscon for which the targets were made is
    # ADM consistent with the requested obscon.
    oc = hdr["OBSCON"]
    if obscon not in oc:
        msg = "File is type {} but requested behavior is {}".format(oc, obscon)
        log.critical(msg)
        raise ValueError(msg)

    # ADM the MTL datamodel must reflect the target flavor (SV, etc.).
    mtldm = switch_main_cmx_or_sv(mtldatamodel, np.array([], dt))
    # ADM speed-up by only reading the necessary columns.
    cols = list(set(mtldm.dtype.names).intersection(dt.names))

    # ADM optimal nside for reading in the targeting files.
    nside = hdr["FILENSID"]
    npixels = hp.nside2npix(nside)
    pixels = np.arange(npixels)

    # ADM the nside at which to write the MTLs.
    mtlnside = _get_mtl_nside()

    from desitarget.geomask import nside2nside

    # ADM the common function that is actually parallelized across.
    def _make_ledger_in_hp(pixnum):
        """make initial ledger in a single HEALPixel"""
        # ADM read in the needed columns from the targets.
        targs = io.read_targets_in_hp(hpdirname, nside, pixnum, columns=cols)
        if len(targs) == 0:
            return
        # ADM construct a list of all pixels in pixnum at the MTL nside.
        pixlist = nside2nside(nside, mtlnside, pixnum)
        # ADM write MTLs for the targs split over HEALPixels in pixlist.
        return make_ledger_in_hp(
            targs, outdirname, mtlnside, pixlist,
            obscon=obscon, indirname=hpdirname, verbose=False)

    # ADM this is just to count pixels in _update_status.
    npix = np.ones((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrap key reduction operation on the main parallel process"""
        if npix % 2 == 0 and npix > 0:
            rate = (time() - t0) / npix
            log.info('{}/{} HEALPixels; {:.1f} secs/pixel...t = {:.1f} mins'.
                     format(npix, npixels, rate, (time()-t0)/60.))
        npix[...] += 1
        return result

    # ADM Parallel process across HEALPixels.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            pool.map(_make_ledger_in_hp, pixels, reduce=_update_status)
    else:
        for pixel in pixels:
            _update_status(_make_ledger_in_hp(pixel))

    log.info("Done writing ledger...t = {:.1f} mins".format((time()-t0)/60.))

    return
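
A brief usage sketch (not from the original source); the module path and directories are assumptions:

from desitarget.mtl import make_ledger  # assumed module path

# Hypothetical locations of HEALPixel-split dark-time targets and the
# output MTL ledger directory.
make_ledger("/path/to/targets/dark", "/path/to/mtl", obscon="DARK", numproc=4)
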
Example #13
def collect_bright_stars(
        bands,
        maglim,
        numproc=4,
        rootdirname='/global/project/projectdirs/cosmo/data/legacysurvey/dr3.1/sweep/3.1',
        outfilename=None,
        verbose=False):
    """Extract a structure from the sweeps containing only bright stars in a given band to a given magnitude limit

    Parameters
    ----------
    bands : :class:`str`
        A magnitude band from the sweeps, e.g., "G", "R", "Z".
        Can pass multiple bands as a string, e.g. "GRZ", in which case maglim has to be a
        list of the same length as the string
    maglim : :class:`float`
        The upper limit in that magnitude band for which to assemble a list of bright stars.
        Can pass a list of magnitude limits, in which case bands has to be a string of the
        same length (e.g., "GRZ" for [12.3, 12.7, 12.6]).
    numproc : :class:`int`, optional
        Number of processes over which to parallelize
    rootdirname : :class:`str`, optional, defaults to dr3
        Root directory containing either sweeps or tractor files...e.g. for dr3 this might be
        /global/project/projectdirs/cosmo/data/legacysurvey/dr3/sweeps/dr3.1
    outfilename : :class:`str`, optional, defaults to not writing anything to file
        (FITS) File name to which to write the output structure of bright stars
    verbose : :class:`bool`, optional
        Set to ``True`` to write progress to screen.

    Returns
    -------
    :class:`recarray`
        The structure of bright stars from the sweeps limited in the passed band(s) to the
        passed maglim(s).
    """

    #ADM use io.py to retrieve list of sweeps or tractor files
    infiles = io.list_sweepfiles(rootdirname)
    if len(infiles) == 0:
        infiles = io.list_tractorfiles(rootdirname)
    if len(infiles) == 0:
        raise IOError(
            'No sweep or tractor files found in {}'.format(rootdirname))

    #ADM force the input maglim to be a list (in case a single value was passed)
    if isinstance(maglim, (int, float)):
        maglim = [maglim]

    #ADM set bands to uppercase if passed as lower case
    bands = bands.upper()
    #ADM the band names as a flux array instead of a string
    bandnames = np.array(["FLUX_" + band for band in bands])

    if len(bandnames) != len(maglim):
        raise IOError(
            'bands has to be the same length as maglim and {} does not equal {}'
            .format(len(bands), len(maglim)))

    #ADM change input magnitude(s) to a flux to test against
    fluxlim = 10.**((22.5 - np.array(maglim)) / 2.5)

    #ADM parallel formalism from this step forward is stolen from cuts.select_targets

    #ADM function to grab the bright stars from a given file
    def _get_bright_stars(filename):
        '''Retrieves bright stars from a sweeps/Tractor file'''
        objs = io.read_tractor(filename)
        #ADM write the fluxes as an array instead of as named columns
        fluxes = objs[bandnames].view(
            objs[bandnames].dtype[0]).reshape(objs[bandnames].shape + (-1, ))
        #ADM Retain rows for which ANY band is brighter than maglim
        w = np.where(np.any(fluxes > fluxlim, axis=1))
        if len(w[0]) > 0:
            return objs[w]

    #ADM counter for how many files have been processed
    #ADM critical to use np.ones because a numpy scalar allows in-place modification
    # cf. https://www.python.org/dev/peps/pep-3104/
    totfiles = np.ones((), dtype='i8') * len(infiles)
    nfiles = np.ones((), dtype='i8')
    t0 = time()
    if verbose:
        print('Collecting bright stars from sweeps...')

    def _update_status(result):
        '''wrapper function for the critical reduction operation,
        that occurs on the main parallel process'''
        if verbose and nfiles % 25 == 0:
            elapsed = time() - t0
            rate = nfiles / elapsed
            print('{}/{} files; {:.1f} files/sec; {:.1f} total mins elapsed'.
                  format(nfiles, totfiles, rate, elapsed / 60.))
        nfiles[...] += 1  #this is an in-place modification
        return result

    #ADM did we ask to parallelize, or not?
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            starstruc = pool.map(_get_bright_stars,
                                 infiles,
                                 reduce=_update_status)
    else:
        starstruc = []
        for file in infiles:
            starstruc.append(_update_status(_get_bright_stars(file)))

    #ADM note that if there were no bright stars in a file then
    #ADM the _get_bright_stars function will have returned NoneTypes
    #ADM so we need to filter those out
    starstruc = [x for x in starstruc if x is not None]
    if len(starstruc) == 0:
        raise IOError(
            'There are no stars brighter than {} in {} in files in {} with which to make a mask'
            .format(str(maglim), bands, rootdirname))
    #ADM concatenate all of the output recarrays
    starstruc = np.hstack(starstruc)

    #ADM if the name of a file for output is passed, then write to it
    if outfilename is not None:
        fitsio.write(outfilename, starstruc, clobber=True)

    return starstruc
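A minimal usage sketch for collect_bright_stars, assuming the imports used throughout these examples; the band/magnitude pairing, process count and output filename below are illustrative placeholders, not values from the source.

# Illustrative call: gather stars brighter than 12.0 in G and 12.5 in R
# from the default dr3.1 sweeps, writing the result to a FITS file.
brightstars = collect_bright_stars(
    "GR", [12.0, 12.5], numproc=8,
    outfilename="bright-stars-gr.fits", verbose=True)
print("retrieved {} bright stars".format(len(brightstars)))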
Example #14
0
def select_skies(survey, numproc=16, nskiespersqdeg=None, bands=['g', 'r', 'z'],
                 apertures_arcsec=[0.75], nside=2, pixlist=None,
                 writebricks=False, bundlebricks=None, brickspersec=1.6):
    """Generate skies in parallel for all bricks in a Legacy Surveys Data Release.

    Parameters
    ----------
    survey : :class:`object`
        `LegacySurveyData` object for a given Data Release of the Legacy Surveys; see
        :func:`~desitarget.skyutilities.legacypipe.util.LegacySurveyData` for details.
    numproc : :class:`int`, optional, defaults to 16
        The number of processes over which to parallelize.
    nskiespersqdeg : :class:`float`, optional
        The minimum DENSITY of sky fibers to generate. Defaults to reading from
        :func:`~desimodel.io` with a margin of 4x.
    bands : :class:`list`, optional, defaults to ['g', 'r', 'z']
        List of bands to be used to define good sky locations.
    apertures_arcsec : :class:`list`, optional, defaults to [0.75]
        Radii in arcsec of apertures for which to derive flux at a sky location.
    nside : :class:`int`, optional, defaults to nside=2 (859.4 sq. deg.)
        The HEALPixel nside number to be used with the `pixlist` input.
    pixlist : :class:`list` or `int`, optional, defaults to None
        Bricks will only be processed if the CENTER of the brick lies within the bounds of
        pixels that are in this list of integers, at the supplied HEALPixel `nside`.
        Uses the HEALPix NESTED scheme. Useful for parallelizing. If pixlist is ``None``
        then all bricks in the passed `survey` will be processed.
    writebricks : :class:`boolean`, defaults to False
        If `True`, write the skyfibers object for EACH brick (in the format of the
        output from :func:`sky_fibers_for_brick()`) to file. The file name is derived
        from the input `survey` object and is in the form:
        `%(survey.survey_dir)/metrics/%(brick).3s/skies-%(brick)s.fits.gz`
        which is returned by `survey.find_file('skies')`.
    bundlebricks : :class:`int`, defaults to None
        If not None, then instead of selecting the skies, print, to screen, the slurm
        script that will approximately balance the brick distribution at `bundlebricks`
        bricks per node. So, for instance, if bundlebricks is 14000 (which as of
        the latest git push works well to fit on the interactive nodes on Cori), then
        commands would be returned with the correct pixlist values to pass to the code
        to pack at about 14000 bricks per node across all of the bricks in `survey`.
    brickspersec : :class:`float`, optional, defaults to 1.6
        The rough number of bricks processed per second by the code (parallelized across
        a chosen number of nodes). Used in conjunction with `bundlebricks` for the code
        to estimate time to completion when parallelizing across pixels.

    Returns
    -------
    :class:`~numpy.ndarray`
        a structured array of sky positions in the DESI sky target format for all
        bricks in a Legacy Surveys Data Release.

    Notes
    -----
        - Some core code in this module was initially written by Dustin Lang (@dstndstn).
        - Returns nothing if bundlebricks is passed (and is not ``None``).
    """
    # ADM these comments were for debugging photutils/astropy dependencies
    # ADM and they can be removed at any time
#    import astropy
#    print(astropy.version)
#    print(astropy.version.version)
#    print(photutils.version)
#    print(photutils.version.version)

    # ADM read in the survey bricks file, which lists the bricks of interest for this DR
    from glob import glob
    sbfile = glob(survey.survey_dir+'/*bricks-dr*')[0]
    brickinfo = fitsio.read(sbfile)
    # ADM remember that fitsio reads things in as bytes, so convert to unicode
    bricknames = brickinfo['brickname'].astype('U')

    # ADM if the pixlist or bundlebricks option was sent, we'll need the HEALPixel
    # ADM information for each brick
    if pixlist is not None or bundlebricks is not None:
        theta, phi = np.radians(90-brickinfo["dec"]), np.radians(brickinfo["ra"])
        pixnum = hp.ang2pix(nside, theta, phi, nest=True)

    # ADM if the bundlebricks option was sent, call the packing code
    if bundlebricks is not None:
        bundle_bricks(pixnum, bundlebricks, nside, prefix='skies',
                      surveydir=survey.survey_dir, brickspersec=brickspersec)
        return

    # ADM restrict to only bricks in a set of HEALPixels, if requested
    if pixlist is not None:
        # ADM if an integer was passed, turn it into a list
        if isinstance(pixlist, int):
            pixlist = [pixlist]
        wbricks = np.where([pix in pixlist for pix in pixnum])[0]
        bricknames = bricknames[wbricks]
        if len(wbricks) == 0:
            log.warning('ZERO bricks in passed pixel list!!!')
        log.info("Processing bricks in (nside={}, pixel numbers={}) HEALPixels"
                 .format(nside, pixlist))

    nbricks = len(bricknames)
    log.info('Processing {} bricks that have observations from DR at {}...t = {:.1f}s'
             .format(nbricks, survey.survey_dir, time()-start))

    # ADM a little more information if we're slurming across nodes
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM the critical function to run on every brick
    def _get_skies(brickname):
        '''wrapper on make_skies_for_a_brick() given a brick name'''

        return make_skies_for_a_brick(survey, brickname,
                                      nskiespersqdeg=nskiespersqdeg, bands=bands,
                                      apertures_arcsec=apertures_arcsec,
                                      write=writebricks)

    # ADM this is just in order to count bricks in _update_status
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nbrick % 500 == 0 and nbrick > 0:
            rate = nbrick / (time() - t0)
            log.info('{}/{} bricks; {:.1f} bricks/sec'.format(nbrick, nbricks, rate))

        nbrick[...] += 1    # this is an in-place modification
        return result

    # - Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            skies = pool.map(_get_skies, bricknames, reduce=_update_status)
    else:
        skies = list()
        for brickname in bricknames:
            skies.append(_update_status(_get_skies(brickname)))

    # ADM Concatenate the parallelized results into one rec array of sky information
    skies = np.concatenate(skies)

    log.info('Done...t={:.1f}s'.format(time()-start))

    return skies
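A hedged sketch of driving select_skies for a single nside=2 HEALPixel. The LegacySurveyData import path is taken from the docstring above; the assumption that it accepts a survey_dir keyword, the placeholder directory and the pixel number are all illustrative. Passing bundlebricks instead would only print a slurm script, per the docstring.

from desitarget.skyutilities.legacypipe.util import LegacySurveyData

# Placeholder Data Release directory; point this at a real Legacy Surveys DR.
survey = LegacySurveyData(survey_dir="/path/to/legacysurvey/dr")
# Only process bricks whose centers fall in nside=2 HEALPixel 7.
skies = select_skies(survey, numproc=8, nside=2, pixlist=[7])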
Example #15
0
def urat_fits_to_healpix(numproc=5):
    """Convert files in $URAT_DIR/fits to files in $URAT_DIR/healpix.

    Parameters
    ----------
    numproc : :class:`int`, optional, defaults to 5
        The number of parallel processes to use.

    Returns
    -------
    Nothing
        But the archived URAT FITS files in $URAT_DIR/fits are
        rearranged by HEALPixel in the directory $URAT_DIR/healpix.
        The HEALPixel sense is nested with nside=_get_urat_nside(), and
        each file in $URAT_DIR/healpix is called healpix-xxxxx.fits,
        where xxxxx corresponds to the HEALPixel number.

    Notes
    -----
        - The environment variable $URAT_DIR must be set.
        - if numproc==1, use the serial code instead of the parallel code.
        - Runs in about 10 minutes with numproc=25.
    """
    # ADM the resolution at which the URAT HEALPix files should be stored.
    nside = _get_urat_nside()

    # ADM check that the URAT_DIR is set.
    uratdir = _get_urat_dir()

    # ADM construct the directories for reading/writing files.
    fitsdir = os.path.join(uratdir, 'fits')
    hpxdir = os.path.join(uratdir, 'healpix')

    # ADM make sure the output directory is empty.
    if os.path.exists(hpxdir):
        if len(os.listdir(hpxdir)) > 0:
            msg = "{} should be empty to make URAT HEALPix files!".format(
                hpxdir)
            log.critical(msg)
            raise ValueError(msg)
    # ADM make the output directory, if needed.
    else:
        log.info('Making URAT directory for storing HEALPix files')
        os.makedirs(hpxdir)

    # ADM read the pixel -> file look-up table.
    infilename = os.path.join(fitsdir, "hpx-to-files.pickle")
    infile = open(infilename, "rb")
    pixlist = pickle.load(infile)
    npixels = len(pixlist)
    # ADM include the pixel number explicitly in the look-up table.
    pixlist = list(zip(np.arange(npixels), pixlist))

    # ADM the critical function to run on every file.
    def _write_hpx_fits(pixlist):
        """from files that touch a pixel, write out objects in each pixel"""
        pixnum, files = pixlist
        # ADM only proceed if some files touch a pixel.
        if len(files) > 0:
            # ADM track if it's our first time through the files loop.
            first = True
            # ADM Read in files that touch a pixel.
            for file in files:
                filename = os.path.join(fitsdir, file)
                objs = fitsio.read(filename)
                # ADM only retain objects in the correct pixel.
                pix = radec2pix(nside, objs["RA"], objs["DEC"])
                if first:
                    done = objs[pix == pixnum]
                    first = False
                else:
                    done = np.hstack([done, objs[pix == pixnum]])
            # ADM construct the name of the output file.
            outfilename = 'healpix-{:05d}.fits'.format(pixnum)
            outfile = os.path.join(hpxdir, outfilename)
            # ADM write out the file.
            hdr = fitsio.FITSHDR()
            hdr['HPXNSIDE'] = nside
            hdr['HPXNEST'] = True
            fitsio.write(outfile, done, extname='URATHPX', header=hdr)

        return

    # ADM this is just to count processed files in _update_status.
    npix = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if npix % 500 == 0 and npix > 0:
            rate = npix / (time() - t0)
            elapsed = time() - t0
            log.info(
                '{}/{} files; {:.1f} files/sec; {:.1f} total mins elapsed'.
                format(npix, npixels, rate, elapsed / 60.))
        npix[...] += 1  # this is an in-place modification
        return result

    # - Parallel process input files...
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            _ = pool.map(_write_hpx_fits, pixlist, reduce=_update_status)
    # ADM ...or run in serial.
    else:
        for pix in pixlist:
            _update_status(_write_hpx_fits(pix))

    log.info('Done...t={:.1f}s'.format(time() - t0))

    return
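An illustrative invocation, assuming $URAT_DIR/fits has already been populated; the path below is a placeholder for wherever the URAT data live on your system.

import os
# Placeholder location for the URAT data; adjust for your system.
os.environ["URAT_DIR"] = "/path/to/urat"
# Rearrange $URAT_DIR/fits into per-HEALPixel files in $URAT_DIR/healpix.
urat_fits_to_healpix(numproc=25)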
Example #16
0
def all_gaia_in_tiles(maglim=18,
                      numproc=4,
                      allsky=False,
                      tiles=None,
                      mindec=-30,
                      mingalb=10,
                      nside=None,
                      pixlist=None,
                      addobjid=False):
    """An array of all Gaia objects in the DESI tiling footprint

    Parameters
    ----------
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    allsky : :class:`bool`,  defaults to ``False``
        If ``True``, assume that the DESI tiling footprint is the
        entire sky regardless of the value of `tiles`.
    tiles : :class:`~numpy.ndarray`, optional, defaults to ``None``
        Array of DESI tiles. If None, then load the entire footprint.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (o) to include for output Gaia objects.
    mingalb : :class:`float`, optional, defaults to 10
        Closest latitude to Galactic plane for output Gaia objects
        (e.g. send 10 to limit to areas beyond -10o <= b < 10o).
    nside : :class:`int`, optional, defaults to `None`
        (NESTED) HEALPix `nside` to use with `pixlist`.
    pixlist : :class:`list` or `int`, optional, defaults to `None`
        Only return sources in a set of (NESTED) HEALpixels at the
        supplied `nside`.
    addobjid : :class:`bool`, optional, defaults to ``False``
        If ``True``, include, in the output, a column "GAIA_OBJID"
        that is the integer number of each row read from each Gaia file.

    Returns
    -------
    :class:`~numpy.ndarray`
        Gaia objects within the passed geometric constraints brighter
        than `maglim`, formatted like `desitarget.gfa.gfadatamodel`.

    Notes
    -----
       - The environment variables $GAIA_DIR and $DESIMODEL must be set.
    """
    # ADM to guard against no files being found.
    if pixlist is None:
        dummyfile = find_gaia_files_hp(_get_gaia_nside(), [0],
                                       neighbors=False)[0]
    else:
        # ADM this is critical for, e.g., unit tests for which the
        # ADM Gaia "00000" pixel file might not exist.
        dummyfile = find_gaia_files_hp(_get_gaia_nside(),
                                       pixlist[0],
                                       neighbors=False)[0]
    dummygfas = np.array([], gaia_in_file(dummyfile).dtype)

    # ADM grab paths to Gaia files in the sky or the DESI footprint.
    if allsky:
        infilesbox = find_gaia_files_box([0, 360, mindec, 90])
        infilesgalb = find_gaia_files_beyond_gal_b(mingalb)
        infiles = list(set(infilesbox).intersection(set(infilesgalb)))
        if pixlist is not None:
            infileshp = find_gaia_files_hp(nside, pixlist, neighbors=False)
            infiles = list(set(infiles).intersection(set(infileshp)))
    else:
        infiles = find_gaia_files_tiles(tiles=tiles, neighbors=False)
    nfiles = len(infiles)

    # ADM the critical function to run on every file.
    def _get_gaia_gfas(fn):
        '''wrapper on gaia_in_file() given a file name'''
        return gaia_in_file(fn,
                            maglim=maglim,
                            mindec=mindec,
                            mingalb=mingalb,
                            nside=nside,
                            pixlist=pixlist,
                            addobjid=addobjid)

    # ADM this is just to count sweeps files in _update_status.
    nfile = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 100 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'.format(
                nfile, nfiles, rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process Gaia files.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            gfas = pool.map(_get_gaia_gfas, infiles, reduce=_update_status)
    else:
        gfas = list()
        for file in infiles:
            gfas.append(_update_status(_get_gaia_gfas(file)))

    if len(gfas) > 0:
        gfas = np.concatenate(gfas)
    else:
        # ADM if nothing was found, return an empty np array.
        gfas = dummygfas

    log.info('Retrieved {} Gaia objects...t = {:.1f} mins'.format(
        len(gfas), (time() - t0) / 60.))

    return gfas
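An illustrative call restricted to a few HEALPixels, assuming $GAIA_DIR and $DESIMODEL are set; the nside and pixel numbers are arbitrary choices for the sketch.

gfas = all_gaia_in_tiles(maglim=18, numproc=8, allsky=True,
                         mindec=-30, mingalb=10,
                         nside=8, pixlist=[100, 101], addobjid=True)
print("{} Gaia objects with columns {}".format(len(gfas), gfas.dtype.names))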
Example #17
0
def select_randoms(drdir,
                   density=100000,
                   numproc=32,
                   nside=4,
                   pixlist=None,
                   bundlebricks=None,
                   brickspersec=2.5,
                   dustdir=None):
    """NOBS, GALDEPTH, PSFDEPTH (per-band) for random points in a DR of the Legacy Surveys

    Parameters
    ----------
    drdir : :class:`str`
       The root directory pointing to a Data Release from the Legacy Surveys
       e.g. /global/project/projectdirs/cosmo/data/legacysurvey/dr7.
    density : :class:`int`, optional, defaults to 100,000
        The number of random points to return per sq. deg. As a typical brick is
        ~0.25 deg x 0.25 deg (~0.0625 sq. deg.), about (0.0625*density) points
        will be returned per brick
    numproc : :class:`int`, optional, defaults to 32
        The number of processes over which to parallelize
    nside : :class:`int`, optional, defaults to nside=4 (214.86 sq. deg.)
        The (NESTED) HEALPixel nside to be used with the `pixlist` and `bundlebricks` input.
    pixlist : :class:`list` or `int`, optional, defaults to None
        Bricks will only be processed if the CENTER of the brick lies within the bounds of
        pixels that are in this list of integers, at the supplied HEALPixel `nside`.
        Uses the HEALPix NESTED scheme. Useful for parallelizing. If pixlist is None
        then all bricks in the passed `survey` will be processed.
    bundlebricks : :class:`int`, defaults to None
        If not None, then instead of selecting the skies, print, to screen, the slurm
        script that will approximately balance the brick distribution at `bundlebricks`
        bricks per node. So, for instance, if bundlebricks is 14000 (which as of
        the latest git push works well to fit on the interactive nodes on Cori and run
        in about an hour), then commands would be returned with the correct pixlist values
        to pass to the code to pack at about 14000 bricks per node across all of the bricks
        in `survey`.
    brickspersec : :class:`float`, optional, defaults to 2.5
        The rough number of bricks processed per second by the code (parallelized across
        a chosen number of nodes). Used in conjunction with `bundlebricks` for the code
        to estimate time to completion when parallelizing across pixels.
    dustdir : :class:`str`, optional, defaults to $DUST_DIR+'maps'
        The root directory pointing to SFD dust maps. If not
        sent, the code will try to use $DUST_DIR+'maps'
        before failing.

    Returns
    -------
    :class:`~numpy.ndarray`
        a numpy structured array with the following columns:
            RA: Right Ascension of a random point
            DEC: Declination of a random point
            BRICKNAME: Passed brick name
            NOBS_G: Number of observations at this location in the g-band
            NOBS_R: Number of observations at this location in the r-band
            NOBS_Z: Number of observations at this location in the z-band
            PSFDEPTH_G: PSF depth at this location in the g-band
            PSFDEPTH_R: PSF depth at this location in the r-band
            PSFDEPTH_Z: PSF depth at this location in the z-band
            GALDEPTH_G: Galaxy depth at this location in the g-band
            GALDEPTH_R: Galaxy depth at this location in the r-band
            GALDEPTH_Z: Galaxy depth at this location in the z-band
            MASKBITS: Extra mask bits info as stored in the header of e.g.,
              dr7dir + 'coadd/111/1116p210/legacysurvey-1116p210-maskbits.fits.gz'
            EBV: E(B-V) at this location from the SFD dust maps
    """
    # ADM read in the survey bricks file, which lists the bricks of interest for this DR.
    # ADM if this is pre-or-post-DR8 we need to find the correct directory or directories.
    drdirs = _pre_or_post_dr8(drdir)
    bricknames = []
    brickinfo = []
    for dd in drdirs:
        sbfile = glob(dd + '/*bricks-dr*')
        if len(sbfile) > 0:
            sbfile = sbfile[0]
            hdu = fits.open(sbfile)
            brickinfo.append(hdu[1].data)
            bricknames.append(hdu[1].data['BRICKNAME'])
        else:
            # ADM this is a hack for test bricks where we didn't always generate the
            # ADM bricks file. It's probably safe to remove it at some point.
            from desitarget.io import brickname_from_filename
            fns = glob(os.path.join(dd, 'tractor', '*', '*fits'))
            bricknames.append([brickname_from_filename(fn) for fn in fns])
            brickinfo.append([])
            if pixlist is not None or bundlebricks is not None:
                msg = 'DR-specific bricks file not found '
                msg += 'and pixlist or bundlebricks passed!!!'
                log.critical(msg)
                raise ValueError(msg)
    bricknames = np.concatenate(bricknames)
    brickinfo = np.concatenate(brickinfo)

    # ADM if the pixlist or bundlebricks option was sent, we'll need the HEALPixel
    # ADM information for each brick.
    if pixlist is not None or bundlebricks is not None:
        theta, phi = np.radians(90 - brickinfo["dec"]), np.radians(
            brickinfo["ra"])
        pixnum = hp.ang2pix(nside, theta, phi, nest=True)

    # ADM if the bundlebricks option was sent, call the packing code.
    if bundlebricks is not None:
        bundle_bricks(pixnum,
                      bundlebricks,
                      nside,
                      brickspersec=brickspersec,
                      prefix='randoms',
                      surveydir=drdir)
        return

    # ADM restrict to only bricks in a set of HEALPixels, if requested.
    if pixlist is not None:
        # ADM if an integer was passed, turn it into a list.
        if isinstance(pixlist, int):
            pixlist = [pixlist]
        wbricks = np.where([pix in pixlist for pix in pixnum])[0]
        bricknames = bricknames[wbricks]
        if len(wbricks) == 0:
            log.warning('ZERO bricks in passed pixel list!!!')
        log.info(
            "Processing bricks in (nside={}, pixel numbers={}) HEALPixels".
            format(nside, pixlist))

    nbricks = len(bricknames)
    log.info(
        'Processing {} bricks from DR at {} at density {:.1e} per sq. deg...t = {:.1f}s'
        .format(nbricks, drdir, density,
                time() - start))

    # ADM a little more information if we're slurming across nodes.
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM initialize the bricks class, and retrieve the brick information look-up table
    # ADM so it can be used in a common fashion.
    from desiutil import brick
    bricktable = brick.Bricks(bricksize=0.25).to_table()

    # ADM the critical function to run on every brick.
    def _get_quantities(brickname):
        '''wrapper on nobs_positions_in_a_brick_from_edges() given a brick name'''
        # ADM retrieve the edges for the brick that we're working on
        wbrick = np.where(bricktable["BRICKNAME"] == brickname)[0]
        ramin, ramax, decmin, decmax = np.array(bricktable[wbrick]["RA1",
                                                                   "RA2",
                                                                   "DEC1",
                                                                   "DEC2"])[0]

        # ADM populate the brick with random points, and retrieve the quantities
        # ADM of interest at those points.
        return get_quantities_in_a_brick(ramin,
                                         ramax,
                                         decmin,
                                         decmax,
                                         brickname,
                                         drdir,
                                         density=density,
                                         dustdir=dustdir)

    # ADM this is just to count bricks in _update_status
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        ''' wrapper function for the critical reduction operation,
            that occurs on the main parallel process '''
        if nbrick % 50 == 0 and nbrick > 0:
            rate = nbrick / (time() - t0)
            log.info('{}/{} bricks; {:.1f} bricks/sec'.format(
                nbrick, nbricks, rate))
            # ADM if we're going to exceed 4 hours, warn the user
            if nbricks / rate > 4 * 3600.:
                log.error(
                    "May take > 4 hours to run. Try running with bundlebricks instead."
                )

        nbrick[...] += 1  # this is an in-place modification
        return result

    # - Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            qinfo = pool.map(_get_quantities,
                             bricknames,
                             reduce=_update_status)
    else:
        qinfo = list()
        for brickname in bricknames:
            qinfo.append(_update_status(_get_quantities(brickname)))

    # ADM concatenate the randoms into a single long list and resolve whether
    # ADM they are officially in the north or the south.
    qinfo = np.concatenate(qinfo)
    qinfo = resolve(qinfo)

    # ADM one last shuffle to randomize across brick boundaries.
    np.random.seed(616)
    np.random.shuffle(qinfo)

    return qinfo
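A hedged sketch of a small select_randoms run; the DR directory, dust-map directory, density and pixel number are placeholders rather than values from the source.

randoms = select_randoms("/path/to/legacysurvey/dr7",
                         density=5000, numproc=16,
                         nside=4, pixlist=[83],
                         dustdir="/path/to/dust/maps")
# Each row carries RA, DEC, BRICKNAME, NOBS_*, PSFDEPTH_*, GALDEPTH_*,
# MASKBITS and EBV, per the docstring above.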
Example #18
0
def select_gfas(infiles,
                maglim=18,
                numproc=4,
                nside=None,
                pixlist=None,
                bundlefiles=None,
                extra=None,
                mindec=-30,
                mingalb=10,
                addurat=True):
    """Create a set of GFA locations using Gaia and matching to sweeps.

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input filenames (sweep files) OR a single filename.
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    nside : :class:`int`, optional, defaults to `None`
        (NESTED) HEALPix `nside` to use with `pixlist` and `bundlefiles`.
    pixlist : :class:`list` or `int`, optional, defaults to `None`
        Only return targets in a set of (NESTED) HEALpixels at the
        supplied `nside`. Useful for parallelizing.
    bundlefiles : :class:`int`, defaults to `None`
        If not `None`, then, instead of selecting gfas, print the slurm
        script to run in pixels at `nside`. Is an integer rather than
        a boolean for historical reasons.
    extra : :class:`str`, optional
        Extra command line flags to be passed to the executable lines in
        the output slurm script. Used in conjunction with `bundlefiles`.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (o) for output sources that do NOT match
        an object in the passed `infiles`.
    mingalb : :class:`float`, optional, defaults to 10
        Closest latitude to Galactic plane for output sources that
        do NOT match an object in the passed `infiles` (e.g. send
        10 to limit to regions beyond -10o <= b < 10o)".
    addurat : :class:`bool`, optional, defaults to ``True``
        If ``True`` then substitute proper motions from the URAT
        catalog where Gaia is missing proper motions. Requires that
        the :envvar:`URAT_DIR` is set and points to data downloaded and
        formatted by, e.g., :func:`~desitarget.uratmatch.make_urat_files`.

    Returns
    -------
    :class:`~numpy.ndarray`
        GFA objects from Gaia with the passed geometric constraints
        limited to the passed maglim and matched to the passed input
        files, formatted according to `desitarget.gfa.gfadatamodel`.

    Notes
    -----
        - If numproc==1, use the serial code instead of parallel code.
        - If numproc > 4, then numproc=4 is enforced for (just those)
          parts of the code that are I/O limited.
    """
    # ADM the code can have memory issues for nside=2 with large numproc.
    if nside is not None and nside < 4 and numproc > 8:
        msg = 'Memory may be an issue near Plane for nside < 4 and numproc > 8'
        log.warning(msg)

    # ADM force to no more than numproc=4 for I/O limited processes.
    numproc4 = numproc
    if numproc4 > 4:
        log.info('Forcing numproc to 4 for I/O limited parts of code')
        numproc4 = 4

    # ADM convert a single file, if passed to a list of files.
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    # ADM check that files exist before proceeding.
    for filename in infiles:
        if not os.path.exists(filename):
            msg = "{} doesn't exist".format(filename)
            log.critical(msg)
            raise ValueError(msg)

    # ADM if the pixlist option was sent, we'll need to
    # ADM know which HEALPixels touch each file.
    if pixlist is not None:
        filesperpixel, _, _ = sweep_files_touch_hp(nside, pixlist, infiles)

    # ADM if the bundlefiles option was sent, call the packing code.
    if bundlefiles is not None:
        # ADM were files from one or two input directories passed?
        surveydirs = list(set([os.path.dirname(fn) for fn in infiles]))
        bundle_bricks([0],
                      bundlefiles,
                      nside,
                      gather=False,
                      prefix='gfas',
                      surveydirs=surveydirs,
                      extra=extra)
        return

    # ADM restrict to input files in a set of HEALPixels, if requested.
    if pixlist is not None:
        infiles = list(set(np.hstack([filesperpixel[pix] for pix in pixlist])))
        if len(infiles) == 0:
            log.info('ZERO sweep files in passed pixel list!!!')
        log.info("Processing files in (nside={}, pixel numbers={}) HEALPixels".
                 format(nside, pixlist))
    nfiles = len(infiles)

    # ADM a little more information if we're slurming across nodes.
    if os.getenv('SLURMD_NODENAME') is not None:
        log.info('Running on Node {}'.format(os.getenv('SLURMD_NODENAME')))

    # ADM the critical function to run on every file.
    def _get_gfas(fn):
        '''wrapper on gaia_gfas_from_sweep() given a file name'''
        return gaia_gfas_from_sweep(fn, maglim=maglim)

    # ADM this is just to count sweeps files in _update_status.
    t0 = time()
    nfile = np.zeros((), dtype='i8')

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 20 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'.format(
                nfile, nfiles, rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files.
    if len(infiles) > 0:
        if numproc4 > 1:
            pool = sharedmem.MapReduce(np=numproc4)
            with pool:
                gfas = pool.map(_get_gfas, infiles, reduce=_update_status)
        else:
            gfas = list()
            for file in infiles:
                gfas.append(_update_status(_get_gfas(file)))
        gfas = np.concatenate(gfas)
        # ADM resolve any duplicates between imaging data releases.
        gfas = resolve(gfas)

    # ADM retrieve Gaia objects in the DESI footprint or passed tiles.
    log.info('Retrieving additional Gaia objects...t = {:.1f} mins'.format(
        (time() - t0) / 60))
    gaia = all_gaia_in_tiles(maglim=maglim,
                             numproc=numproc4,
                             allsky=True,
                             mindec=mindec,
                             mingalb=mingalb,
                             nside=nside,
                             pixlist=pixlist)

    # ADM remove any duplicates. Order is important here, as np.unique
    # ADM keeps the first occurrence, and we want to retain sweeps
    # ADM information as much as possible.
    if len(infiles) > 0:
        gfas = np.concatenate([gfas, gaia])
        _, ind = np.unique(gfas["REF_ID"], return_index=True)
        gfas = gfas[ind]
    else:
        gfas = gaia

    # ADM for zero/NaN proper motion objects, add URAT proper motions.
    if addurat:
        ii = ((np.isnan(gfas["PMRA"]) | (gfas["PMRA"] == 0)) &
              (np.isnan(gfas["PMDEC"]) | (gfas["PMDEC"] == 0)))
        log.info(
            'Adding URAT for {} objects with no PMs...t = {:.1f} mins'.format(
                np.sum(ii), (time() - t0) / 60))
        urat = add_urat_pms(gfas[ii], numproc=numproc)
        log.info(
            'Found an additional {} URAT objects...t = {:.1f} mins'.format(
                np.sum(urat["URAT_ID"] != -1), (time() - t0) / 60))
        for col in "PMRA", "PMDEC", "URAT_ID", "URAT_SEP":
            gfas[col][ii] = urat[col]

    # ADM restrict to only GFAs in a set of HEALPixels, if requested.
    if pixlist is not None:
        ii = is_in_hp(gfas, nside, pixlist)
        gfas = gfas[ii]

    return gfas
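An illustrative call to this pixel-aware version of select_gfas, assuming a glob over a placeholder sweeps directory and a couple of nside=2 HEALPixels.

from glob import glob

# Placeholder sweeps location; substitute a real sweep directory.
sweepfiles = glob("/path/to/sweep/*.fits")
gfas = select_gfas(sweepfiles, maglim=18, numproc=16,
                   nside=2, pixlist=[0, 1], addurat=True)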
Example #19
0
def all_gaia_in_tiles(maglim=18,
                      numproc=4,
                      allsky=False,
                      tiles=None,
                      mindec=-30,
                      mingalb=10):
    """An array of all Gaia objects in the DESI tiling footprint

    Parameters
    ----------
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    allsky : :class:`bool`,  defaults to ``False``
        If ``True``, assume that the DESI tiling footprint is the
        entire sky (i.e. return *all* Gaia objects across the sky).
    tiles : :class:`~numpy.ndarray`, optional, defaults to ``None``
        Array of DESI tiles. If None, then load the entire footprint.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (o) to include for output Gaia objects.
    mingalb : :class:`float`, optional, defaults to 10
        Closest latitude to Galactic plane for output Gaia objects
        (e.g. send 10 to limit to areas beyond -10o <= b < 10o).

    Returns
    -------
    :class:`~numpy.ndarray`
        Gaia objects within the passed geometric constraints brighter
        than `maglim`, formatted like `desitarget.gfa.gfadatamodel`.

    Notes
    -----
       - The environment variables $GAIA_DIR and $DESIMODEL must be set.
    """
    # ADM grab paths to Gaia files in the sky or the DESI footprint.
    if allsky:
        infilesbox = find_gaia_files_box([0, 360, mindec, 90])
        infilesgalb = find_gaia_files_beyond_gal_b(mingalb)
        infiles = list(set(infilesbox).intersection(set(infilesgalb)))
    else:
        infiles = find_gaia_files_tiles(tiles=tiles, neighbors=False)
    nfiles = len(infiles)

    # ADM the critical function to run on every file.
    def _get_gaia_gfas(fn):
        '''wrapper on gaia_in_file() given a file name'''
        return gaia_in_file(fn, maglim=maglim, mindec=mindec, mingalb=mingalb)

    # ADM this is just to count sweeps files in _update_status.
    nfile = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 1000 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'.format(
                nfile, nfiles, rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process Gaia files.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            gfas = pool.map(_get_gaia_gfas, infiles, reduce=_update_status)
    else:
        gfas = list()
        for file in infiles:
            gfas.append(_update_status(_get_gaia_gfas(file)))

    gfas = np.concatenate(gfas)

    log.info('Retrieved {} Gaia objects...t = {:.1f} mins'.format(
        len(gfas), (time() - t0) / 60.))

    return gfas
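A minimal sketch that restricts to the DESI footprint instead of the whole sky, assuming desimodel.io.load_tiles (used elsewhere in these examples) returns the default footprint when called with no arguments.

import desimodel.io

tiles = desimodel.io.load_tiles()
gaiaobjs = all_gaia_in_tiles(maglim=18, numproc=8, allsky=False, tiles=tiles)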
Example #20
0
def check_input_files(infiles, numproc=4, verbose=False):
    """
    Process input files in parallel to check whether they have
    any bugs that will prevent select_targets from completing,
    or whether files are corrupted.
    Useful to run before a full run of select_targets.

    Args:
        infiles: list of input filenames (tractor or sweep files),
            OR a single filename

    Optional:
        numproc: number of parallel processes
        verbose: if True, print progress messages

    Returns:
        The number of problematic files (0 if all files are OK). Also
        prints any problematic files to screen together with
        information on the problem

    Notes:
        if numproc==1, use serial code instead of parallel
    """
    #- Convert single file to list of files
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]

    #- Sanity check that files exist before going further
    for filename in infiles:
        if not os.path.exists(filename):
            raise ValueError("{} doesn't exist".format(filename))

    #- function to run on every brick/sweep file
    def _check_input_files(filename):
        '''Check for corrupted values in a file'''
        from functools import partial
        from os.path import getsize

        #ADM read in Tractor or sweeps files
        objects = io.read_tractor(filename)
        #ADM if everything is OK the default meassage will be "OK"
        filemessageroot = 'OK'
        filemessageend = ''
        #ADM columns that shouldn't have zero values
        cols = [
            'BRICKID',
            #            'RA_IVAR', 'DEC_IVAR',
            'MW_TRANSMISSION_G',
            'MW_TRANSMISSION_R',
            'MW_TRANSMISSION_Z',
            #            'WISE_FLUX',
            #            'WISE_MW_TRANSMISSION','DCHISQ'
        ]
        #ADM for each of these columns that shouldn't have zero values,
        #ADM loop through and look for zero values
        for colname in cols:
            if np.min(objects[colname]) == 0:
                filemessageroot = "WARNING...some values are zero for"
                filemessageend += " " + colname

        #ADM now, loop through entries in the file and search for 4096-byte
        #ADM blocks that are all zeros (a sign of corruption in file-writing)
        #ADM Note that fits files are padded by 2880 bytes, so we only want to
        #ADM process the file length (in bytes) - 2880
        bytestop = getsize(filename) - 2880

        with open(filename, 'rb') as f:
            for block_number, data in enumerate(
                    iter(partial(f.read, 4096), b'')):
                if not any(data):
                    if block_number * 4096 < bytestop:
                        filemessageroot = "WARNING...some values are zero for"
                        filemessageend += ' 4096-byte-block-#{0}'.format(
                            block_number)

        return [filename, filemessageroot + filemessageend]

    # Counter for number of bricks processed;
    # a numpy scalar allows updating nbrick in python 2
    # cf. https://www.python.org/dev/peps/pep-3104/
    nbrick = np.zeros((), dtype='i8')

    t0 = time()

    def _update_status(result):
        ''' wrapper function for the critical reduction operation,
            that occurs on the main parallel process '''
        if verbose and nbrick % 25 == 0 and nbrick > 0:
            elapsed = time() - t0
            rate = nbrick / elapsed
            print(
                '{} files; {:.1f} files/sec; {:.1f} total mins elapsed'.format(
                    nbrick, rate, elapsed / 60.))
        nbrick[...] += 1  # this is an in-place modification
        return result

    #- Parallel process input files
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            fileinfo = pool.map(_check_input_files,
                                infiles,
                                reduce=_update_status)
    else:
        fileinfo = list()
        for fil in infiles:
            fileinfo.append(_update_status(_check_input_files(fil)))

    fileinfo = np.array(fileinfo)
    w = np.where(fileinfo[..., 1] != 'OK')

    if len(w[0]) == 0:
        print('ALL FILES ARE OK')
    else:
        for fil in fileinfo[w]:
            print(fil[0], fil[1])

    return len(w[0])
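A short usage sketch with a placeholder sweeps path; note that the function returns the count of problematic files, which is checked below.

from glob import glob

sweeps = glob("/path/to/sweep/*.fits")  # placeholder path
nbad = check_input_files(sweeps, numproc=8, verbose=True)
if nbad > 0:
    print("{} problematic files; fix these before running select_targets".format(nbad))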
Example #21
0
def select_gfas(infiles,
                maglim=18,
                numproc=4,
                tilesfile=None,
                cmx=False,
                mindec=-30,
                mingalb=10,
                addurat=True):
    """Create a set of GFA locations using Gaia and matching to sweeps.

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input filenames (sweep files) OR a single filename.
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    tilesfile : :class:`str`, optional, defaults to ``None``
        Name of tiles file to load. For full details, see
        :func:`~desimodel.io.load_tiles`.
    cmx : :class:`bool`,  defaults to ``False``
        If ``True``, do not limit output to DESI tiling footprint.
        Used for selecting wider-ranging commissioning targets.
    mindec : :class:`float`, optional, defaults to -30
        Minimum declination (o) for output sources that do NOT match
        an object in the passed `infiles`.
    mingalb : :class:`float`, optional, defaults to 10
        Closest latitude to Galactic plane for output sources that
        do NOT match an object in the passed `infiles` (e.g. send
        10 to limit to regions beyond -10o <= b < 10o)".
    addurat : :class:`bool`, optional, defaults to ``True``
        If ``True`` then substitute proper motions from the URAT
        catalog where Gaia is missing proper motions. Requires that
        the :envvar:`URAT_DIR` is set and points to data downloaded and
        formatted by, e.g., :func:`~desitarget.uratmatch.make_urat_files`.

    Returns
    -------
    :class:`~numpy.ndarray`
        GFA objects from Gaia with the passed geometric constraints
        limited to the passed maglim and matched to the passed input
        files, formatted according to `desitarget.gfa.gfadatamodel`.

    Notes
    -----
        - If numproc==1, use the serial code instead of parallel code.
        - If numproc > 4, then numproc=4 is enforced for (just those)
          parts of the code that are I/O limited.
        - The tiles loaded from `tilesfile` will only be those in DESI.
          So, for custom tilings, set IN_DESI==1 in your tiles file.
    """
    # ADM force to no more than numproc=4 for I/O limited processes.
    numproc4 = numproc
    if numproc4 > 4:
        log.info('Forcing numproc to 4 for I/O limited parts of code')
        numproc4 = 4

    # ADM convert a single file, if passed to a list of files.
    if isinstance(infiles, str):
        infiles = [
            infiles,
        ]
    nfiles = len(infiles)

    # ADM check that files exist before proceeding.
    for filename in infiles:
        if not os.path.exists(filename):
            msg = "{} doesn't exist".format(filename)
            log.critical(msg)
            raise ValueError(msg)

    # ADM load the tiles file.
    tiles = desimodel.io.load_tiles(tilesfile=tilesfile)
    # ADM check some files loaded.
    if len(tiles) == 0:
        msg = "no tiles found in {}".format(tilesfile)
        log.critical(msg)
        raise ValueError(msg)

    # ADM the critical function to run on every file.
    def _get_gfas(fn):
        '''wrapper on gaia_gfas_from_sweep() given a file name'''
        return gaia_gfas_from_sweep(fn, maglim=maglim)

    # ADM this is just to count sweeps files in _update_status.
    t0 = time()
    nfile = np.zeros((), dtype='i8')

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 50 == 0 and nfile > 0:
            elapsed = (time() - t0) / 60.
            rate = nfile / elapsed / 60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'.format(
                nfile, nfiles, rate, elapsed))
        nfile[...] += 1  # this is an in-place modification.
        return result

    # - Parallel process input files.
    if numproc4 > 1:
        pool = sharedmem.MapReduce(np=numproc4)
        with pool:
            gfas = pool.map(_get_gfas, infiles, reduce=_update_status)
    else:
        gfas = list()
        for file in infiles:
            gfas.append(_update_status(_get_gfas(file)))

    gfas = np.concatenate(gfas)

    # ADM resolve any duplicates between imaging data releases.
    gfas = resolve(gfas)

    # ADM retrieve Gaia objects in the DESI footprint or passed tiles.
    log.info('Retrieving additional Gaia objects...t = {:.1f} mins'.format(
        (time() - t0) / 60))
    gaia = all_gaia_in_tiles(maglim=maglim,
                             numproc=numproc4,
                             allsky=cmx,
                             tiles=tiles,
                             mindec=mindec,
                             mingalb=mingalb)

    # ADM remove any duplicates. Order is important here, as np.unique
    # ADM keeps the first occurrence, and we want to retain sweeps
    # ADM information as much as possible.
    gfas = np.concatenate([gfas, gaia])
    _, ind = np.unique(gfas["REF_ID"], return_index=True)
    gfas = gfas[ind]

    # ADM for zero/NaN proper motion objects, add in URAT proper motions.
    if addurat:
        ii = ((np.isnan(gfas["PMRA"]) | (gfas["PMRA"] == 0)) &
              (np.isnan(gfas["PMDEC"]) | (gfas["PMDEC"] == 0)))
        log.info(
            'Adding URAT for {} objects with no PMs...t = {:.1f} mins'.format(
                np.sum(ii), (time() - t0) / 60))
        urat = add_urat_pms(gfas[ii], numproc=numproc)
        log.info(
            'Found an additional {} URAT objects...t = {:.1f} mins'.format(
                np.sum(urat["URAT_ID"] != -1), (time() - t0) / 60))
        for col in "PMRA", "PMDEC", "URAT_ID", "URAT_SEP":
            gfas[col][ii] = urat[col]

    # ADM a final clean-up to remove rows whose proper motions are NaN
    # ADM (from Gaia-matching) or exactly 0 (in the sweeps).
    ii = ((np.isnan(gfas["PMRA"]) | (gfas["PMRA"] == 0)) &
          (np.isnan(gfas["PMDEC"]) | (gfas["PMDEC"] == 0)))
    gfas = gfas[~ii]

    # ADM limit to DESI footprint or passed tiles, if not cmx'ing.
    if not cmx:
        ii = is_point_in_desi(tiles, gfas["RA"], gfas["DEC"])
        gfas = gfas[ii]

    return gfas
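An illustrative call to this tiles-based variant, again with placeholder sweep files; tilesfile is left as None here so that the default desimodel tiles are loaded (an assumption based on desimodel.io.load_tiles).

from glob import glob

sweepfiles = glob("/path/to/sweep/*.fits")  # placeholder path
gfas = select_gfas(sweepfiles, maglim=18, numproc=16,
                   tilesfile=None, cmx=False,
                   mindec=-30, mingalb=10, addurat=True)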
Example #22
0
def select_gfas(infiles, maglim=18, numproc=4, tilesfile=None, cmx=False):
    """Create a set of GFA locations using Gaia.

    Parameters
    ----------
    infiles : :class:`list` or `str`
        A list of input filenames (sweep files) OR a single filename.
    maglim : :class:`float`, optional, defaults to 18
        Magnitude limit for GFAs in Gaia G-band.
    numproc : :class:`int`, optional, defaults to 4
        The number of parallel processes to use.
    tilesfile : :class:`str`, optional, defaults to ``None``
        Name of tiles file to load. For full details, see
        :func:`~desimodel.io.load_tiles`.
    cmx : :class:`bool`,  defaults to ``False``
        If ``True``, do not limit output to DESI tiling footprint.
        Used for selecting wider-ranging commissioning targets.

    Returns
    -------
    :class:`~numpy.ndarray`
        GFA objects from Gaia across all of the passed input files, formatted
        according to `desitarget.gfa.gfadatamodel`.

    Notes
    -----
        - If numproc==1, use the serial code instead of the parallel code.
        - The tiles loaded from `tilesfile` will only be those in DESI.
          So, for custom tilings, set IN_DESI==1 in your tiles file.
    """
    # ADM convert a single file, if passed to a list of files.
    if isinstance(infiles, str):
        infiles = [infiles, ]
    nfiles = len(infiles)

    # ADM check that files exist before proceeding.
    for filename in infiles:
        if not os.path.exists(filename):
            msg = "{} doesn't exist".format(filename)
            log.critical(msg)
            raise ValueError(msg)

    # ADM load the tiles file.
    tiles = desimodel.io.load_tiles(tilesfile=tilesfile)
    # ADM check some files loaded.
    if len(tiles) == 0:
        msg = "no tiles found in {}".format(tilesfile)
        log.critical(msg)
        raise ValueError(msg)

    # ADM the critical function to run on every file.
    def _get_gfas(fn):
        '''wrapper on gaia_gfas_from_sweep() given a file name'''
        return gaia_gfas_from_sweep(fn, maglim=maglim)

    # ADM this is just to count sweeps files in _update_status.
    nfile = np.zeros((), dtype='i8')
    t0 = time()

    def _update_status(result):
        """wrapper function for the critical reduction operation,
        that occurs on the main parallel process"""
        if nfile % 50 == 0 and nfile > 0:
            elapsed = (time()-t0)/60.
            rate = nfile/elapsed/60.
            log.info('{}/{} files; {:.1f} files/sec...t = {:.1f} mins'
                     .format(nfile, nfiles, rate, elapsed))
        nfile[...] += 1    # this is an in-place modification.
        return result

    # - Parallel process input files.
    if numproc > 1:
        pool = sharedmem.MapReduce(np=numproc)
        with pool:
            gfas = pool.map(_get_gfas, infiles, reduce=_update_status)
    else:
        gfas = list()
        for file in infiles:
            gfas.append(_update_status(_get_gfas(file)))

    gfas = np.concatenate(gfas)

    # ADM resolve any duplicates between imaging data releases.
    gfas = resolve(gfas)

    # ADM retrieve Gaia objects in the DESI footprint or passed tiles.
    log.info('Retrieving additional Gaia objects...t = {:.1f} mins'
             .format((time()-t0)/60))
    gaia = all_gaia_in_tiles(maglim=maglim, numproc=numproc, allsky=cmx,
                             tiles=tiles)
    # ADM and limit them to just any missing bricks...
    brickids = set(gfas['BRICKID'])
    ii = [gbrickid not in brickids for gbrickid in gaia["BRICKID"]]
    gaia = gaia[ii]

    gfas = np.concatenate([gfas, gaia])
    # ADM limit to DESI footprint or passed tiles, if not cmx'ing.
    if not cmx:
        ii = is_point_in_desi(tiles, gfas["RA"], gfas["DEC"])
        gfas = gfas[ii]

    return gfas
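Finally, a hedged end-to-end sketch for this simplest variant, writing the result with fitsio as elsewhere in these examples; the paths and the extension name are placeholders.

from glob import glob

sweepfiles = glob("/path/to/sweep/*.fits")  # placeholder path
gfas = select_gfas(sweepfiles, maglim=18, numproc=16, tilesfile=None, cmx=False)
fitsio.write("gfas.fits", gfas, extname="GFA_TARGETS", clobber=True)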