Example #1
def read_psf(filename):
    """
    Read GaussHermite PSF data from input filename
    
    Returns a dictionary of astropy Tables from the input PSF FITS file
    with keys XTRACE, YTRACE, PSF to match input file HDU EXTNAMEs
    """
    psfdata = dict()
    psfdata['PSF'] = Table.read(filename, 'PSF')

    log = get_logger()

    if 'PSFERR' not in psfdata['PSF'].meta:
        default_psferr = 0.01
        log.debug(f'PSFERR not found in PSF meta. Setting to {default_psferr}')
        psfdata['PSF'].meta['PSFERR'] = default_psferr

    with fitsio.FITS(filename, 'r') as fx:
        for extname in ('XTRACE', 'YTRACE'):
            data = fx[extname].read()
            hdr = fx[extname].read_header()
            t = Table()
            t[extname[0]] = data
            for key in ('WAVEMIN', 'WAVEMAX'):
                t.meta[key] = hdr[key]
            psfdata[extname] = t

    return psfdata
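
A minimal usage sketch (the filename is hypothetical, and this assumes the module-level imports used above: astropy.table.Table, fitsio, and the package's get_logger):

psf = read_psf('psf-r0.fits')
print(psf['PSF'].meta['WAVEMIN'], psf['PSF'].meta['WAVEMAX'])
print(psf['XTRACE']['X'].shape)  # data array read from the XTRACE HDU
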
Example #2
def extract_frame(img,
                  psf,
                  bundlesize,
                  specmin,
                  nspec,
                  wavelength=None,
                  nwavestep=50,
                  nsubbundles=1,
                  comm=None,
                  rank=0,
                  size=1,
                  gpu=None,
                  loglevel=None):
    """
    Extract 1D spectra from 2D image.

    Args:
        img: dictionary image object (see gpu_specter.io.read_img)
        psf: dictionary psf object (see gpu_specter.io.read_psf)
        bundlesize: fixed number of spectra per bundle (25 for DESI)
        specmin: index of first spectrum to extract
        nspec: number of spectra to extract

    Options:
        wavelength: wavelength range to extract, formatted as 'wmin,wmax,dw'
        nwavestep: number of wavelength bins per patch
        nsubbundles: number of subbundles to divide each bundle into
        comm: mpi communicator (no mpi: None)
        rank: integer process identifier (no mpi: 0)
        size: number of mpi processes (no mpi: 1)
        gpu: use GPU for extraction
        loglevel: log print level

    Returns:
        frame: dictionary frame object (see gpu_specter.io.write_frame)
    """

    timer = Timer()

    log = get_logger(loglevel)

    #- Determine MPI communication strategy based on number of gpu devices and MPI ranks
    if gpu:
        import cupy as cp
        #- TODO: specify number of gpus to use?
        device_count = cp.cuda.runtime.getDeviceCount()
        assert size % device_count == 0, 'Number of MPI ranks must be divisible by number of GPUs'
        device_id = rank % device_count
        cp.cuda.Device(device_id).use()

        #- Divide mpi ranks evenly among gpus
        device_size = size // device_count
        bundle_rank = rank // device_count
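
        #- Illustration (hypothetical numbers): with size=4 MPI ranks and
        #- device_count=2 GPUs, the modular arithmetic above gives
        #-   rank:        0  1  2  3
        #-   device_id:   0  1  0  1
        #-   bundle_rank: 0  0  1  1
        #- so ranks 0,2 share GPU 0 and ranks 1,3 share GPU 1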

        if device_count > 1:
            #- Multi gpu, MPI communication needs to happen at frame level
            frame_comm = comm.Split(color=bundle_rank, key=device_id)
            if device_size > 1:
                #- If multiple ranks per gpu, also need to communicate at bundle level
                bundle_comm = comm.Split(color=device_id, key=bundle_rank)
            else:
                #- If only one rank per gpu, don't need bundle level communication
                bundle_comm = None
        else:
            #- Single gpu, only do MPI communication at bundle level
            frame_comm = None
            bundle_comm = comm
    else:
        #- No gpu, do MPI communication at bundle level
        frame_comm = None
        bundle_comm = comm

    timer.split('init')

    imgpixels = imgivar = None
    if rank == 0:
        imgpixels = img['image']
        imgivar = img['ivar']

    #- If using MPI, broadcast image, ivar, and psf to all ranks
    if comm is not None:
        if rank == 0:
            log.info('Broadcasting inputs to other MPI ranks')
        imgpixels = comm.bcast(imgpixels, root=0)
        imgivar = comm.bcast(imgivar, root=0)
        psf = comm.bcast(psf, root=0)

    #- If using GPU, move image and ivar to device
    #- TODO: is there a way for ranks to share a pointer to device memory?
    if gpu:
        cp.cuda.nvtx.RangePush('copy imgpixels, imgivar to device')
        device_id = cp.cuda.runtime.getDevice()
        log.info(f'Rank {rank}: Moving image data to device {device_id}')
        imgpixels = cp.asarray(imgpixels)
        imgivar = cp.asarray(imgivar)
        cp.cuda.nvtx.RangePop()

    timer.split('distributed data')

    if wavelength is not None:
        wmin, wmax, dw = map(float, wavelength.split(','))
    else:
        wmin, wmax = psf['PSF'].meta['WAVEMIN'], psf['PSF'].meta['WAVEMAX']
        dw = 0.8

    if rank == 0:
        log.info(f'Extracting wavelengths {wmin},{wmax},{dw}')

    #- TODO: calculate this instead of hardcoding it
    wavepad = 10

    #- Wavelength range that we want to extract
    wave = np.arange(wmin, wmax + 0.5 * dw, dw)
    nwave = len(wave)

    #- Pad that with buffer wavelengths to extract and discard, including an
    #- extra nwavestep bins to allow coverage for a final partial bin
    wavelo = np.arange(wavepad) * dw
    wavelo -= (np.max(wavelo) + dw)
    wavelo += wmin
    wavehi = wave[-1] + (1.0 + np.arange(wavepad + nwavestep)) * dw

    fullwave = np.concatenate((wavelo, wave, wavehi))
    assert np.allclose(np.diff(fullwave), dw)
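
    #- Worked example (hypothetical numbers): wmin=3600.0, dw=0.8, wavepad=10
    #- gives wavelo = [3592.0, 3592.8, ..., 3599.2], ten bins ending one step
    #- below wmin, while wavehi continues one step above wave[-1], so fullwave
    #- is a single contiguous grid with spacing dw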

    #- TODO: barycentric wavelength corrections

    #- Work bundle by bundle
    if frame_comm is None:
        bundle_start = 0
        bundle_step = 1
    else:
        bundle_start = device_id
        bundle_step = device_count
    bspecmins = list(range(specmin, specmin + nspec, bundlesize))
    bundles = list()
    for bspecmin in bspecmins[bundle_start::bundle_step]:
        log.info(
            f'Rank {rank}: Extracting spectra [{bspecmin}:{bspecmin+bundlesize}]'
        )
        sys.stdout.flush()
        if gpu:
            cp.cuda.nvtx.RangePush('extract_bundle')
        bundle = extract_bundle(imgpixels,
                                imgivar,
                                psf,
                                wave,
                                fullwave,
                                bspecmin,
                                bundlesize=bundlesize,
                                nsubbundles=nsubbundles,
                                nwavestep=nwavestep,
                                wavepad=wavepad,
                                comm=bundle_comm,
                                gpu=gpu)
        if gpu:
            cp.cuda.nvtx.RangePop()
        bundles.append((bspecmin, bundle))

        #- for good measure, have other ranks wait for rank 0
        if bundle_comm is not None:
            bundle_comm.barrier()

    timer.split('extracted bundles')

    if frame_comm is not None:
        # gather results from multiple mpi groups
        if bundle_rank == 0:
            bspecmins, bundles = zip(*bundles)
            flux, ivar, resolution = zip(*bundles)
            bspecmins = frame_comm.gather(bspecmins, root=0)
            flux = gather_ndarray(flux, frame_comm)
            ivar = gather_ndarray(ivar, frame_comm)
            resolution = gather_ndarray(resolution, frame_comm)
            if rank == 0:
                bspecmin = [
                    bspecmin for rankbspecmins in bspecmins
                    for bspecmin in rankbspecmins
                ]
                rankbundles = [
                    list(zip(bspecmin, zip(flux, ivar, resolution))),
                ]
    else:
        # no mpi or single group with all ranks
        rankbundles = [
            bundles,
        ]

    timer.split('collected data')

    #- Finalize and write output
    frame = None
    if rank == 0:

        #- flatten list of lists into single list
        allbundles = list()
        for rb in rankbundles:
            allbundles.extend(rb)

        allbundles.sort(key=lambda x: x[0])

        specflux = np.vstack([b[1][0] for b in allbundles])
        specivar = np.vstack([b[1][1] for b in allbundles])
        Rdiags = np.vstack([b[1][2] for b in allbundles])

        timer.split('combined data')

        #- Convert flux to photons/A instead of photons/bin
        dwave = np.gradient(wave)
        specflux /= dwave
        specivar *= dwave**2

        #- TODO: specmask and chi2pix
        specmask = (specivar == 0).astype(int)
        chi2pix = np.ones(specflux.shape)

        frame = dict(
            specflux=specflux,
            specivar=specivar,
            specmask=specmask,
            wave=wave,
            Rdiags=Rdiags,
            chi2pix=chi2pix,
            imagehdr=img['imagehdr'],
            fibermap=img['fibermap'],
            fibermaphdr=img['fibermaphdr'],
        )

        timer.split('finished frame')
        timer.log_splits(log)

    return frame
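
A minimal single-process sketch of calling this function (no MPI, CPU only; the filenames and wavelength range are hypothetical):

from gpu_specter.io import read_img, read_psf

img = read_img('preproc-r0.fits')
psf = read_psf('psf-r0.fits')
frame = extract_frame(img, psf, bundlesize=25, specmin=0, nspec=50,
                      wavelength='5760.0,7620.0,0.8')
print(frame['specflux'].shape)  # (nspec, nwave)
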
Example #3
def extract_bundle(image,
                   imageivar,
                   psf,
                   wave,
                   fullwave,
                   bspecmin,
                   bundlesize=25,
                   nsubbundles=1,
                   nwavestep=50,
                   wavepad=10,
                   comm=None,
                   gpu=None,
                   loglevel=None):
    """
    Extract 1D spectra from a single bundle of a 2D image.

    Args:
        image: full 2D array of image pixels
        imageivar: full 2D array of inverse variance for the image
        psf: dictionary psf object (see gpu_specter.io.read_psf)
        wave: 1D array of wavelengths to extract
        fullwave: Padded 1D array of wavelengths to extract
        bspecmin: index of the first spectrum in the bundle

    Options:
        bundlesize: fixed number of spectra per bundle (25 for DESI)
        nsubbundles: number of subbundles to divide each bundle into
        nwavestep: number of wavelength bins per patch
        wavepad: number of wavelength bins to add on each end of patch for extraction
        comm: mpi communicator (no mpi: None; rank and size are derived from comm)
        gpu: use GPU for extraction
        loglevel: log print level

    Returns:
        bundle: (flux, ivar, R) tuple

    """
    timer = Timer()

    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    log = get_logger(loglevel)

    #- Extracting on CPU or GPU?
    if gpu:
        from gpu_specter.extract.gpu import get_spots, ex2d_padded
    else:
        from gpu_specter.extract.cpu import get_spots, ex2d_padded

    nwave = len(wave)
    ndiag = psf['PSF'].meta['HSIZEY']

    timer.split('init')

    #- Cache PSF spots for all wavelengths for spectra in this bundle
    if gpu:
        cp.cuda.nvtx.RangePush('get_spots')
    spots, corners = get_spots(bspecmin, bundlesize, fullwave, psf)
    if gpu:
        cp.cuda.nvtx.RangePop()

    timer.split('spots/corners')

    #- Size of the individual spots
    spot_nx, spot_ny = spots.shape[2:4]

    #- Organize what sub-bundle patches to extract
    patches = list()
    nspectra_per_patch = bundlesize // nsubbundles
    for ispec in range(bspecmin, bspecmin + bundlesize, nspectra_per_patch):
        for iwave in range(wavepad, wavepad + nwave, nwavestep):
            patch = Patch(ispec, iwave, bspecmin, nspectra_per_patch,
                          nwavestep, wavepad, nwave, bundlesize, ndiag)
            patches.append(patch)

    if rank == 0:
        log.info(f'Dividing {len(patches)} patches between {size} ranks')

    timer.split('organize patches')

    #- place to keep extraction patch results before assembling in rank 0
    results = list()
    for patch in patches[rank::size]:

        log.debug(f'rank={rank}, ispec={patch.ispec}, iwave={patch.iwave}')

        #- Always extract the same patch size (more efficient for GPU
        #- memory transfer) then decide post-facto whether to keep it all

        if gpu:
            cp.cuda.nvtx.RangePush('ex2d_padded')

        result = ex2d_padded(image,
                             imageivar,
                             patch.ispec - bspecmin,
                             patch.nspectra_per_patch,
                             patch.iwave,
                             patch.nwavestep,
                             spots,
                             corners,
                             wavepad=patch.wavepad,
                             bundlesize=bundlesize)
        if gpu:
            cp.cuda.nvtx.RangePop()

        results.append((patch, result))

    timer.split('extracted patches')

    if comm is not None:
        if gpu:
            # If we have gpu and an MPI comm for this bundle, transfer data
            # back to host before assembling the patches
            patches = []
            flux = []
            fluxivar = []
            resolution = []
            for patch, result in results:
                patches.append(patch)
                flux.append(result['flux'])
                fluxivar.append(result['ivar'])
                resolution.append(result['Rdiags'])

            # transfer to host in 3 chunks
            cp.cuda.nvtx.RangePush('copy bundle results to host')
            device_id = cp.cuda.runtime.getDevice()
            log.info(
                f'Rank {rank}: Moving bundle {bspecmin} patches to host from device {device_id}'
            )
            flux = cp.asnumpy(cp.array(flux, dtype=cp.float64))
            fluxivar = cp.asnumpy(cp.array(fluxivar, dtype=cp.float64))
            resolution = cp.asnumpy(cp.array(resolution, dtype=cp.float64))
            cp.cuda.nvtx.RangePop()

            # gather to root MPI rank
            patches = comm.gather(patches, root=0)
            flux = gather_ndarray(flux, comm, root=0)
            fluxivar = gather_ndarray(fluxivar, comm, root=0)
            resolution = gather_ndarray(resolution, comm, root=0)

            if rank == 0:
                # unpack patches
                patches = [
                    patch for rankpatches in patches for patch in rankpatches
                ]
                # repack everything
                rankresults = [
                    zip(
                        patches,
                        map(lambda x: dict(flux=x[0], ivar=x[1], Rdiags=x[2]),
                            zip(flux, fluxivar, resolution)))
                ]
        else:
            rankresults = comm.gather(results, root=0)
    else:
        # this is fine for GPU w/out MPI comm
        rankresults = [
            results,
        ]

    timer.split('gathered patches')

    bundle = None
    if rank == 0:
        if gpu:
            cp.cuda.nvtx.RangePush('assemble patches on device')
            device_id = cp.cuda.runtime.getDevice()
            log.info(
                f'Rank {rank}: Assembling bundle {bspecmin} patches on device {device_id}'
            )
        bundle = assemble_bundle_patches(rankresults)
        if gpu:
            cp.cuda.nvtx.RangePop()
            if comm is None:
                cp.cuda.nvtx.RangePush('copy bundle results to host')
                device_id = cp.cuda.runtime.getDevice()
                log.info(
                    f'Rank {rank}: Moving bundle {bspecmin} to host from device {device_id}'
                )
                bundle = tuple(cp.asnumpy(x) for x in bundle)
                cp.cuda.nvtx.RangePop()
        timer.split('assembled patches')
        timer.log_splits(log)
    return bundle
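
On the bundle communicator's rank 0 the return value is the (flux, ivar, R) tuple from the docstring; other ranks return None. A sketch of unpacking it (the shape check assumes the per-bundle layout implied by the frame assembly in extract_frame above):

flux, ivar, resolution = bundle
assert flux.shape[0] == bundlesize  # one row per spectrum in the bundle
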
Example #4
def extract_frame(img,
                  psf,
                  bundlesize,
                  specmin,
                  nspec,
                  wavelength=None,
                  nwavestep=50,
                  nsubbundles=1,
                  model=None,
                  regularize=0,
                  psferr=None,
                  comm=None,
                  gpu=None,
                  loglevel=None,
                  timing=None,
                  wavepad=10,
                  pixpad_frac=0.8,
                  wavepad_frac=0.2,
                  batch_subbundle=True,
                  ranks_per_bundle=None):
    """
    Extract 1D spectra from 2D image.

    Args:
        img: dictionary image object (see gpu_specter.io.read_img)
        psf: dictionary psf object (see gpu_specter.io.read_psf)
        bundlesize: fixed number of spectra per bundle (25 for DESI)
        specmin: index of first spectrum to extract
        nspec: number of spectra to extract

    Options:
        wavelength: wavelength range to extract, formatted as 'wmin,wmax,dw'
        nwavestep: number of wavelength bins per patch
        nsubbundles: number of subbundles to divide each bundle into
        model: indicate whether or not to compute the image model
        regularize: regularization parameter
        psferr: scale factor to use for psf in chi2
        comm: mpi communicator (no mpi: None)
        gpu: use GPU for extraction
        loglevel: log print level
        timing: dictionary to return timing splits
        wavepad: number of wavelength bins to pad extraction with (must be greater than
            spotsize)
        pixpad_frac: fraction of a PSF spotsize to pad in pixels when extracting
        wavepad_frac: fraction of a PSF spotsize to pad in wavelengths when extracting
        batch_subbundle: perform extraction in subbundle batch of patches (GPU-only)
        ranks_per_bundle: number of mpi ranks per bundle comm

    Returns:
        frame: dictionary frame object (see gpu_specter.io.write_frame)
    """

    timer = Timer()
    time_start = time.time()

    log = get_logger(loglevel)

    if comm is None:
        rank, size = 0, 1
    else:
        rank, size = comm.rank, comm.size

    #- Disable batch subbundle for CPU extraction
    if not gpu:
        batch_subbundle = False

    #- Batch subbundle extraction constrains the number of MPI ranks per bundle
    if batch_subbundle:
        #- Default to one MPI rank per bundle
        if ranks_per_bundle is None:
            ranks_per_bundle = 1
        assert ranks_per_bundle <= nsubbundles, 'ranks_per_bundle should be <= nsubbundles'
        assert nsubbundles % ranks_per_bundle == 0, 'ranks_per_bundle should evenly divide nsubbundles'

    bundle_comm, frame_comm = decompose_comm(comm, gpu, ranks_per_bundle)
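
    #- Roles implied by the usage below: bundle_comm coordinates the ranks
    #- extracting a single bundle, while frame_comm links the bundle roots
    #- so whole bundles can be distributed and gathered at the frame level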

    bundle_rank = 0 if bundle_comm is None else bundle_comm.rank
    bundle_size = 1 if bundle_comm is None else bundle_comm.size
    frame_rank = 0 if frame_comm is None else frame_comm.rank
    frame_size = 1 if frame_comm is None else frame_comm.size

    if rank == 0:
        log.info(f'Using GPU: {gpu}')
        log.info(f'Using batch subbundle extraction: {batch_subbundle}')
        log.info(f'Size of frame MPI comm: {frame_size}')
        log.info(f'Size of bundle MPI comm: {bundle_size}')

    #- MPI rank to bundle/frame comm mapping
    log.debug(
        f'{rank=} {frame_rank=}/{frame_size=} {bundle_rank=}/{bundle_size=}')

    timer.split('init-mpi-comm')
    time_init_mpi_comm = time.time()

    imgpixels = imgivar = None
    if rank == 0:
        imgpixels = img['image']
        imgivar = img['ivar']

    #- If using MPI, broadcast image, ivar, and psf to all ranks
    if comm is not None:
        # cp.cuda.nvtx.RangePush('mpi bcast')
        if rank == 0:
            log.info('Broadcasting inputs to other MPI ranks')

        if gpu:
            empty = cp.empty
        else:
            empty = np.empty
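
        #- Note: upper-case comm.Bcast (used below) sends the raw array
        #- buffer without pickling, so receiving ranks must first allocate
        #- arrays with the matching shape and dtype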

        # cp.cuda.nvtx.RangePush('shape')
        if rank == 0:
            shape = imgpixels.shape
        else:
            shape = None
        shape = comm.bcast(shape, root=0)
        if rank > 0:
            imgpixels = empty(shape, dtype='f8')
            imgivar = empty(shape, dtype='f8')
        # cp.cuda.nvtx.RangePop() # shape

        # cp.cuda.nvtx.RangePush('imgpixels')
        comm.Bcast(imgpixels, root=0)
        # imgpixels = comm.bcast(imgpixels, root=0)
        # cp.cuda.nvtx.RangePop() # imgpixels

        # cp.cuda.nvtx.RangePush('imgivar')
        comm.Bcast(imgivar, root=0)
        # imgivar = comm.bcast(imgivar, root=0)
        # cp.cuda.nvtx.RangePop() # imgivar

        # cp.cuda.nvtx.RangePush('psf')
        psf = comm.bcast(psf, root=0)
        # cp.cuda.nvtx.RangePop() # psf
        # cp.cuda.nvtx.RangePop() # mpi bcast

    timer.split('mpi-bcast-raw')
    time_mpi_bcast_raw = time.time()

    #- If using GPU, move image and ivar to device
    #- TODO: is there a way for ranks to share a pointer to device memory?
    if gpu:
        cp.cuda.nvtx.RangePush('copy imgpixels, imgivar to device')
        device_id = cp.cuda.runtime.getDevice()
        log.debug(f'Rank {rank}: Moving image data to device {device_id}')
        imgpixels = cp.asarray(imgpixels)
        imgivar = cp.asarray(imgivar)
        cp.cuda.nvtx.RangePop()

        timer.split('host-to-device-raw')
    time_host_to_device_raw = time.time()

    if isinstance(wavelength, np.ndarray):
        wave = wavelength
        wmin, wmax = wave[0], wave[-1]
        dw = np.gradient(wave)[0]
    else:
        if isinstance(wavelength, str):
            wmin, wmax, dw = map(float, wavelength.split(','))
        elif isinstance(wavelength, tuple):
            wmin, wmax, dw = wavelength
        else:
            wmin, wmax = psf['PSF'].meta['WAVEMIN'], psf['PSF'].meta['WAVEMAX']
            dw = 0.8
        wave = np.arange(wmin, wmax + 0.5 * dw, dw)

    #- Wavelength range that we want to extract
    if rank == 0:
        log.info(f'Extracting wavelengths {wmin},{wmax},{dw}')

    #- Pad that with buffer wavelengths to extract and discard, including an
    #- extra nwavestep bins to allow coverage for a final partial bin

    #- TODO: calculate initial wavepad from psf spotsize instead of using parameter
    wavepad += int(wavepad * wavepad_frac)

    if rank == 0:
        log.info(f'Padding patches with {wavepad} wave bins on both ends')

    wavelo = np.arange(wavepad) * dw
    wavelo -= (np.max(wavelo) + dw)
    wavelo += wmin
    wavehi = wave[-1] + (1.0 + np.arange(wavepad + nwavestep)) * dw

    fullwave = np.concatenate((wavelo, wave, wavehi))
    assert np.allclose(np.diff(fullwave), dw)

    bspecmins = list(range(specmin, specmin + nspec, bundlesize))
    bundles = list()
    for bspecmin in bspecmins[frame_rank::frame_size]:
        # log.info(f'Rank {rank}: Extracting spectra [{bspecmin}:{bspecmin+bundlesize}]')
        # sys.stdout.flush()
        if gpu:
            cp.cuda.nvtx.RangePush('extract_bundle')
        bundle = extract_bundle(
            imgpixels,
            imgivar,
            psf,
            wave,
            fullwave,
            bspecmin,
            bundlesize=bundlesize,
            nsubbundles=nsubbundles,
            batch_subbundle=batch_subbundle,
            nwavestep=nwavestep,
            wavepad=wavepad,
            comm=bundle_comm,
            gpu=gpu,
            loglevel=loglevel,
            model=model,
            regularize=regularize,
            psferr=psferr,
            pixpad_frac=pixpad_frac,
        )
        if gpu:
            cp.cuda.nvtx.RangePop()
        bundles.append((bspecmin, bundle))

        #- for good measure, have other ranks wait for rank 0
        if bundle_comm is not None:
            bundle_comm.barrier()

    timer.split('extracted-bundles')
    time_extracted_bundles = time.time()

    # cp.cuda.nvtx.RangePush('mpi gather')
    if frame_comm is not None:
        # gather results from multiple mpi groups
        if bundle_comm is None or bundle_comm.rank == 0:
            bspecmins, bundles = zip(*bundles)
            flux, ivar, resolution, pixmask_fraction, chi2pix, modelimage, xyslice = zip(
                *bundles)
            bspecmins = frame_comm.gather(bspecmins, root=0)
            xyslice = frame_comm.gather(xyslice, root=0)
            flux = gather_ndarray(flux, frame_comm)
            ivar = gather_ndarray(ivar, frame_comm)
            resolution = gather_ndarray(resolution, frame_comm)
            pixmask_fraction = gather_ndarray(pixmask_fraction, frame_comm)
            chi2pix = gather_ndarray(chi2pix, frame_comm)
            modelimage = frame_comm.gather(modelimage, root=0)
            if rank == 0:
                bspecmin = [
                    bspecmin for rankbspecmins in bspecmins
                    for bspecmin in rankbspecmins
                ]
                modelimage = [m for rankmodels in modelimage for m in rankmodels]
                mxy = [xy for rankxyslice in xyslice for xy in rankxyslice]
                rankbundles = [
                    list(
                        zip(
                            bspecmin,
                            zip(flux, ivar, resolution, pixmask_fraction,
                                chi2pix, modelimage, mxy))),
                ]
    else:
        # no mpi or single group with all ranks
        rankbundles = [
            bundles,
        ]
    # cp.cuda.nvtx.RangePop() # mpi gather

    timer.split('staged-bundles')
    time_staged_bundles = time.time()

    #- Finalize and write output
    frame = None
    # cp.cuda.nvtx.RangePush('finalize output')
    if rank == 0:

        #- flatten list of lists into single list
        allbundles = list()
        for rb in rankbundles:
            allbundles.extend(rb)

        allbundles.sort(key=lambda x: x[0])

        specflux = np.vstack([b[1][0] for b in allbundles])
        specivar = np.vstack([b[1][1] for b in allbundles])
        Rdiags = np.vstack([b[1][2] for b in allbundles])
        pixmask_fraction = np.vstack([b[1][3] for b in allbundles])
        chi2pix = np.vstack([b[1][4] for b in allbundles])

        if model:
            modelimage = np.zeros(imgpixels.shape)
            for b in allbundles:
                bundleimage = b[1][5]
                xyslice = b[1][6]
                modelimage[xyslice] += bundleimage
        else:
            modelimage = None

        timer.split('merged-bundles')
        time_merged_bundles = time.time()

        #- Convert flux to photons/A instead of photons/bin
        dwave = np.gradient(wave)
        specflux /= dwave
        specivar *= dwave**2

        #- TODO: specmask and chi2pix
        # mask = np.zeros(flux.shape, dtype=np.uint32)
        # mask[results['pixmask_fraction']>0.5] |= specmask.SOMEBADPIX
        # mask[results['pixmask_fraction']==1.0] |= specmask.ALLBADPIX
        # mask[chi2pix>100.0] |= specmask.BAD2DFIT
        specmask = (specivar == 0).astype(int)

        frame = dict(
            specflux=specflux,
            specivar=specivar,
            specmask=specmask,
            wave=wave,
            Rdiags=Rdiags,
            pixmask_fraction=pixmask_fraction,
            chi2pix=chi2pix,
            modelimage=modelimage,
        )

        timer.split('assembled-frame')
        time_assembled_frame = time.time()
        timer.log_splits(log)
    else:
        time_merged_bundles = time.time()
        time_assembled_frame = time.time()
    # cp.cuda.nvtx.RangePop() # finalize output

    if isinstance(timing, dict):
        timing['init-mpi-comm'] = time_init_mpi_comm
        timing['init-mpi-bcast'] = time_mpi_bcast_raw
        timing['host-to-device'] = time_host_to_device_raw
        timing['extracted-bundles'] = time_extracted_bundles
        timing['staged-bundles'] = time_staged_bundles
        timing['merged-bundles'] = time_merged_bundles
        timing['assembled-frame'] = time_assembled_frame

    return frame
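
A sketch of using the timing option (inputs as in the earlier examples): the values stored are time.time() stamps, so differences between consecutive checkpoints give elapsed seconds.

timing = {}
frame = extract_frame(img, psf, bundlesize=25, specmin=0, nspec=50,
                      timing=timing)
print(timing['extracted-bundles'] - timing['host-to-device'])  # seconds from device staging to finished bundles
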
Example #5
def extract_bundle(image,
                   imageivar,
                   psf,
                   wave,
                   fullwave,
                   bspecmin,
                   bundlesize=25,
                   nsubbundles=1,
                   batch_subbundle=False,
                   nwavestep=50,
                   wavepad=10,
                   comm=None,
                   gpu=None,
                   loglevel=None,
                   model=None,
                   regularize=0,
                   psferr=None,
                   pixpad_frac=0):
    """
    Extract 1D spectra from a single bundle of a 2D image.

    Args:
        image: full 2D array of image pixels
        imageivar: full 2D array of inverse variance for the image
        psf: dictionary psf object (see gpu_specter.io.read_psf)
        wave: 1D array of wavelengths to extract
        fullwave: Padded 1D array of wavelengths to extract
        bspecmin: index of the first spectrum in the bundle

    Options:
        bundlesize: fixed number of spectra per bundle (25 for DESI)
        nsubbundles: number of subbundles to divide each bundle into
        batch_subbundle: whether or not to use batch subbundle extraction
        nwavestep: number of wavelength bins per patch
        wavepad: number of wavelength bins to add on each end of patch for extraction
        comm: mpi communicator (no mpi: None)
        gpu: use GPU for extraction
        loglevel: log print level
        model: indicate whether or not to compute the image model
        regularize: regularization parameter
        psferr: scale factor to use for psf in chi2
        pixpad_frac: fraction of padded pixels to use in extraction

    Returns:
        bundle: (flux, ivar, resolution, pixmask_fraction, chi2pix, modelimage, xyslice) tuple

    """
    timer = Timer()

    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    log = get_logger(loglevel)

    #- Extracting on CPU or GPU?
    if gpu:
        from gpu_specter.extract.gpu import (get_spots, ex2d_padded,
                                             ex2d_subbundle)
    else:
        from gpu_specter.extract.cpu import (get_spots, ex2d_padded)

    nwave = len(wave)
    ndiag = psf['PSF'].meta['HSIZEY']

    timer.split('init')

    #- Cache PSF spots for all wavelengths for spectra in this bundle
    if gpu:
        cp.cuda.nvtx.RangePush('get_spots')
    spots, corners, psfparams = get_spots(bspecmin, bundlesize, fullwave, psf)
    if gpu:
        cp.cuda.nvtx.RangePop()
    if psferr is None:
        psferr = psf['PSF'].meta['PSFERR']

    timer.split('spots/corners')

    #- Size of the individual spots
    spot_nx, spot_ny = spots.shape[2:4]

    #- Organize what sub-bundle patches to extract
    subbundles = list()
    nspectra_per_patch = bundlesize // nsubbundles
    for ispec in range(bspecmin, bspecmin + bundlesize, nspectra_per_patch):
        patches = list()
        for iwave in range(wavepad, wavepad + nwave, nwavestep):
            patch = Patch(ispec, iwave, bspecmin, nspectra_per_patch,
                          nwavestep, wavepad, nwave, bundlesize, ndiag)
            patches.append(patch)
        subbundles.append(patches)
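
    #- subbundles is now a list of nsubbundles lists, each holding the
    #- wavelength patches (ceil(nwave / nwavestep) of them) for one group
    #- of nspectra_per_patch spectra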

    timer.split('organize patches')

    #- place to keep extraction patch results before assembling in rank 0
    results = list()

    if gpu and batch_subbundle:
        for subbundle in subbundles[rank::size]:
            result = ex2d_subbundle(image, imageivar, subbundle, spots,
                                    corners, pixpad_frac, regularize, model,
                                    psferr)
            results += result
    else:
        patches = [patch for subbundle in subbundles for patch in subbundle]
        for patch in patches[rank::size]:
            try:
                result = ex2d_padded(image, imageivar, patch, spots, corners,
                                     pixpad_frac, regularize, model, psferr)
            except RuntimeError:
                if regularize == 0:
                    #- Add a smidgen of regularization to try to power through...
                    regularize = 1e-4
                    log.warning(
                        f'Error extracting patch ({patch.ispec}, {patch.iwave}), retrying with regularize={regularize}'
                    )
                    result = ex2d_padded(image, imageivar, patch, spots,
                                         corners, pixpad_frac, regularize,
                                         model, psferr)
                else:
                    raise
            patch.xyslice = result['xyslice']
            results.append((patch, result))

    timer.split('extracted patches')

    bundle = None
    if comm is not None:
        if gpu:
            # If we have gpu and an MPI comm for this bundle, transfer data
            # back to host before assembling the patches
            patches = []
            flux = []
            fluxivar = []
            resolution = []
            pixmask_fraction = []
            chi2pix = []
            modelimage = []
            for patch, result in results:
                patches.append(patch)
                flux.append(result['flux'])
                fluxivar.append(result['ivar'])
                resolution.append(result['Rdiags'])
                pixmask_fraction.append(result['pixmask_fraction'])
                chi2pix.append(result['chi2pix'])
                modelimage.append(cp.asnumpy(result['modelimage']))

            # transfer to host in chunks
            cp.cuda.nvtx.RangePush('copy bundle results to host')
            device_id = cp.cuda.runtime.getDevice()
            log.debug(
                f'Rank {rank}: Moving bundle {bspecmin} patches to host from device {device_id}'
            )
            flux = cp.asnumpy(cp.array(flux, dtype=cp.float64))
            fluxivar = cp.asnumpy(cp.array(fluxivar, dtype=cp.float64))
            resolution = cp.asnumpy(cp.array(resolution, dtype=cp.float64))
            pixmask_fraction = cp.asnumpy(
                cp.array(pixmask_fraction, dtype=cp.float64))
            chi2pix = cp.asnumpy(cp.array(chi2pix, dtype=cp.float64))
            cp.cuda.nvtx.RangePop()

            # gather to root MPI rank
            patches = comm.gather(patches, root=0)
            flux = gather_ndarray(flux, comm, root=0)
            fluxivar = gather_ndarray(fluxivar, comm, root=0)
            resolution = gather_ndarray(resolution, comm, root=0)
            pixmask_fraction = gather_ndarray(pixmask_fraction, comm, root=0)
            chi2pix = gather_ndarray(chi2pix, comm, root=0)
            modelimage = comm.gather(modelimage, root=0)

            if rank == 0:
                # unpack patches
                patches = [
                    patch for rankpatches in patches for patch in rankpatches
                ]
                modelimage = [m for rankmodels in modelimage for m in rankmodels]

                # repack everything
                rankresults = [
                    zip(
                        patches,
                        map(
                            lambda x: dict(flux=x[0],
                                           ivar=x[1],
                                           Rdiags=x[2],
                                           pixmask_fraction=x[3],
                                           chi2pix=x[4],
                                           modelimage=x[5]),
                            zip(flux, fluxivar, resolution, pixmask_fraction,
                                chi2pix, modelimage)))
                ]
        else:
            rankresults = comm.gather(results, root=0)
        if rank == 0:
            bundle = assemble_bundle_patches(rankresults)
    else:
        # this is fine for GPU w/out MPI comm
        rankresults = [
            results,
        ]
        bundle = assemble_bundle_patches(rankresults)
        if gpu:
            bundle = tuple(
                cp.asnumpy(x) if isinstance(x, cp.ndarray) else x
                for x in bundle)

    return bundle
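
As the docstring notes, rank 0 receives a 7-element tuple; a sketch of unpacking it:

(flux, ivar, resolution, pixmask_fraction,
 chi2pix, modelimage, xyslice) = bundle
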
Example #6
File: spex.py Project: desihub/gpu_specter
def main_gpu_specter(args=None, comm=None, timing=None, coordinator=None):

    timer = Timer()

    if args is None:
        args = parse()

    log = get_logger(args.loglevel)

    #- Preflight checks on input arguments
    ok, message = check_input_options(args)
    if not ok:
        log.critical(message)
        raise ValueError(message)

    #- Load MPI only if requested and coordinator not provided
    if coordinator is not None:
        pass
    elif comm is not None or args.mpi:
        #- Use MPI if comm is provided or args.mpi is specified
        if comm is None:
            from mpi4py import MPI
            comm = MPI.COMM_WORLD
        if args.async_io:
            coordinator = ParallelIOCoordinator(comm)
        else:
            coordinator = SerialIOCoordinator(comm)
    else:
        coordinator = NoMPIIOCoordinator()

    timer.split('init')

    if args.gpu:
        #- If using gpu, move input data to device on read using cupy
        import cupy as cp
        array = cp.array
    else:
        #- Otherwise, use numpy array for cpu
        array = np.array

    #- Load inputs
    def read():
        log.info(f'Reading image: {args.input}')
        img = read_img(args.input)
        img['image'] = array(img['image'])
        img['ivar'] = array(img['ivar'])
        log.info(f'Reading PSF: {args.psf}')
        psf = read_psf(args.psf)
        return img, psf

    img, psf = coordinator.read(read, (None, None))

    timer.split('load')

    def process():
        #- Perform extraction
        frame = extract_frame(
            img,
            psf,
            args.bundlesize,  # input data
            args.specmin,
            args.nspec,  # spectra to extract (specmin, specmin + nspec)
            args.wavelength,  # wavelength range to extract
            args.nwavestep,
            args.nsubbundles,  # extraction algorithm parameters
            args.model,
            args.regularize,
            args.psferr,
            coordinator.work_comm,  # mpi parameters
            args.gpu,  # gpu parameters
            args.loglevel,  # log
            wavepad=args.wavepad,
            wavepad_frac=args.wavepad_frac,
            pixpad_frac=args.pixpad_frac,
            ranks_per_bundle=args.ranks_per_bundle,
        )
        #- Pass other input data through for output
        if coordinator.is_worker_root(coordinator.rank):
            frame['imagehdr'] = img['imagehdr']
            frame['fibermap'] = img['fibermap']
            frame['fibermaphdr'] = img['fibermaphdr']
        return frame

    frame = coordinator.process(process, None)

    timer.split('extract')

    #- Write output
    def write(frame):
        if args.output is not None:
            log.info(f'Writing {args.output}')
            write_frame(args.output, frame)

        if args.model is not None:
            log.info(f'Writing model {args.model}')
            write_model(args.model, frame)

    coordinator.write(write, frame)

    #- Print timing summary
    timer.split('write')
    if coordinator.is_worker_root(coordinator.rank):
        timer.log_splits(log)
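
A sketch of driving this entry point without the command line, using a hand-built namespace (attribute names are taken from the code above; the values are hypothetical, and parse()/check_input_options may expect additional attributes not shown here):

from types import SimpleNamespace

args = SimpleNamespace(
    input='preproc-r0.fits', psf='psf-r0.fits', output='frame-r0.fits',
    model=None, mpi=False, async_io=False, gpu=False, loglevel='INFO',
    bundlesize=25, specmin=0, nspec=500, wavelength=None, nwavestep=50,
    nsubbundles=1, regularize=0, psferr=None, wavepad=10,
    wavepad_frac=0.2, pixpad_frac=0.8, ranks_per_bundle=None)
main_gpu_specter(args=args)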