예제 #1
0
파일: desi.py 프로젝트: tskisner/redrock
    def __init__(self,
                 spectrafiles,
                 coadd=True,
                 targetids=None,
                 first_target=None,
                 n_target=None,
                 comm=None):
        """Load spectra files and distribute their targets over processes.

        Rank 0 reads every file and broadcasts the pieces; every process
        keeps only the data of the targets assigned to it.

        Args:
            spectrafiles (str or list): a glob pattern, or a list of spectra
                file paths.
            coadd (bool): if True, targets are distributed with equal weight
                and the coadd of every local target is computed after its
                spectra are loaded.  If False, targets are weighted by their
                number of spectral rows and no coadd is computed.
            targetids (list): restrict the selection to these target IDs.
                If None, every TARGETID of each file's FIBERMAP is selected.
            first_target (int): index (within the selection of each file) of
                the first target to keep.  If None, start at 0.
            n_target (int): number of selected targets to keep from each
                file.  If None, keep everything from first_target onwards.
            comm: communicator exposing ``size``, ``rank`` and ``bcast``
                (presumably an mpi4py communicator -- TODO confirm), or None
                to run serially.

        Raises:
            RuntimeError: if the first_target / n_target range does not fit
                inside the selected targets of a file.

        """

        comm_size = 1
        comm_rank = 0
        if comm is not None:
            comm_size = comm.size
            comm_rank = comm.rank

        # check the file list.  "basestring" only exists on Python 2 (or if
        # the module defines a compatibility alias); fall back to "str".
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(spectrafiles, string_types):
            import glob
            spectrafiles = glob.glob(spectrafiles)

        assert len(spectrafiles) > 0

        self._spectrafiles = spectrafiles

        # This is the mapping between specs to targets for each file

        self._spec_to_target = {}
        self._target_specs = {}
        self._spec_keep = {}
        self._spec_sliced = {}

        # The bands for each file

        self._bands = {}
        self._wave = {}

        # The full list of targets from all files

        self._alltargetids = set()

        # The fibermaps from all files

        self._fmaps = {}

        for sfile in spectrafiles:
            hdus = None
            nhdu = None
            fmap = None
            if comm_rank == 0:
                hdus = fits.open(sfile, memmap=True)

            # Make sure the file opened on rank 0 is closed even if the
            # selection below raises.
            try:
                if comm_rank == 0:
                    nhdu = len(hdus)
                    fmap = encode_table(
                        Table(hdus["FIBERMAP"].data, copy=True).as_array())

                if comm is not None:
                    nhdu = comm.bcast(nhdu, root=0)
                    fmap = comm.bcast(fmap, root=0)

                # Now every process has the fibermap and number of HDUs.
                # Build the mapping between spectral rows and target IDs.

                keep_targetids = targetids
                if targetids is None:
                    keep_targetids = fmap["TARGETID"]

                # Select a subset of the target range from each file if
                # desired.  The range is resolved into locals for every file:
                # overwriting the arguments would make the defaults computed
                # for the first file leak into all the following ones.

                nselect = len(keep_targetids)

                first = 0 if first_target is None else first_target
                if first > nselect:
                    raise RuntimeError("first_target value \"{}\" is beyond the "
                        "number of selected targets in the file".\
                        format(first))

                # Without an explicit count, keep everything after "first".
                count = (nselect - first) if n_target is None else n_target
                if first + count > nselect:
                    raise RuntimeError(
                        "Requested first_target / n_target "
                        " range is larger than the number of selected targets "
                        " in the file")

                keep_targetids = keep_targetids[first:first + count]

                # Set used for constant-time membership tests below.
                keep_set = set(keep_targetids)
                self._alltargetids.update(keep_set)

                # This is the spectral row to target mapping using the
                # original global indices (before slicing).  Rows of targets
                # that are not kept are flagged with -1.

                self._spec_to_target[sfile] = [
                    x if y in keep_set else -1
                    for x, y in enumerate(fmap["TARGETID"])]

                # The reduced set of spectral rows.

                self._spec_keep[sfile] = [
                    x for x in self._spec_to_target[sfile] if x >= 0]

                # The mapping between original spectral indices and the
                # sliced ones

                self._spec_sliced[sfile] = {
                    x: y for y, x in enumerate(self._spec_keep[sfile])}

                # Slice the fibermap

                self._fmaps[sfile] = fmap[self._spec_keep[sfile]]

                # For each target, store the sliced row index of all spectra,
                # so that we can do a fast lookup later.  Every kept target
                # gets an entry (possibly empty); the lists are filled with a
                # single pass over the sliced fibermap.

                file_specs = {tid: [] for tid in keep_targetids}
                for row, tid in enumerate(self._fmaps[sfile]["TARGETID"]):
                    file_specs[tid].append(row)
                self._target_specs[sfile] = file_specs

                # We need some more metadata information for each file-
                # specifically, the bands that are used and their wavelength
                # grids.  That information will allow us to pre-allocate our
                # local target list and then fill that with one pass through
                # all HDUs in the files.

                self._bands[sfile] = []
                self._wave[sfile] = dict()

                if comm_rank == 0:
                    for h in range(nhdu):
                        if "EXTNAME" not in hdus[h].header:
                            continue
                        name = hdus[h].header["EXTNAME"]
                        # Extension names are split as <band>_<type>.
                        mat = re.match(r"(.*)_(.*)", name)
                        if mat is None:
                            continue
                        band = mat.group(1).lower()
                        if band not in self._bands[sfile]:
                            self._bands[sfile].append(band)
                        htype = mat.group(2)

                        if htype == "WAVELENGTH":
                            self._wave[sfile][band] = \
                                hdus[h].data.astype(np.float64).copy()

                if comm is not None:
                    self._bands[sfile] = comm.bcast(self._bands[sfile], root=0)
                    self._wave[sfile] = comm.bcast(self._wave[sfile], root=0)
            finally:
                if hdus is not None:
                    hdus.close()

        self._keep_targets = sorted(self._alltargetids)

        # Now we have the metadata for all targets in all files.  Distribute
        # the targets among process weighted by the amount of work to do for
        # each target.  This weight is either "1" if we are going to use coadds
        # or the number of spectra if we are using all the data.

        tweights = None
        if not coadd:
            tweights = dict()
            for t in self._keep_targets:
                tweights[t] = 0
                for sfile in spectrafiles:
                    if t in self._target_specs[sfile]:
                        tweights[t] += len(self._target_specs[sfile][t])

        self._proc_targets = distribute_work(comm_size,
                                             self._keep_targets,
                                             weights=tweights)

        self._my_targets = self._proc_targets[comm_rank]

        # Reverse mapping- target ID to index in our list
        self._my_target_indx = {y: x for x, y in enumerate(self._my_targets)}

        # Now every process has its local target IDs assigned.  Pre-create our
        # local target list with empty spectral data (except for wavelengths).
        # The spectra of a target are ordered by file, then band, then row;
        # the fill loop further down relies on that order.

        self._my_data = list()

        for t in self._my_targets:
            speclist = list()
            tileids = set()
            exps = set()
            bname = None
            for sfile in spectrafiles:
                if t not in self._target_specs[sfile]:
                    continue
                trows = self._target_specs[sfile][t]
                for b in self._bands[sfile]:
                    for sindx in trows:
                        frow = self._fmaps[sfile][sindx]
                        if bname is None:
                            bname = frow["BRICKNAME"]
                        exps.add(frow["EXPID"])
                        if "TILEID" in frow.dtype.names:
                            tileids.add(frow["TILEID"])
                        speclist.append(
                            Spectrum(self._wave[sfile][b], None, None,
                                     None, None))
            # Meta dictionary for this target.  Whatever keys we put in here
            # will end up as columns in the final zbest output table.
            tmeta = dict()
            tmeta["NUMEXP"] = len(exps)
            tmeta["NUMEXP_datatype"] = "i4"
            tmeta["NUMTILE"] = len(tileids)
            tmeta["NUMTILE_datatype"] = "i4"
            tmeta["BRICKNAME"] = bname
            tmeta["BRICKNAME_datatype"] = "S8"
            self._my_data.append(Target(t, speclist, coadd=False, meta=tmeta))

        # Iterate over the data and broadcast.  Every process selects the rows
        # of each table that contain pieces of local target data and copies it
        # into place.

        def _read_rows(hdus, extname, rows, optional=False):
            # Read the selected rows of one HDU on rank 0 and broadcast them.
            # An optional HDU that is missing yields None on every process.
            hdata = None
            if comm_rank == 0:
                if (not optional) or (extname in hdus):
                    hdata = hdus[extname].data[rows]
            if comm is not None:
                hdata = comm.bcast(hdata, root=0)
            return hdata

        # Index of the next unfilled spectrum of each local target.  A single
        # offset is shared by the flux / ivar / mask / resolution passes so
        # that a file or band without a MASK HDU cannot shift later masks
        # onto the wrong spectra.
        next_spec = {x: 0 for x in self._my_targets}

        for sfile in spectrafiles:
            rows = self._spec_keep[sfile]
            if len(rows) == 0:
                continue

            # Local targets that have rows in this file.
            local = [(toff, t, self._target_specs[sfile][t])
                     for toff, t in enumerate(self._my_targets)
                     if t in self._target_specs[sfile]]

            hdus = None
            if comm_rank == 0:
                hdus = fits.open(sfile, memmap=True)

            try:
                for b in self._bands[sfile]:
                    # (spectrum, sliced row) pairs to fill for this band.
                    slots = []
                    for toff, t, trows in local:
                        spectra = self._my_data[toff].spectra
                        start = next_spec[t]
                        for k, trow in enumerate(trows):
                            slots.append((spectra[start + k], trow))
                        next_spec[t] = start + len(trows)

                    extname = "{}_{}".format(b.upper(), "FLUX")
                    hdata = _read_rows(hdus, extname, rows)
                    for spec, trow in slots:
                        spec.flux = hdata[trow].astype(np.float64).copy()

                    extname = "{}_{}".format(b.upper(), "IVAR")
                    hdata = _read_rows(hdus, extname, rows)
                    for spec, trow in slots:
                        spec.ivar = hdata[trow].astype(np.float64).copy()

                    # The mask is optional; non-zero mask values zero out the
                    # inverse variance.
                    extname = "{}_{}".format(b.upper(), "MASK")
                    hdata = _read_rows(hdus, extname, rows, optional=True)
                    if hdata is not None:
                        for spec, trow in slots:
                            spec.ivar *= (hdata[trow] == 0)

                    extname = "{}_{}".format(b.upper(), "RESOLUTION")
                    hdata = _read_rows(hdus, extname, rows)
                    for spec, trow in slots:
                        dia = Resolution(hdata[trow].astype(np.float64))
                        spec.R = dia
                        spec.Rcsr = dia.tocsr()

                    del hdata
            finally:
                if hdus is not None:
                    hdus.close()

        # Compute the coadds now if we are going to use those

        if coadd:
            for t in self._my_data:
                t.compute_coadd()

        self.fibermap = Table(np.hstack([ self._fmaps[x] \
            for x in self._spectrafiles ]))

        super(DistTargetsDESI, self).__init__(self._keep_targets, comm=comm)
예제 #2
0
def read_spectra(spplate_name, targetids=None, use_frames=False,
    fiberid=None, coadd=False):
    """Read targets from an spPlate file or from its spCFrame exposures.

    Args:
        spplate_name (str): input spPlate file.
        targetids (list): restrict targets to this subset.  If None, every
            target with at least one usable spectrum is returned.
        use_frames (bool): if True, read the spCFrame files listed in the
            spPlate header (looked up in the directory of the spPlate file)
            instead of the spPlate file itself.
        fiberid (int or list): if not None, only read these fiber ID(s).
        coadd (bool): forwarded to every Target that is created.

    Returns:
        tuple: (targets, meta) where targets is a list of Target objects and
        meta is a Table with one row per returned target and the columns
        TARGETID and BRICKNAME.  Requested targets without any usable
        spectrum are reported and left out of both.

    """
    ## read spplate
    spplate = fitsio.FITS(spplate_name)
    try:
        # Parse the primary header once instead of once per keyword.
        header = spplate[0].read_header()
        plate = header["PLATEID"]
        mjd = header["MJD"]

        infiles = [spplate_name]
        if use_frames:
            path = os.path.dirname(spplate_name)
            cameras = ['b1','r1','b2','r2']

            infiles = []
            nexp_tot = 0
            for c in cameras:
                try:
                    nexp = header["NEXP_{}".format(c.upper())]
                except ValueError:
                    print("DEBUG: spplate {} has no exposures in camera {} ".format(spplate_name,c))
                    continue
                for _ in range(nexp):
                    # The EXPIDnn keywords are numbered over all cameras,
                    # zero-padded to two digits.
                    nexp_tot += 1
                    expid = "{:02d}".format(nexp_tot)
                    # os.path.join keeps the name relative when the spPlate
                    # path has no directory part.
                    exp = os.path.join(
                        path,
                        "spCFrame-" + header["EXPID" + expid][:11] + ".fits")
                    infiles.append(exp)
    finally:
        spplate.close()

    bricknames = {}
    dic_spectra = {}

    for infile in infiles:
        h = fitsio.FITS(infile)
        try:
            hdr = h[0].read_header()
            assert plate == hdr["PLATEID"]
            fs = h[5]["FIBERID"][:]
            fl = h[0].read()
            # Pixels with a non-zero value in HDU 2 get zero inverse variance.
            iv = h[1].read()*(h[2].read()==0)
            wd = h[4].read()
            # Frames carry a per-pixel log10(wavelength) image in HDU 3.
            loglam = h[3].read() if use_frames else None
        finally:
            h.close()

        if fiberid is not None:
            fs = fs[np.in1d(fs,fiberid)]

        ## crop to lmin, lmax
        lmin = 3500.
        lmax = 10000.
        if use_frames:
            la = 10**loglam
            if hdr["CAMERAS"][0]=="b":
                lmax = 6000.
            else:
                lmin = 5500.
        else:
            # The spPlate wavelength grid is log-linear and shared by all
            # fibers.
            coeff0 = hdr["COEFF0"]
            coeff1 = hdr["COEFF1"]
            la = 10**(coeff0 + coeff1*np.arange(fl.shape[1]))
            la = np.broadcast_to(la,fl.shape)

        imin = abs(la-lmin).min(axis=0).argmin()
        imax = abs(la-lmax).min(axis=0).argmin()

        la = la[:,imin:imax]
        fl = fl[:,imin:imax]
        iv = iv[:,imin:imax]
        wd = wd[:,imin:imax]

        # Replace (near-)zero dispersions to avoid dividing by zero below.
        w = wd<1e-5
        wd[w]=2.
        # di2[r, c] is the squared pixel distance (c - r)**2.
        ii = np.arange(la.shape[1])
        di = ii-ii[:,None]
        di2 = di**2
        ndiag = int(4*np.ceil(wd.max())+1)
        nbins = wd.shape[1]

        for f in fs:
            # Fiber IDs are 1-based row numbers; the modulo 500 folds them
            # onto the rows of a single frame file.
            i = (f-1)
            if use_frames:
                i = i%500
            if np.all(iv[i]==0):
                print("DEBUG: skipping plate,fid = {},{} (no data)".format(plate,f))
                continue

            t = platemjdfiber2targetid(plate, mjd, f)
            if t not in dic_spectra:
                dic_spectra[t]=[]
                brickname = '{}-{}'.format(plate,mjd)
                bricknames[t] = brickname

            ## build resolution from wdisp
            reso = np.zeros([ndiag,nbins])
            for idiag in range(ndiag):
                offset = ndiag//2-idiag
                d = np.diagonal(di2,offset=offset)
                if offset<0:
                    reso[idiag,:len(d)] = np.exp(-d/2/wd[i,:len(d)]**2)
                else:
                    reso[idiag,nbins-len(d):nbins]=np.exp(-d/2/wd[i,nbins-len(d):nbins]**2)

            R = Resolution(reso)
            # Normalize every row of the resolution matrix to unit sum.
            ccd = sparse.spdiags(1./R.sum(axis=1).T, 0, *R.shape)
            R = (ccd*R).todia()
            dic_spectra[t].append(Spectrum(la[i], fl[i], iv[i], R, R.tocsr()))

        print("DEBUG: read {} ".format(infile))

    # "is None" rather than "== None": the comparison must stay a scalar
    # even when targetids is a numpy array.
    if targetids is None:
        targetids = sorted(dic_spectra)

    targets = []
    meta_rows = []
    for targetid in targetids:
        # Targets whose fibers were all skipped never got an entry.
        spectra = dic_spectra.get(targetid, [])
        if len(spectra) == 0:
            print('ERROR: Target {} on {} has no good spectra'.format(targetid, os.path.basename(spplate_name)))
            continue
        # Add the brickname to the meta dictionary.  The keys of this dictionary
        # will end up as extra columns in the output ZBEST HDU.
        tmeta = dict()
        tmeta["BRICKNAME"] = bricknames[targetid]
        tmeta["BRICKNAME_datatype"] = "S8"
        targets.append(Target(targetid, spectra, coadd=coadd, meta=tmeta))
        meta_rows.append((targetid, bricknames[targetid]))

    #- Create a metadata table in case we might want to add other columns
    #- in the future.  It holds exactly one row per returned target.
    metatable = Table(names=("TARGETID", "BRICKNAME"), dtype=("i8", "S8",))
    for row in meta_rows:
        metatable.add_row(row)

    return targets, metatable