def run(self, patchRefList, butler, selectDataList=[]):
    """!Run stacking on a tract

    This method only runs on the master node.

    @param patchRefList: List of patch data references for tract
    @param butler: Data butler
    @param selectDataList: List of SelectStruct for inputs
    """
    pool = Pool("stacker")
    pool.cacheClear()
    pool.storeSet(butler=butler,
                  warpType=self.config.coaddName + "Coadd_directWarp",
                  coaddType=self.config.coaddName + "Coadd")
    patchIdList = [patchRef.dataId for patchRef in patchRefList]

    selectedData = pool.map(self.warp, patchIdList, selectDataList)
    if self.config.doBackgroundReference:
        self.backgroundReference.runDataRef(patchRefList, selectDataList)

    def refNamer(patchRef):
        return tuple(map(int, patchRef.dataId["patch"].split(",")))

    lookup = dict(zip(map(refNamer, patchRefList), selectedData))
    coaddData = [Struct(patchId=patchRef.dataId, selectDataList=lookup[refNamer(patchRef)])
                 for patchRef in patchRefList]
    pool.map(self.coadd, coaddData)

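# The refNamer/lookup idiom above pairs each patch with its warp result by
# keying on the numeric (x, y) patch index parsed from the "patch" string.
# A minimal self-contained sketch of that idiom, using plain dicts in place
# of butler data references (names here are illustrative, not LSST API):

def _pairResultsByPatch(patchRefList, results):
    """Zip per-patch results back onto their patch references via (x, y) keys."""
    def refNamer(patchRef):
        # "3,4" -> (3, 4)
        return tuple(map(int, patchRef["patch"].split(",")))
    lookup = dict(zip(map(refNamer, patchRefList), results))
    return [(patchRef, lookup[refNamer(patchRef)]) for patchRef in patchRefList]

# Example: _pairResultsByPatch([{"patch": "0,0"}, {"patch": "0,1"}], ["a", "b"])
# returns [({"patch": "0,0"}, "a"), ({"patch": "0,1"}, "b")].
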
def runDataRef(self, index):
    # Prepare the pool
    pool = Pool("processCosmo")
    pool.cacheClear()
    fieldList = np.arange(200 * index, 200 * (index + 1))
    pool.map(self.process, fieldList)
    return

def runDataRef(self, patchRefList, selectDataList=[]):
    """!Run association processing on coadds

    Only the master node runs this method.

    @param patchRefList: Data references to run measurement
    """
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group all filters by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            assert tract == dataId["tract"]
        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runAssociation, patches.values(), selectDataList)

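# Several drivers in this file group per-filter dataIds by patch with the
# same open-coded dict loop. A minimal sketch of that grouping, using
# collections.defaultdict and plain dicts in place of butler dataIds
# (illustrative only, not the LSST API):

from collections import defaultdict

def _groupByPatch(dataIdList):
    """Group dataIds by their "patch" key; all must share one tract."""
    tracts = {dataId["tract"] for dataId in dataIdList}
    assert len(tracts) <= 1, "all dataIds must belong to a single tract"
    patches = defaultdict(list)
    for dataId in dataIdList:
        patches[dataId["patch"]].append(dataId)
    return dict(patches)

# Example: _groupByPatch([{"tract": 0, "patch": "1,1", "filter": "g"},
#                         {"tract": 0, "patch": "1,1", "filter": "r"}])
# returns {"1,1": [<g dataId>, <r dataId>]}.
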
def runDataRef(self, dataRefList):
    """Process a single Coadd, with scatter-gather-scatter using MPI.
    """
    pool = Pool("all")
    pool.cacheClear()
    pool.map(self.runForced, dataRefList)

def runDataRef(self, patchRefList):
    """!Combine forced diaObjects into a single catalog to construct light curves

    Only the master node runs this method.

    @param patchRefList: Data references to run measurement
    """
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group all filters by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            assert tract == dataId["tract"]
        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runCombine, patches.values())

def runDataRef(self, index):
    # Prepare the pool
    pool = Pool("processBasic")
    pool.cacheClear()
    pool.storeSet(doHSM=self.config.doHSM)
    pool.storeSet(doFPFS=self.config.doFPFS)
    pool.storeSet(galDir=self.config.galDir)
    pool.storeSet(outDir=self.config.outDir)
    fieldList = np.arange(100 * index, 100 * (index + 1))
    pool.map(self.process, fieldList)
    return

def runDataRef(self, Id):
    self.log.info('beginning group %d' % (Id))
    perGroup = self.config.perGroup
    fMin = perGroup * Id
    fMax = perGroup * (Id + 1)
    # Prepare the pool
    pool = Pool("noiSim")
    pool.cacheClear()
    fieldList = range(fMin, fMax)
    pool.map(self.process, fieldList)
    self.log.info('finish group %d' % (Id))
    return

def runDataRef(self, rawRefList, butler):
    pool = Pool("visits")
    pool.cacheClear()
    pool.storeSet(butler=butler)
    # Make unique combinations of visit and CCD number.
    # The 4-bit shift (which assumes at most 16 CCDs per visit) needs to be
    # replaced by a config parameter.
    visitCcdIdList = set()
    for rawRef in rawRefList:
        visitCcdIdList.add((rawRef.dataId['visit'] << 4) + rawRef.dataId['ccd'])
    visitCcdIdList = list(visitCcdIdList)
    # Map visits/CCDs out to separate nodes:
    pool.map(self.runVisit, visitCcdIdList, rawRefList)

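# The shift-and-add above packs (visit, ccd) into a single integer so the
# pool can map over hashable scalar IDs. A sketch of the pack/unpack pair,
# with the bit width as the parameter the comment above says should come
# from config (assumed names, for illustration):

CCD_BITS = 4  # supports ccd in [0, 16); should come from config

def packVisitCcd(visit, ccd, ccdBits=CCD_BITS):
    assert 0 <= ccd < (1 << ccdBits), "ccd does not fit in the allotted bits"
    return (visit << ccdBits) + ccd

def unpackVisitCcd(visitCcdId, ccdBits=CCD_BITS):
    return visitCcdId >> ccdBits, visitCcdId & ((1 << ccdBits) - 1)

# Example: packVisitCcd(1234, 7) == 19751; unpackVisitCcd(19751) == (1234, 7).
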
def runDataRef(self, index):
    self.log.info('beginning for group %d' % (index))
    # Prepare the storeSet
    pool = Pool("cgcSimBasicBatch")
    pool.cacheClear()
    # expDir = "galaxy_basic_psf60"
    # expDir = "small0_psf60"
    # expDir = "galaxy_basic2Center_psf60"
    expDir = "galaxy_basic2Shift_psf60"
    if not os.path.isdir(expDir):
        os.mkdir(expDir)
    pool.storeSet(expDir=expDir)
    fieldList = np.arange(200 * index, 200 * (index + 1))
    pool.map(self.process, fieldList)
    return

def runDataRef(self, Id):
    self.log.info('beginning for group %d' % (Id))
    # Prepare the storeSet
    pool = Pool("galsimProcessBatch")
    pool.cacheClear()
    expDir = "sim20210301/galaxy_basic_psf75"
    assert os.path.isdir(expDir)
    pool.storeSet(expDir=expDir)
    pool.storeSet(Id=Id)
    # Prepare the pool
    p2List = ['0000', '1111', '2222']
    p1List = ['g1', 'g2']
    pendList = ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]
    pool.map(self.process, pendList)
    self.log.info('finish group %d' % (Id))
    return

def runDataRef(self, patchRefList):
    """!Run association processing on coadds

    Only the master node runs this method.

    No real MPI communication (scatter/gather) takes place: all I/O goes
    through the disk. We want the intermediate stages on disk, and the
    component Tasks are implemented around this, so we just follow suit.

    @param patchRefList: Data references to run measurement
    """
    self.log.info("Processing %d patch references" % len(patchRefList))
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            assert tract == dataId["tract"]
        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    # Build one data reference per grouped dataId
    dataRefList = [getDataRef(butler, dataId, self.config.coaddName + "Coadd_calexp")
                   for dataIdList in patches.values() for dataId in dataIdList]
    pool.map(self.runAssociation, dataRefList)

def runDataRef(self, pend):
    self.log.info('beginning for setup %s' % (pend))
    # Prepare the storeSet
    pool = Pool("cgcSimCosmoBatch")
    pool.cacheClear()
    expDir = "galaxy_cosmoR_psf60"
    if not os.path.isdir(expDir):
        os.mkdir(expDir)
    pool.storeSet(expDir=expDir)
    pool.storeSet(pend=pend)
    hpList = imgSimutil.cosmoHSThpix[4:5]  # TODO: remove
    p2List = ['0000', '2222', '2000', '0200', '0020', '0002']
    p1List = ['g1']  # ['g1', 'g2']
    hpList = ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]
    pool.map(self.process, hpList)
    self.log.info('finish setup %s' % (pend))
    return

def run(self, expRef):
    """Measure focus for exposure

    This method is the top-level for running the focus measurement as a
    stand-alone BatchPoolTask.

    Only the master node runs this method.
    """
    pool = Pool("processFocus")
    pool.cacheClear()
    pool.storeSet(butler=expRef.getButler())
    dataIdList = sorted([ccdRef.dataId for ccdRef in expRef.subItems("ccd")
                         if ccdRef.datasetExists("raw") and self.isFocus(ccdRef)])

    results = pool.map(self.processPool, dataIdList)

    camera = expRef.get("camera")
    plotFilename = expRef.get("focusPlot_filename")
    focus = self.measureFocus(results, camera, plotFilename)
    self.log.info("Focus result for %s: %s" % (expRef.dataId, focus))
    return focus

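# The focus driver above is a plain scatter-gather: map per-CCD work across
# the pool, then reduce the gathered results on the master. The same shape,
# sketched with the standard-library multiprocessing.Pool rather than the
# MPI-backed pool used here (_processOne is an illustrative stand-in):

import multiprocessing

def _processOne(dataId):
    # Per-CCD work; here just a stand-in computation.
    return dataId * dataId

def scatterGather(dataIdList, reduce_fn=sum):
    # Run under an `if __name__ == "__main__":` guard in a script.
    with multiprocessing.Pool() as procPool:
        results = procPool.map(_processOne, dataIdList)  # scatter
    return reduce_fn(results)                            # gather + reduce

# Example: scatterGather([1, 2, 3]) == 14.
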
def runTract(self, patchRefList, butler, selectDataList=[]):
    """Run stacking on a tract

    This method only runs on the master node.

    @param patchRefList: List of patch data references for tract
    @param butler: Data butler
    @param selectDataList: List of SelectStruct for inputs
    """
    pool = Pool("stacker")
    pool.cacheClear()
    pool.storeSet(butler=butler,
                  warpType=self.config.coaddName + "Coadd_tempExp",
                  coaddType=self.config.coaddName + "Coadd")
    patchIdList = [patchRef.dataId for patchRef in patchRefList]

    selectedData = pool.map(self.warp, patchIdList, selectDataList)
    if self.config.doBackgroundReference:
        self.backgroundReference.run(patchRefList, selectDataList)

    def refNamer(patchRef):
        return tuple(map(int, patchRef.dataId["patch"].split(",")))

    lookup = dict(zip(map(refNamer, patchRefList), selectedData))
    coaddData = [Struct(patchId=patchRef.dataId, selectDataList=lookup[refNamer(patchRef)])
                 for patchRef in patchRefList]
    pool.map(self.coadd, coaddData)

def runDataRef(self, pend):
    psfFWHM = '60'  # '60', 'HSC'
    npend = 'outCosmoR-var36em4'
    outDir = os.path.join(self.config.rootDir, npend, 'mag245-res03-bm38-dis4')
    if not os.path.isdir(outDir):
        os.mkdir(outDir)
    self.log.info('beginning for %s, seeing %s' % (pend, psfFWHM))
    # Prepare the storeSet
    pool = Pool("reGaussCosmoMeasBatch")
    pool.cacheClear()
    pool.storeSet(pend=pend)
    pool.storeSet(psfFWHM=psfFWHM)
    pool.storeSet(npend=npend)
    # Prepare the pool
    resList = pool.map(self.process, np.arange(1000))
    resList = [x for x in resList if x is not None]
    if len(resList) > 1:
        newTab = Table(rows=resList,
                       names=('e1_z1', 'n_z1', 'e1_z2', 'n_z2',
                              'e1_z3', 'n_z3', 'e1_z4', 'n_z4'))
        finOname = os.path.join(outDir, 'e1_%s_psf%s.fits' % (pend, psfFWHM))
        newTab.write(finOname, overwrite=True)
    return

def runDataRef(self, expRef):
    """Perform sky correction on an exposure

    We restore the original sky, and remove it again using multiple
    algorithms. We optionally apply:

    1. A large-scale background model. This step removes very-large-scale
       sky such as moonlight.
    2. A sky frame.
    3. A medium-scale background model. This step removes residual sky
       (smooth on the scale of the focal plane).

    Only the master node executes this method. The data is held on the
    slave nodes, which do all the hard work.

    Parameters
    ----------
    expRef : `lsst.daf.persistence.ButlerDataRef`
        Data reference for exposure.

    See Also
    --------
    ~lsst.pipe.drivers.SkyCorrectionTask.run
    """
    if DEBUG:
        extension = "-%(visit)d.fits" % expRef.dataId

    with self.logOperation("processing %s" % (expRef.dataId,)):
        pool = Pool()
        pool.cacheClear()
        pool.storeSet(butler=expRef.getButler())
        camera = expRef.get("camera")

        dataIdList = [ccdRef.dataId for ccdRef in expRef.subItems("ccd")
                      if ccdRef.datasetExists(self.config.calexpType)]

        exposures = pool.map(self.loadImage, dataIdList)
        if DEBUG:
            makeCameraImage(camera, exposures, "restored" + extension)
            exposures = pool.mapToPrevious(self.collectOriginal, dataIdList)
            makeCameraImage(camera, exposures, "original" + extension)
            exposures = pool.mapToPrevious(self.collectMask, dataIdList)
            makeCameraImage(camera, exposures, "mask" + extension)

        if self.config.doBgModel:
            exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel)

        if self.config.doSky:
            measScales = pool.mapToPrevious(self.measureSkyFrame, dataIdList)
            scale = self.sky.solveScales(measScales)
            self.log.info("Sky frame scale: %s" % (scale,))
            exposures = pool.mapToPrevious(self.subtractSkyFrame, dataIdList, scale)
            if DEBUG:
                makeCameraImage(camera, exposures, "skysub" + extension)
                calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                makeCameraImage(camera, calibs, "sky" + extension)

        if self.config.doBgModel2:
            exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel2)

        # Persist camera-level image of calexp
        image = makeCameraImage(camera, exposures)
        expRef.put(image, "calexp_camera")

        pool.mapToPrevious(self.write, dataIdList)

def run(self, expRef):
    """Perform sky correction on an exposure

    We restore the original sky, and remove it again using multiple
    algorithms. We optionally apply:

    1. A large-scale background model.
    2. A sky frame.

    Only the master node executes this method. The data is held on the
    slave nodes, which do all the hard work.

    Parameters
    ----------
    expRef : `lsst.daf.persistence.ButlerDataRef`
        Data reference for exposure.
    """
    if DEBUG:
        extension = "-%(visit)d.fits" % expRef.dataId

    with self.logOperation("processing %s" % (expRef.dataId,)):
        pool = Pool()
        pool.cacheClear()
        pool.storeSet(butler=expRef.getButler())
        camera = expRef.get("camera")

        dataIdList = [ccdRef.dataId for ccdRef in expRef.subItems("ccd")
                      if ccdRef.datasetExists("calexp")]

        exposures = pool.map(self.loadImage, dataIdList)
        if DEBUG:
            makeCameraImage(camera, exposures, "restored" + extension)
            exposures = pool.mapToPrevious(self.collectOriginal, dataIdList)
            makeCameraImage(camera, exposures, "original" + extension)
            exposures = pool.mapToPrevious(self.collectMask, dataIdList)
            makeCameraImage(camera, exposures, "mask" + extension)

        if self.config.doBgModel:
            bgModel = FocalPlaneBackground.fromCamera(self.config.bgModel, camera)
            data = [Struct(dataId=dataId, bgModel=bgModel.clone()) for dataId in dataIdList]
            bgModelList = pool.mapToPrevious(self.accumulateModel, data)
            for ii, bg in enumerate(bgModelList):
                self.log.info("Background %d: %d pixels", ii, bg._numbers.getArray().sum())
                bgModel.merge(bg)
            if DEBUG:
                bgModel.getStatsImage().writeFits("bgModel" + extension)
                bgImages = pool.mapToPrevious(self.realiseModel, dataIdList, bgModel)
                makeCameraImage(camera, bgImages, "bgModelCamera" + extension)

            exposures = pool.mapToPrevious(self.subtractModel, dataIdList, bgModel)
            if DEBUG:
                makeCameraImage(camera, exposures, "modelsub" + extension)

        if self.config.doSky:
            measScales = pool.mapToPrevious(self.measureSkyFrame, dataIdList)
            scale = self.sky.solveScales(measScales)
            self.log.info("Sky frame scale: %s" % (scale,))
            exposures = pool.mapToPrevious(self.subtractSkyFrame, dataIdList, scale)
            if DEBUG:
                makeCameraImage(camera, exposures, "skysub" + extension)
                calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                makeCameraImage(camera, calibs, "sky" + extension)

        # Persist camera-level image of calexp
        image = makeCameraImage(camera, exposures)
        expRef.put(image, "calexp_camera")

        pool.mapToPrevious(self.write, dataIdList)

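# The doBgModel branch above is a map-then-merge reduction: each slave
# accumulates into its own clone of an empty model, and the master merges
# the partial models back together. A minimal sketch of that pattern with
# a toy accumulator standing in for FocalPlaneBackground (assumed names):

import copy

class ToyModel:
    """Stand-in with the clone/accumulate/merge shape used above."""
    def __init__(self):
        self.total = 0
    def clone(self):
        return copy.deepcopy(self)
    def accumulate(self, value):
        self.total += value
    def merge(self, other):
        self.total += other.total
        return self

def mapThenMerge(values):
    base = ToyModel()
    partials = []
    for value in values:   # in the driver, this work happens on the slaves
        part = base.clone()
        part.accumulate(value)
        partials.append(part)
    for part in partials:  # the master merges the partial results
        base.merge(part)
    return base.total

# Example: mapThenMerge([1, 2, 3]) == 6.
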
def runDataRef(self, patchRefList):
    """!Run multiband processing on coadds

    Only the master node runs this method.

    No real MPI communication (scatter/gather) takes place: all I/O goes
    through the disk. We want the intermediate stages on disk, and the
    component Tasks are implemented around this, so we just follow suit.

    @param patchRefList: Data references to run measurement
    """
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)
    # MultiBand measurements require that the detection stage be completed
    # before measurements can be made.
    #
    # The configuration for coaddDriver.py allows detection to be turned
    # off in the event that fake objects are to be added during the
    # detection process. This allows the long co-addition process to be
    # run once, and multiple different MultiBand reruns (with different
    # fake objects) to exist from the same base co-addition.
    #
    # However, we only re-run detection if doDetection is explicitly True
    # here (this should always be the opposite of coaddDriver.doDetection);
    # otherwise we have no way to tell reliably whether any detections
    # present in an input repo are safe to use.
    if self.config.doDetection:
        detectionList = []
        for patchRef in patchRefList:
            if ("detectCoaddSources" in self.reuse
                    and patchRef.datasetExists(self.coaddType + "Coadd_calexp", write=True)):
                self.log.info("Skipping detectCoaddSources for %s; output already exists." %
                              patchRef.dataId)
                continue
            if not patchRef.datasetExists(self.coaddType + "Coadd"):
                self.log.debug("Not processing %s; required input %sCoadd missing." %
                               (patchRef.dataId, self.config.coaddName))
                continue
            detectionList.append(patchRef)
        pool.map(self.runDetection, detectionList)

    patchRefList = [patchRef for patchRef in patchRefList
                    if patchRef.datasetExists(self.coaddType + "Coadd_calexp")
                    and patchRef.datasetExists(self.config.coaddName + "Coadd_det",
                                               write=self.config.doDetection)]
    dataIdList = [patchRef.dataId for patchRef in patchRefList]

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            assert tract == dataId["tract"]
        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runMergeDetections, patches.values())

    # Deblend merged detections, and test for reprocessing
    #
    # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
    # footprints can suck up a lot of memory in the deblender, which means that when we process on a
    # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
    # they may have astronomically interesting data, we want the ability to go back and reprocess them
    # with a more permissive configuration when we have more memory or processing time.
    #
    # self.runDeblendMerged will return whether there are any footprints in that image that required
    # reprocessing. We need to convert that list of booleans into a dict mapping the patchId (x,y) to
    # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
    # a particular patch.
    #
    # This determination of which patches need to be reprocessed exists only in memory (the measurements
    # have been written, clobbering the old ones), so if there was an exception we would lose this
    # information, leaving things in an inconsistent state (measurements new, but merged measurements and
    # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
    # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the measurements,
    # merge and forced photometry.
    #
    # This is, hopefully, a temporary workaround until we can improve the deblender.
    try:
        reprocessed = pool.map(self.runDeblendMerged, patches.values())
    finally:
        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recovery easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # We must have failed at some point while reprocessing
                    # and we're starting over
                    patchReprocessing[patchId] = True

    # Only process patches that have been identified as needing it
    pool.map(self.runMeasurements,
             [dataId1 for dataId1 in dataIdList
              if not self.config.reprocessing or patchReprocessing[dataId1["patch"]]])
    pool.map(self.runMergeMeasurements,
             [idList for patchId, idList in patches.items()
              if not self.config.reprocessing or patchReprocessing[patchId]])
    pool.map(self.runForcedPhot,
             [dataId1 for dataId1 in dataIdList
              if not self.config.reprocessing or patchReprocessing[dataId1["patch"]]])

    # Remove persisted reprocessing determination
    if self.config.reprocessing:
        for patchId in patchReprocessing:
            if not patchReprocessing[patchId]:
                continue
            dataId = dict(tract=tract, patch=patchId)
            filename = butler.get(reprocessDataset + "_filename", dataId)[0]
            os.unlink(filename)

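# The finally block above persists the "needs reprocessing" determination by
# touching a per-patch marker file, so a crash between stages can be detected
# on restart by checking for the file's existence. The bare pattern, sketched
# with pathlib (file naming here is hypothetical, for illustration):

from pathlib import Path

def markNeedsReprocessing(flagFile):
    Path(flagFile).touch()  # equivalent to open(filename, 'a').close()

def needsReprocessing(flagFile):
    return Path(flagFile).exists()

def clearReprocessing(flagFile):
    Path(flagFile).unlink()  # remove the marker once the stages complete

# Example: markNeedsReprocessing("patch_1_1.reprocess");
# needsReprocessing("patch_1_1.reprocess") is True until clearReprocessing(...).
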
def run(self, patchRefList):
    """Run multiband processing on coadds

    All nodes execute this method, though the master and slaves take
    different routes through it.

    No real MPI communication takes place: all I/O goes through the disk.
    We want the intermediate stages on disk, and the component Tasks are
    implemented around this, so we just follow suit.

    @param patchRefList: Data references to run measurement
    """
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    patchRefList = [patchRef for patchRef in patchRefList
                    if patchRef.datasetExists(self.config.coaddName + "Coadd")
                    and patchRef.datasetExists(self.config.coaddName + "Coadd_det")]
    dataIdList = [patchRef.dataId for patchRef in patchRefList]

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            assert tract == dataId["tract"]
        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runMergeDetections, patches.values())

    # Measure merged detections, and test for reprocessing
    #
    # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
    # footprints can suck up a lot of memory in the deblender, which means that when we process on a
    # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
    # they may have astronomically interesting data, we want the ability to go back and reprocess them
    # with a more permissive configuration when we have more memory or processing time.
    #
    # self.runMeasureMerged will return whether there are any footprints in that image that required
    # reprocessing. We need to convert that list of booleans into a dict mapping the patchId (x,y) to
    # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
    # a particular patch.
    #
    # This determination of which patches need to be reprocessed exists only in memory (the measurements
    # have been written, clobbering the old ones), so if there was an exception we would lose this
    # information, leaving things in an inconsistent state (measurements new, but merged measurements and
    # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
    # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the merge and
    # forced photometry.
    #
    # This is, hopefully, a temporary workaround until we can improve the deblender.
    try:
        reprocessed = pool.map(self.runMeasureMerged, dataIdList)
    finally:
        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recovery easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # We must have failed at some point while reprocessing and we're starting over
                    patchReprocessing[patchId] = True

    # Only process patches that have been identified as needing it
    pool.map(self.runMergeMeasurements,
             [idList for patchId, idList in patches.items()
              if not self.config.reprocessing or patchReprocessing[patchId]])
    pool.map(self.runForcedPhot,
             [dataId for dataId in dataIdList
              if not self.config.reprocessing or patchReprocessing[dataId["patch"]]])

    # Remove persisted reprocessing determination
    if self.config.reprocessing:
        for patchId in patchReprocessing:
            if not patchReprocessing[patchId]:
                continue
            dataId = dict(tract=tract, patch=patchId)
            filename = butler.get(reprocessDataset + "_filename", dataId)[0]
            os.unlink(filename)