def run(self, tractPatchRefList, butler, selectIdList=[]):
    """!Select the non-empty tracts and run the per-tract processing on them

    @param tractPatchRefList: List of per-tract lists of patch data references;
        every inner list must refer to exactly one tract
    @param butler: butler reference object
    @param selectIdList: List of data Ids (i.e. visit, ccd) to consider when making the coadd
    @return list holding the result of self.runTract for each non-empty tract
    """
    pool = Pool("tracts")
    skymap = butler.get(self.config.coaddName + "Coadd_skyMap")
    pool.storeSet(butler=butler, skymap=skymap)

    # Reduce every inner list to the single tract id it refers to.
    tractIdList = []
    for refsInTract in tractPatchRefList:
        tractIds = {ref.dataId["tract"] for ref in refsInTract}
        assert len(tractIds) == 1
        tractIdList.append(tractIds.pop())

    # Read the selection data, dropping entries for which nothing came back.
    selections = pool.mapNoBalance(self.readSelection, selectIdList)
    selectDataList = [entry for entry in selections if entry is not None]

    # Keep only the tracts that checkTract flags as non-empty.
    nonEmptyList = pool.mapNoBalance(self.checkTract, tractIdList, selectDataList)
    tractPatchRefList = [refsInTract
                         for refsInTract, keep in zip(tractPatchRefList, nonEmptyList)
                         if keep]
    keptTracts = [refsInTract[0].dataId["tract"] for refsInTract in tractPatchRefList]
    self.log.info("Non-empty tracts (%d): %s" % (len(tractPatchRefList), keptTracts))

    # Install the dataRef in the selectDataList
    for entry in selectDataList:
        entry.dataRef = getDataRef(butler, entry.dataId, "calexp")

    # Process the non-empty tracts
    results = []
    for refsInTract in tractPatchRefList:
        results.append(self.runTract(refsInTract, butler, selectDataList))
    return results
def runDataRef(self, expRef):
    """Generate an image of the entire visit

    Only the master node executes this method; it controls the slave nodes,
    which do the data retrieval.

    The per-CCD results of ``self.readImage`` are assembled with
    ``makeCameraImage`` and written out as ``calexp_camera``.

    Parameters
    ----------
    expRef : `lsst.daf.persistence.ButlerDataRef`
        Data reference for exposure.
    """
    pool = Pool()
    if not self._storedButler:
        pool.storeSet(butler=expRef.getButler())

    with self.logOperation("processing %s" % (expRef.dataId, )):
        camera = expRef.get("camera")

        # Only CCDs that actually have a calexp are read.
        dataIdList = []
        for ccdRef in expRef.subItems("ccd"):
            if ccdRef.datasetExists("calexp"):
                dataIdList.append(ccdRef.dataId)

        # Each non-None result is a (key, value) pair; collect them in a dict.
        results = pool.map(self.readImage, dataIdList)
        exposures = dict(pair for pair in results if pair is not None)

        image = makeCameraImage(camera, exposures, self.config.binning)
        expRef.put(image, "calexp_camera")
def run(self, args):
    """Run ingest

    The files are read and ingested in parallel; the registry database is
    then filled serially from the collected results.
    """
    # Parallel
    pool = Pool(None)
    dataList = []
    for position, filename in enumerate(self.expandFiles(args.files)):
        dataList.append(Struct(filename=filename, position=position))
    infoList = pool.map(self.runFileWrapper, dataList, args)

    # Serial
    with self.register.openRegistry(args.input, create=args.create,
                                    dryrun=args.dryrun) as registry:
        for hduInfoList in infoList:
            # Files that produced nothing are reported as None.
            if hduInfoList is not None:
                for info in hduInfoList:
                    self.register.addRow(registry, info, dryrun=args.dryrun,
                                         create=args.create)
def run(self, patchRefList, butler, selectDataList=[]):
    """!Run stacking on a tract

    This method only runs on the master node.

    Warps are made first (self.warp, one job per patch), then the coadds
    (self.coadd) using the per-patch selection returned by the warp step.

    @param patchRefList: List of patch data references for tract
    @param butler: Data butler
    @param selectDataList: List of SelectStruct for inputs
    """
    pool = Pool("stacker")
    pool.cacheClear()
    coaddName = self.config.coaddName
    pool.storeSet(butler=butler,
                  warpType=coaddName + "Coadd_directWarp",
                  coaddType=coaddName + "Coadd")

    patchIdList = [ref.dataId for ref in patchRefList]
    selectedData = pool.map(self.warp, patchIdList, selectDataList)
    if self.config.doBackgroundReference:
        self.backgroundReference.runDataRef(patchRefList, selectDataList)

    def patchIndex(ref):
        # "x,y" patch name -> (x, y) tuple of ints, used as lookup key
        return tuple(int(part) for part in ref.dataId["patch"].split(","))

    lookup = {patchIndex(ref): selected
              for ref, selected in zip(patchRefList, selectedData)}
    coaddData = []
    for ref in patchRefList:
        coaddData.append(Struct(patchId=ref.dataId,
                                selectDataList=lookup[patchIndex(ref)]))
    pool.map(self.coadd, coaddData)
def runDataRef(self, patchRefList, selectDataList=[]):
    """!Run association processing on coadds

    Only the master node runs this method.

    The data ids are grouped by patch and self.runAssociation is mapped over
    the groups.

    @param patchRefList: Data references to run measurement
    @param selectDataList: Passed through unchanged to self.runAssociation
    """
    # The butler is taken from the first usable data reference.
    for candidate in patchRefList:
        if not candidate:
            continue
        butler = candidate.getButler()
        break
    else:
        raise RuntimeError("No valid patches")

    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group all filters by patch
    patches = {}
    tract = None
    for ref in patchRefList:
        dataId = ref.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            # All references are expected to share one tract.
            assert tract == dataId["tract"]
        patches.setdefault(dataId["patch"], []).append(dataId)

    pool.map(self.runAssociation, patches.values(), selectDataList)
def runDataRef(self, patchRefList):
    """!Combine forced diaObjects into a single catalog to construct light curves

    Only the master node runs this method.

    The data ids are grouped by patch and self.runCombine is mapped over the
    groups.

    @param patchRefList: Data references to run measurement
    """
    # The butler is taken from the first usable data reference.
    for candidate in patchRefList:
        if not candidate:
            continue
        butler = candidate.getButler()
        break
    else:
        raise RuntimeError("No valid patches")

    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group all filters by patch
    patches = {}
    tract = None
    for ref in patchRefList:
        dataId = ref.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            # All references are expected to share one tract.
            assert tract == dataId["tract"]
        patches.setdefault(dataId["patch"], []).append(dataId)

    pool.map(self.runCombine, patches.values())
def run(self, expRefList, butler, calibId):
    """!Construct a calib from a list of exposure references

    This is the entry point, called by the TaskRunner.__call__

    Only the master node executes this method.

    @param expRefList  List of data references at the exposure level
    @param butler      Data butler
    @param calibId   Identifier dict for calib
    """
    ccdKeys = self.config.ccdKeys
    for expRef in expRefList:
        self.addMissingKeys(expRef.dataId, butler, ccdKeys, 'raw')

    outputId = self.getOutputId(expRefList, calibId)
    ccdIdLists = getCcdIdListFromExposures(expRefList, level="sensor", ccdKeys=ccdKeys)

    # Ensure we can generate filenames for each output
    outputIdItemList = list(outputId.items())
    for ccdName in ccdIdLists:
        dataId = {key: ccdName[position] for position, key in enumerate(self.config.ccdKeys)}
        dataId.update(outputIdItemList)
        self.addMissingKeys(dataId, butler)
        # Re-apply the output id so it wins over anything addMissingKeys filled in.
        dataId.update(outputIdItemList)

        try:
            butler.get(self.calibName + "_filename", dataId)
        except Exception as e:
            message = "Unable to determine output filename \"%s_filename\" from %s: %s" % (
                self.calibName, dataId, e)
            raise RuntimeError(message)

    pool = Pool()
    pool.storeSet(butler=butler)

    # Scatter: process CCDs independently
    data = self.scatterProcess(pool, ccdIdLists)

    # Gather: determine scalings
    scales = self.scale(ccdIdLists, data)

    # Scatter: combine
    self.scatterCombine(pool, outputId, ccdIdLists, scales)
def run(self, visitRef):
    """Main entry-point

    Only the master node runs this method.  It will dispatch jobs to the
    slave nodes.

    self.read is mapped over the data ids of all CCDs with a "raw" dataset
    (ordered by ccdExposureId) and the non-None results are summed and logged.
    """
    pool = Pool("test")

    # Less overhead to transfer the butler once rather than in each dataRef
    idsByExposure = {}
    for ccdRef in visitRef.subItems("ccd"):
        if ccdRef.datasetExists("raw"):
            exposureId = ccdRef.get("ccdExposureId")
            idsByExposure[exposureId] = ccdRef.dataId
    dataIdList = collections.OrderedDict(sorted(idsByExposure.items()))

    with self.logOperation("master"):
        pixels = pool.map(self.read, list(dataIdList.values()),
                          butler=visitRef.getButler())

    # Jobs that returned nothing are left out of the total.
    total = sum(count for count in pixels if count is not None)
    self.log.info("Total number of pixels read: %d" % (total,))
def runDataRef(self, Id):
    """Map self.process over every '<p1>-<p2>' name combination for group Id.

    The experiment directory must already exist; it and Id are stored on the
    pool before the jobs are dispatched.
    """
    self.log.info('begining for group %d' % (Id))

    # Prepare the storeSet
    pool = Pool("galsimProcessBatch")
    pool.cacheClear()
    expDir = "sim20210301/galaxy_basic_psf75"
    assert os.path.isdir(expDir)
    pool.storeSet(expDir=expDir)
    pool.storeSet(Id=Id)

    # Prepare the pool: one job per (prefix, suffix) pair, prefix-major order
    suffixes = ['0000', '1111', '2222']
    prefixes = ['g1', 'g2']
    pendList = []
    for prefix in prefixes:
        for suffix in suffixes:
            pendList.append('%s-%s' % (prefix, suffix))
    pool.map(self.process, pendList)

    self.log.info('finish group %d' % (Id))
def runDataRef(self, pend):
    """Map self.process over the fixed list of 'g1-<code>' names for one setup.

    The output directory is created if needed, and both it and ``pend`` are
    stored on the pool before the jobs are dispatched.

    Parameters
    ----------
    pend :
        Setup identifier; it is logged and stored on the pool as ``pend``.
    """
    self.log.info('begining for setup %s' % (pend))

    # Prepare the storeSet
    pool = Pool("cgcSimCosmoBatch")
    pool.cacheClear()
    expDir = "galaxy_cosmoR_psf60"
    # exist_ok avoids the isdir()/mkdir() race when another job creates the
    # directory between the check and the creation.
    os.makedirs(expDir, exist_ok=True)
    pool.storeSet(expDir=expDir)
    pool.storeSet(pend=pend)

    # The former `hpList = imgSimutil.cosmoHSThpix[4:5]` assignment was
    # overwritten before use (and marked "TODO: remove"), so it is dropped.
    p2List = ['0000', '2222', '2000', '0200', '0020', '0002']
    p1List = ['g1']  # ['g1','g2']
    hpList = ['%s-%s' % (i1, i2) for i1 in p1List for i2 in p2List]
    pool.map(self.process, hpList)

    self.log.info('finish setup %s' % (pend))
    return
def run(self, visitRef):
    """Main entry-point

    Only the master node runs this method.  It will dispatch jobs to the
    slave nodes.

    The results of self.read for all CCDs with a "raw" dataset (ordered by
    ccdExposureId) are combined with operator.add through pool.reduce, and
    the total is logged.
    """
    pool = Pool("test")

    # Less overhead to transfer the butler once rather than in each dataRef
    idsByExposure = {}
    for ccdRef in visitRef.subItems("ccd"):
        if ccdRef.datasetExists("raw"):
            exposureId = ccdRef.get("ccdExposureId")
            idsByExposure[exposureId] = ccdRef.dataId
    dataIdList = collections.OrderedDict(sorted(idsByExposure.items()))

    with self.logOperation("master"):
        total = pool.reduce(operator.add, self.read, list(dataIdList.values()),
                            butler=visitRef.getButler())
    self.log.info("Total number of pixels read: %d" % (total, ))
def runDataRef(self, dataRefList):
    """Apply self.runForced to every entry of dataRefList through the pool.

    The "all" pool's cache is cleared before the jobs are dispatched; nothing
    is returned.
    """
    workers = Pool("all")
    workers.cacheClear()
    workers.map(self.runForced, dataRefList)
def runDataRef(self, rawRefList, butler):
    """Map self.runVisit over the unique visit/CCD combinations in rawRefList.

    Each combination is packed into one integer as ``(visit << 4) + ccd``;
    rawRefList itself is passed along as an extra argument to every job.
    """
    pool = Pool("visits")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Make unique combinations of visit and CCD number.
    # This 4 needs to be replaced by a config parameter: with a 4-bit shift,
    # ccd values of 16 or more spill into the bits holding the visit.
    uniqueIds = {(rawRef.dataId['visit'] << 4) + rawRef.dataId['ccd']
                 for rawRef in rawRefList}
    visitCcdIdList = list(uniqueIds)

    # Map visits/ccds out to separate nodes:
    pool.map(self.runVisit, visitCcdIdList, rawRefList)
def run(self, tractPatchRefList, butler, selectIdList=[]):
    """!Determine which tracts are non-empty, then process only those

    @param tractPatchRefList: List of tracts and patches to include in the coaddition
        (one list of patch references per tract)
    @param butler: butler reference object
    @param selectIdList: List of data Ids (i.e. visit, ccd) to consider when making the coadd
    @return list of self.runTract results, one for each tract found to be non-empty
    """
    pool = Pool("tracts")
    pool.storeSet(butler=butler,
                  skymap=butler.get(self.config.coaddName + "Coadd_skyMap"))

    def soleTract(patchRefs):
        # All patch references of one entry must agree on the tract.
        found = set(ref.dataId["tract"] for ref in patchRefs)
        assert len(found) == 1
        return found.pop()

    tractIdList = [soleTract(patchRefs) for patchRefs in tractPatchRefList]

    selectDataList = []
    for selected in pool.mapNoBalance(self.readSelection, selectIdList):
        if selected is not None:
            selectDataList.append(selected)

    nonEmptyList = pool.mapNoBalance(self.checkTract, tractIdList, selectDataList)
    survivors = []
    for patchRefs, nonEmpty in zip(tractPatchRefList, nonEmptyList):
        if nonEmpty:
            survivors.append(patchRefs)
    tractPatchRefList = survivors
    self.log.info("Non-empty tracts (%d): %s" %
                  (len(tractPatchRefList),
                   [patchRefs[0].dataId["tract"] for patchRefs in tractPatchRefList]))

    # Install the dataRef in the selectDataList
    for selected in selectDataList:
        selected.dataRef = getDataRef(butler, selected.dataId, "calexp")

    # Process the non-empty tracts
    return [self.runTract(patchRefs, butler, selectDataList)
            for patchRefs in tractPatchRefList]
def runDataRef(self, index):
    """Map self.process over the 200 field numbers belonging to group `index`.

    The fields run from 200 * index (inclusive) to 200 * (index + 1)
    (exclusive).
    """
    # Prepare the pool
    pool = Pool("processCosmo")
    pool.cacheClear()

    firstField = 200 * index
    endField = 200 * (index + 1)
    pool.map(self.process, np.arange(firstField, endField))
def runDataRef(self, index):
    """Map self.process over the 200 field numbers belonging to group `index`.

    The experiment directory is created if needed and stored on the pool as
    ``expDir`` before the jobs are dispatched.  The fields run from
    200 * index (inclusive) to 200 * (index + 1) (exclusive).
    """
    self.log.info('begining for group %d' % (index))

    # Prepare the storeSet
    pool = Pool("cgcSimBasicBatch")
    pool.cacheClear()
    # Alternative directory names kept for reference:
    # "galaxy_basic_psf60", "small0_psf60", "galaxy_basic2Center_psf60"
    expDir = "galaxy_basic2Shift_psf60"
    # exist_ok avoids the isdir()/mkdir() race when several groups start at
    # the same time and all try to create the same directory.
    os.makedirs(expDir, exist_ok=True)
    pool.storeSet(expDir=expDir)

    fieldList = np.arange(200 * index, 200 * (index + 1))
    pool.map(self.process, fieldList)
    return
def runDataRef(self, patchRefList):
    """!Run multiband processing on coadds

    Only the master node runs this method.

    No real MPI communication (scatter/gather) takes place: all I/O goes
    through the disk. We want the intermediate stages on disk, and the
    component Tasks are implemented around this, so we just follow suit.

    The data ids are grouped by patch, a data reference is built for each
    group and self.runAssociation is mapped over those references.

    @param patchRefList:  Data references to run measurement
    """
    print(len(patchRefList))
    # The butler is taken from the first usable data reference.
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            # All references are expected to share one tract.
            assert tract == dataId["tract"]

        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    print(patches.values())

    # Use the local butler: no `cache` object exists in this method, so the
    # former `cache.butler` raised NameError.
    # NOTE(review): each value of `patches` is a *list* of data ids, not a
    # single data id -- confirm that getDataRef accepts that.
    dataRefList = [getDataRef(butler, dataId, self.config.coaddName + "Coadd_calexp")
                   for dataId in patches.values()]
    pool.map(self.runAssociation, dataRefList)
def runDataRef(self, Id):
    """Map self.process over the fields of group Id.

    A group holds self.config.perGroup consecutive field numbers, starting
    at perGroup * Id.
    """
    self.log.info('beginning group %d' % (Id))
    groupSize = self.config.perGroup
    firstField = groupSize * Id
    endField = groupSize * (Id + 1)

    # Prepare the pool
    pool = Pool("noiSim")
    pool.cacheClear()
    pool.map(self.process, range(firstField, endField))

    self.log.info('finish group %d' % (Id))
def runTract(self, patchRefList, butler, selectDataList=[]):
    """Run stacking on a tract

    This method only runs on the master node.

    Warps are made first (self.warp, one job per patch), then the coadds
    (self.coadd) using the per-patch selection returned by the warp step.

    @param patchRefList: List of patch data references for tract
    @param butler: Data butler
    @param selectDataList: List of SelectStruct for inputs
    """
    pool = Pool("stacker")
    pool.cacheClear()
    coaddName = self.config.coaddName
    pool.storeSet(butler=butler,
                  warpType=coaddName + "Coadd_tempExp",
                  coaddType=coaddName + "Coadd")

    patchIdList = [ref.dataId for ref in patchRefList]
    selectedData = pool.map(self.warp, patchIdList, selectDataList)
    if self.config.doBackgroundReference:
        self.backgroundReference.run(patchRefList, selectDataList)

    def refNamer(ref):
        # "x,y" patch name -> (x, y) tuple of ints, used as lookup key
        return tuple(int(part) for part in ref.dataId["patch"].split(","))

    lookup = {refNamer(ref): selected
              for ref, selected in zip(patchRefList, selectedData)}
    coaddData = []
    for ref in patchRefList:
        coaddData.append(Struct(patchId=ref.dataId,
                                selectDataList=lookup[refNamer(ref)]))
    pool.map(self.coadd, coaddData)
def run(self, expRef):
    """Measure focus for exposure

    This method is the top-level for running the focus measurement
    as a stand-alone BatchPoolTask.

    Only the master node runs this method.

    Returns the value produced by self.measureFocus.
    """
    pool = Pool("processFocus")
    pool.cacheClear()
    pool.storeSet(butler=expRef.getButler())

    # Only CCDs that have raw data and are flagged by self.isFocus are used.
    dataIdList = []
    for ccdRef in expRef.subItems("ccd"):
        if ccdRef.datasetExists("raw") and self.isFocus(ccdRef):
            dataIdList.append(ccdRef.dataId)
    # NOTE(review): sorting relies on the data ids being orderable -- plain
    # dicts are not on Python 3; confirm the dataId type.
    dataIdList.sort()

    results = pool.map(self.processPool, dataIdList)

    camera = expRef.get("camera")
    plotFilename = expRef.get("focusPlot_filename")
    focus = self.measureFocus(results, camera, plotFilename)
    self.log.info("Focus result for %s: %s" % (expRef.dataId, focus))
    return focus
def runDataRef(self, patchRefList):
    """!Run multiband processing on coadds

    Only the master node runs this method.

    No real MPI communication (scatter/gather) takes place: all I/O goes
    through the disk. We want the intermediate stages on disk, and the
    component Tasks are implemented around this, so we just follow suit.

    Stages, in order: (optional) detection, merge detections, deblend,
    measurement, merge measurements, forced photometry.

    @param patchRefList:  Data references to run measurement
    """
    # The butler is taken from the first usable data reference.
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # MultiBand measurements require that the detection stage be completed
    # before measurements can be made.
    #
    # The configuration for coaddDriver.py allows detection to be turned
    # off in the event that fake objects are to be added during the
    # detection process.  This allows the long co-addition process to be
    # run once, and multiple different MultiBand reruns (with different
    # fake objects) to exist from the same base co-addition.
    #
    # However, we only re-run detection if doDetection is explicitly True
    # here (this should always be the opposite of coaddDriver.doDetection);
    # otherwise we have no way to tell reliably whether any detections
    # present in an input repo are safe to use.
    if self.config.doDetection:
        detectionList = []
        for patchRef in patchRefList:
            if ("detectCoaddSources" in self.reuse and
                    patchRef.datasetExists(self.coaddType + "Coadd_calexp", write=True)):
                self.log.info("Skipping detectCoaddSources for %s; output already exists." %
                              patchRef.dataId)
                continue
            if not patchRef.datasetExists(self.coaddType + "Coadd"):
                self.log.debug("Not processing %s; required input %sCoadd missing." %
                               (patchRef.dataId, self.config.coaddName))
                continue
            detectionList.append(patchRef)

        pool.map(self.runDetection, detectionList)

    # Only patches with both the calexp and the detection catalog go on.
    patchRefList = [patchRef for patchRef in patchRefList
                    if patchRef.datasetExists(self.coaddType + "Coadd_calexp") and
                    patchRef.datasetExists(self.config.coaddName + "Coadd_det",
                                           write=self.config.doDetection)]
    dataIdList = [patchRef.dataId for patchRef in patchRefList]

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            # All references are expected to share one tract.
            assert tract == dataId["tract"]

        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runMergeDetections, patches.values())

    # Deblend merged detections, and test for reprocessing
    #
    # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
    # footprints can suck up a lot of memory in the deblender, which means that when we process on a
    # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
    # they may have astronomically interesting data, we want the ability to go back and reprocess them
    # with a more permissive configuration when we have more memory or processing time.
    #
    # self.runDeblendMerged will return whether there are any footprints in that image that required
    # reprocessing.  We need to convert that list of booleans into a dict mapping the patchId (x,y) to
    # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
    # a particular patch.
    #
    # This determination of which patches need to be reprocessed exists only in memory (the measurements
    # have been written, clobbering the old ones), so if there was an exception we would lose this
    # information, leaving things in an inconsistent state (measurements, merged measurements and
    # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
    # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the measurements,
    # merge and forced photometry.
    #
    # This is, hopefully, a temporary workaround until we can improve the
    # deblender.

    # Pre-bind so the `finally` clause does not hit a NameError (hiding the
    # real error) when the deblend map itself raises.
    reprocessed = []
    try:
        reprocessed = pool.map(self.runDeblendMerged, patches.values())
    finally:
        if self.config.reprocessing:
            # The deblend jobs were mapped over `patches`, so there is one
            # flag per *patch*, in dict order.  Pairing the flags with
            # dataIdList (one entry per patch *and* filter) attached them to
            # the wrong patches and left some patches without any entry.
            patchReprocessing = {}
            for patchId, reprocess in zip(patches, reprocessed):
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recover easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # We must have failed at some point while reprocessing
                    # and we're starting over
                    # NOTE(review): this branch cannot be reached because of
                    # the `continue` above -- confirm which one is intended.
                    patchReprocessing[patchId] = True

    def needsProcessing(patchId):
        # Everything is processed unless reprocessing mode narrows it down.
        return not self.config.reprocessing or patchReprocessing[patchId]

    # Only process patches that have been identified as needing it
    pool.map(self.runMeasurements, [dataId1 for dataId1 in dataIdList
                                    if needsProcessing(dataId1["patch"])])
    pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items()
                                         if needsProcessing(patchId)])
    pool.map(self.runForcedPhot, [dataId1 for dataId1 in dataIdList
                                  if needsProcessing(dataId1["patch"])])

    # Remove persisted reprocessing determination
    if self.config.reprocessing:
        for patchId in patchReprocessing:
            if not patchReprocessing[patchId]:
                continue
            dataId = dict(tract=tract, patch=patchId)
            filename = butler.get(reprocessDataset + "_filename", dataId)[0]
            os.unlink(filename)
# New context: should have no 'p' fruit = ["tomato", "tomahtoe"] veges = {"potato": "potahtoe"} print(pool2.mapNoBalance(test1, dataList, *fruit, **veges)) print(pool2.mapToPrevious(test2, dataList, *fruit, **veges)) def context3(pool3): # Check cache/store functionality pool3.storeSet(p=1, q=2) print(pool1.map(test1, dataList, "foo", foo="bar")) pool3.storeDel("p") pool3.storeList() pool1.cacheList() pool1.cacheClear() pool3.storeClear() pool3.storeList() pool1 = Pool(1) context1(pool1) pool2 = Pool(2) context2(pool2) pool3 = Pool(3) context3(pool3) Pool().exit() # This is important, to bring everything down nicely; or the wheels will just keep turning # Can do stuff here, just not use any MPI because the slaves have exited. # If you want the slaves, then pass "killSlaves=False" to startPool(); they'll emerge after startPool(). print("Done.")
def runDataRef(self, expRef):
    """Perform sky correction on an exposure

    We restore the original sky, and remove it again using multiple
    algorithms. We optionally apply:

    1. A large-scale background model.
        This step removes very-large-scale sky such as moonlight.
    2. A sky frame.
    3. A medium-scale background model.
        This step removes residual sky (This is smooth on the focal plane).

    Only the master node executes this method. The data is held on
    the slave nodes, which do all the hard work.

    The steps are switched by ``config.doBgModel``, ``config.doSky`` and
    ``config.doBgModel2`` respectively.  At the end a camera-level image is
    written as ``calexp_camera`` and ``self.write`` is run for every CCD.

    Parameters
    ----------
    expRef : `lsst.daf.persistence.ButlerDataRef`
        Data reference for exposure.

    See Also
    --------
    ~lsst.pipe.drivers.SkyCorrectionTask.run
    """
    if DEBUG:
        # Suffix for the debugging images; only defined (and only used)
        # when DEBUG is set.
        extension = "-%(visit)d.fits" % expRef.dataId

    with self.logOperation("processing %s" % (expRef.dataId,)):
        pool = Pool()
        pool.cacheClear()
        pool.storeSet(butler=expRef.getButler())
        camera = expRef.get("camera")

        # Only CCDs for which the configured calexp dataset exists are used.
        dataIdList = [ccdRef.dataId for ccdRef in expRef.subItems("ccd") if
                      ccdRef.datasetExists(self.config.calexpType)]

        # Initial load; every later step goes through mapToPrevious with the
        # same dataIdList (presumably so each CCD stays on the node that
        # loaded it -- confirm against the Pool documentation).
        exposures = pool.map(self.loadImage, dataIdList)
        if DEBUG:
            makeCameraImage(camera, exposures, "restored" + extension)
            exposures = pool.mapToPrevious(self.collectOriginal, dataIdList)
            makeCameraImage(camera, exposures, "original" + extension)
            exposures = pool.mapToPrevious(self.collectMask, dataIdList)
            makeCameraImage(camera, exposures, "mask" + extension)

        if self.config.doBgModel:
            exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel)

        if self.config.doSky:
            # Measure per-CCD scales, solve for one scale, then subtract the
            # sky frame with that scale.
            measScales = pool.mapToPrevious(self.measureSkyFrame, dataIdList)
            scale = self.sky.solveScales(measScales)
            self.log.info("Sky frame scale: %s" % (scale,))

            exposures = pool.mapToPrevious(self.subtractSkyFrame, dataIdList, scale)
            if DEBUG:
                makeCameraImage(camera, exposures, "skysub" + extension)
                calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                makeCameraImage(camera, calibs, "sky" + extension)

        if self.config.doBgModel2:
            exposures = self.focalPlaneBackground(camera, pool, dataIdList, self.config.bgModel2)

        # Persist camera-level image of calexp
        image = makeCameraImage(camera, exposures)
        expRef.put(image, "calexp_camera")

        pool.mapToPrevious(self.write, dataIdList)
def run(self, expRef):
    """Perform sky correction on an exposure

    We restore the original sky, and remove it again using multiple
    algorithms. We optionally apply:

    1. A large-scale background model.
    2. A sky frame.

    Only the master node executes this method. The data is held on
    the slave nodes, which do all the hard work.

    The steps are switched by ``config.doBgModel`` and ``config.doSky``.
    At the end a camera-level image is written as ``calexp_camera`` and
    ``self.write`` is run for every CCD.

    Parameters
    ----------
    expRef : `lsst.daf.persistence.ButlerDataRef`
        Data reference for exposure.
    """
    if DEBUG:
        # Suffix for the debugging images; only defined (and only used)
        # when DEBUG is set.
        extension = "-%(visit)d.fits" % expRef.dataId

    with self.logOperation("processing %s" % (expRef.dataId, )):
        pool = Pool()
        pool.cacheClear()
        pool.storeSet(butler=expRef.getButler())
        camera = expRef.get("camera")

        # Only CCDs that have a calexp are used.
        dataIdList = [
            ccdRef.dataId for ccdRef in expRef.subItems("ccd")
            if ccdRef.datasetExists("calexp")
        ]

        # Initial load; every later step goes through mapToPrevious with the
        # same dataIdList (presumably so each CCD stays on the node that
        # loaded it -- confirm against the Pool documentation).
        exposures = pool.map(self.loadImage, dataIdList)
        if DEBUG:
            makeCameraImage(camera, exposures, "restored" + extension)
            exposures = pool.mapToPrevious(self.collectOriginal, dataIdList)
            makeCameraImage(camera, exposures, "original" + extension)
            exposures = pool.mapToPrevious(self.collectMask, dataIdList)
            makeCameraImage(camera, exposures, "mask" + extension)

        if self.config.doBgModel:
            bgModel = FocalPlaneBackground.fromCamera(
                self.config.bgModel, camera)
            # Every job gets its own clone to accumulate into; the clones
            # that come back are merged into the master model below.
            data = [
                Struct(dataId=dataId, bgModel=bgModel.clone())
                for dataId in dataIdList
            ]
            bgModelList = pool.mapToPrevious(self.accumulateModel, data)
            for ii, bg in enumerate(bgModelList):
                self.log.info("Background %d: %d pixels", ii,
                              bg._numbers.getArray().sum())
                bgModel.merge(bg)

            if DEBUG:
                bgModel.getStatsImage().writeFits("bgModel" + extension)
                bgImages = pool.mapToPrevious(self.realiseModel,
                                              dataIdList, bgModel)
                makeCameraImage(camera, bgImages,
                                "bgModelCamera" + extension)

            # Subtract the merged model from every CCD.
            exposures = pool.mapToPrevious(self.subtractModel, dataIdList,
                                           bgModel)
            if DEBUG:
                makeCameraImage(camera, exposures, "modelsub" + extension)

        if self.config.doSky:
            # Measure per-CCD scales, solve for one scale, then subtract the
            # sky frame with that scale.
            measScales = pool.mapToPrevious(self.measureSkyFrame,
                                            dataIdList)
            scale = self.sky.solveScales(measScales)
            self.log.info("Sky frame scale: %s" % (scale, ))
            exposures = pool.mapToPrevious(self.subtractSkyFrame,
                                           dataIdList, scale)
            if DEBUG:
                makeCameraImage(camera, exposures, "skysub" + extension)
                calibs = pool.mapToPrevious(self.collectSky, dataIdList)
                makeCameraImage(camera, calibs, "sky" + extension)

        # Persist camera-level image of calexp
        image = makeCameraImage(camera, exposures)
        expRef.put(image, "calexp_camera")

        pool.mapToPrevious(self.write, dataIdList)
def runDataRef(self, index):
    """Map self.process over the 100 field numbers belonging to group `index`.

    The configuration values doHSM, doFPFS, galDir and outDir are stored on
    the pool (one storeSet call each, in that order) before the jobs are
    dispatched.  The fields run from 100 * index (inclusive) to
    100 * (index + 1) (exclusive).
    """
    # Prepare the pool
    pool = Pool("processBasic")
    pool.cacheClear()
    for option in ("doHSM", "doFPFS", "galDir", "outDir"):
        pool.storeSet(**{option: getattr(self.config, option)})

    firstField = 100 * index
    endField = 100 * (index + 1)
    pool.map(self.process, np.arange(firstField, endField))
def run(self, patchRefList):
    """Run multiband processing on coadds

    All nodes execute this method, though the master and slaves
    take different routes through it.

    No real MPI communication takes place: all I/O goes through the disk.
    We want the intermediate stages on disk, and the component Tasks are
    implemented around this, so we just follow suit.

    Stages, in order: merge detections, measurement of merged detections,
    merge measurements, forced photometry.

    @param patchRefList:  Data references to run measurement
    """
    # The butler is taken from the first usable data reference.
    for patchRef in patchRefList:
        if patchRef:
            butler = patchRef.getButler()
            break
    else:
        raise RuntimeError("No valid patches")
    pool = Pool("all")
    pool.cacheClear()
    pool.storeSet(butler=butler)

    # Only patches with both the coadd and the detection catalog go on.
    patchRefList = [patchRef for patchRef in patchRefList if
                    patchRef.datasetExists(self.config.coaddName + "Coadd") and
                    patchRef.datasetExists(self.config.coaddName + "Coadd_det")]
    dataIdList = [patchRef.dataId for patchRef in patchRefList]

    # Group by patch
    patches = {}
    tract = None
    for patchRef in patchRefList:
        dataId = patchRef.dataId
        if tract is None:
            tract = dataId["tract"]
        else:
            # All references are expected to share one tract.
            assert tract == dataId["tract"]

        patch = dataId["patch"]
        if patch not in patches:
            patches[patch] = []
        patches[patch].append(dataId)

    pool.map(self.runMergeDetections, patches.values())

    # Measure merged detections, and test for reprocessing
    #
    # The reprocessing allows us to have multiple attempts at deblending large footprints. Large
    # footprints can suck up a lot of memory in the deblender, which means that when we process on a
    # cluster, we want to refuse to deblend them (they're flagged "deblend.parent-too-big"). But since
    # they may have astronomically interesting data, we want the ability to go back and reprocess them
    # with a more permissive configuration when we have more memory or processing time.
    #
    # self.runMeasureMerged will return whether there are any footprints in that image that required
    # reprocessing.  We need to convert that list of booleans into a dict mapping the patchId (x,y) to
    # a boolean. That tells us whether the merge measurement and forced photometry need to be re-run on
    # a particular patch.
    #
    # This determination of which patches need to be reprocessed exists only in memory (the measurements
    # have been written, clobbering the old ones), so if there was an exception we would lose this
    # information, leaving things in an inconsistent state (measurements new, but merged measurements and
    # forced photometry old). To attempt to preserve this status, we touch a file (dataset named
    # "deepCoadd_multibandReprocessing") --- if this file exists, we need to re-run the merge and
    # forced photometry.
    #
    # This is, hopefully, a temporary workaround until we can improve the deblender.

    # Pre-bind so the `finally` clause does not hit a NameError (hiding the
    # real error) when the measurement map itself raises.
    reprocessed = []
    try:
        reprocessed = pool.map(self.runMeasureMerged, dataIdList)
    finally:
        if self.config.reprocessing:
            # A patch needs reprocessing if any of its data ids asked for it.
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recover easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # We must have failed at some point while reprocessing and we're starting over
                    # NOTE(review): this branch cannot be reached because of
                    # the `continue` above -- confirm which one is intended.
                    patchReprocessing[patchId] = True

    # Only process patches that have been identified as needing it
    # (dict.items() works on both Python 2 and 3; iteritems() is Python 2 only).
    pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items() if
                                         not self.config.reprocessing or patchReprocessing[patchId]])
    pool.map(self.runForcedPhot, [dataId for dataId in dataIdList if
                                  not self.config.reprocessing or patchReprocessing[dataId["patch"]]])

    # Remove persisted reprocessing determination
    if self.config.reprocessing:
        for patchId in patchReprocessing:
            if not patchReprocessing[patchId]:
                continue
            dataId = dict(tract=tract, patch=patchId)
            filename = butler.get(reprocessDataset + "_filename", dataId)[0]
            os.unlink(filename)
def runDataRef(self, pend):
    """Run self.process over 1000 indices and write the results as a table.

    The non-None results are collected into an eight-column table
    (e1_z1, n_z1, ..., e1_z4, n_z4) and written to
    ``<outDir>/e1_<pend>_psf<psfFWHM>.fits``, overwriting any existing file.

    Parameters
    ----------
    pend :
        Setup identifier; it is stored on the pool and used in the output
        file name.
    """
    psfFWHM = '60'  # '60','HSC'
    npend = 'outCosmoR-var36em4'
    outDir = os.path.join(self.config.rootDir, npend, 'mag245-res03-bm38-dis4')
    # makedirs also creates the intermediate `npend` level, which a plain
    # mkdir would fail on, and exist_ok avoids the isdir()/mkdir() race when
    # several jobs start at the same time.
    os.makedirs(outDir, exist_ok=True)
    self.log.info('beginning for %s, seeing %s: ' % (pend, psfFWHM))

    # Prepare the storeSet
    pool = Pool("reGaussCosmoMeasBatch")
    pool.cacheClear()
    pool.storeSet(pend=pend)
    pool.storeSet(psfFWHM=psfFWHM)
    pool.storeSet(npend=npend)

    # Prepare the pool
    resList = pool.map(self.process, np.arange(1000))
    resList = [x for x in resList if x is not None]
    # NOTE(review): a single surviving result is not written (`> 1`, not
    # `> 0`) -- confirm that this is intended.
    if len(resList) > 1:
        newTab = Table(rows=resList,
                       names=('e1_z1', 'n_z1', 'e1_z2', 'n_z2',
                              'e1_z3', 'n_z3', 'e1_z4', 'n_z4'))
        finOname = os.path.join(outDir, 'e1_%s_psf%s.fits' % (pend, psfFWHM))
        newTab.write(finOname, overwrite=True)
    return