def testRedir(self):
    """Test redirection to stream."""
    with TestRedir.StdoutCapture(self.outputFilename):
        log.configure()
        dest = io.StringIO()
        log_utils.enable_notebook_logging(dest)
        log.log(log.getDefaultLogger().getName(), log.INFO, "This is INFO")
        log.info(u"This is unicode INFO")
        log.trace("This is TRACE")
        log.debug("This is DEBUG")
        log.warn("This is WARN")
        log.error("This is ERROR")
        log.fatal("This is FATAL")
        log_utils.disable_notebook_logging()
        log.warn("Format %d %g %s", 3, 2.71828, "foo")
    self.assertEqual(
        dest.getvalue(),
        """root INFO: This is INFO
root INFO: This is unicode INFO
root WARN: This is WARN
root ERROR: This is ERROR
root FATAL: This is FATAL
""",
    )
    self.check(
        """
root WARN: Format 3 2.71828 foo
"""
    )
def testForwardToPython(self):
    """Test that `lsst.log` log messages can be forwarded to `logging`."""
    log.configure()

    # Without forwarding we only get python logger messages captured
    with self.assertLogs(level="WARNING") as cm:
        log.warn("lsst.log warning message that will not be forwarded to Python")
        logging.warning("Python logging message that will be captured")
    self.assertEqual(len(cm.output), 1)

    log.usePythonLogging()

    # With forwarding we get 2 logging messages captured
    with self.assertLogs(level="WARNING") as cm:
        log.warn("This is a warning from lsst log meant for python logging")
        logging.warning("Python warning log message to be captured")
    self.assertEqual(len(cm.output), 2)

    loggername = "newlogger"
    log2 = log.Log.getLogger(loggername)
    with self.assertLogs(level="INFO", logger=loggername):
        log2.info("Info message to non-root lsst logger")

    # Check that debug and info are working properly
    # This test should return a single log message
    with self.assertLogs(level="INFO", logger=loggername) as cm:
        log2.info("Second INFO message to non-root lsst logger")
        log.debug("Debug message to root lsst logger")

    self.assertEqual(len(cm.output), 1, f"Got output: {cm.output}")

    logging.shutdown()
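# A minimal usage sketch (outside of unit tests) of the forwarding behaviour
# exercised above: once log.usePythonLogging() is called, messages emitted
# through lsst.log are routed to the standard `logging` handlers. The handler
# configuration below is an assumption for illustration only.
import logging

import lsst.log as log

logging.basicConfig(level=logging.INFO)
log.configure()
log.usePythonLogging()
log.warn("This lsst.log message is delivered via the logging module")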
def run(self):
    log.debug("VanillaCondorWorkflowMonitor Thread started")
    sleepInterval = 5
    # we don't decide when we finish, someone else does.
    while True:
        # TODO: this timeout value should go away when the GIL lock
        # relinquish is implemented in events.
        if sleepInterval != 0:
            time.sleep(sleepInterval)
        event = self._receiver.receiveEvent(1)
        logEvent = self._Logreceiver.receiveEvent(1)
        jobOfficeEvent = self._jobOfficeReceiver.receiveEvent(1)

        if jobOfficeEvent is not None:
            val = self._parent.handleJobOfficeEvent(jobOfficeEvent)

        if event is not None:
            val = self._parent.handleEvent(event)
            if not self._parent._locked.running:
                print("and...done!")
                return
        elif logEvent is not None:
            val = self._parent.handleEvent(logEvent)
            if not self._parent._locked.running:
                print("logger handled... and... done!")
                return

        # poll quickly while any of the receivers delivered an event
        if (event is not None) or (logEvent is not None) or (jobOfficeEvent is not None):
            sleepInterval = 0
        else:
            sleepInterval = 5
def submitJob(self, condorFile):
    """Submit a condor file, and return the job number associated with it.

    Parameters
    ----------
    condorFile : `str`
        condor submit file.

    Notes
    -----
    expected output:

    Submitting job(s).
    Logging submit event(s).
    1 job(s) submitted to cluster 1317.
    """
    log.debug("CondorJobs:submitJob")
    clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
    submitRequest = "condor_submit %s" % condorFile
    pop = os.popen(submitRequest, "r")
    line = pop.readline()
    line = pop.readline()
    num = clusterexp.findall(line)
    if len(num) == 0:
        return None
    print("submitted job # %s as file %s" % (num[0], condorFile))
    return num[0]
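# Hypothetical usage of CondorJobs.submitJob(); the import path below is an
# assumption for illustration (the real module location is not shown here).
from condor_jobs import CondorJobs  # assumed module name

cj = CondorJobs()
cluster_id = cj.submitJob("launch.condor")  # path to a condor submit file
if cluster_id is None:
    raise RuntimeError("condor_submit did not report a cluster id")
print("submitted as cluster %s" % cluster_id)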
def _cutout_from_src(self, data_id, src_image, xy_center_x, xy_center_y, width, height, wcs):
    # Returns an image cutout from the source image.
    #   src_image - Source image.
    #   xy_center - The center of the region to cut out, in pixels.
    #   width - The width in pixels.
    #   height - The height in pixels.
    # height and width are trimmed if they go past the edge of the source image.
    # First, center the cutout image.
    pix_ulx = int(xy_center_x - width / 2.0)
    pix_uly = int(xy_center_y - height / 2.0)
    xy_center = afw_geom.Point2I(pix_ulx, pix_uly)
    self._log.debug("xy_center={}".format(xy_center))
    src_box = src_image.getBBox()
    # assuming both src_box and xy_center to be in Box2I
    co_box = afw_geom.Box2I(xy_center, afw_geom.Extent2I(int(width), int(height)))
    if co_box.overlaps(src_box):
        co_box.clip(src_box)
    else:
        self._log.debug("cutout image wanted is OUTSIDE source image -> None")
        return None
    if isinstance(src_image, afw_image.ExposureF):
        self._log.debug(
            "co_box pix_ulx={} pix_end_x={} pix_uly={} pix_end_y={}".format(
                pix_ulx, pix_ulx + width, pix_uly, pix_uly + height))
        # image will keep wcs from source image
        image = afw_image.ExposureF(src_image, co_box)
    elif isinstance(src_image, afw_image.ExposureU):
        image = afw_image.ExposureU(src_image, co_box)
    else:
        raise Exception("Unexpected source image object type")
    return image
def _cutoutbox_pixels(self, src_image, xy_center_x, xy_center_y, width, height, wcs, log):
    # Returns an image cutout from the source image.
    #   src_image - Source image.
    #   xy_center - The center of the region to cut out, in pixels.
    #   width - The width in pixels.
    #   height - The height in pixels.
    # height and width will be trimmed if they go past the edge of the source image.
    # First, center the cutout image.
    pix_ulx = int(xy_center_x - width / 2.0)
    pix_uly = int(xy_center_y - height / 2.0)
    xy_center = afw_geom.Point2I(pix_ulx, pix_uly)
    log.debug("xy_center={}".format(xy_center))
    src_box = src_image.getBBox()
    # assuming both src_box and xy_center to be in Box2I
    co_box = afw_geom.Box2I(xy_center, afw_geom.Extent2I(int(width), int(height)))
    co_box.clip(src_box)
    if co_box.isEmpty():
        return None
    pix_ulx = co_box.getBeginX()
    pix_end_x = co_box.getEndX()
    pix_uly = co_box.getBeginY()
    pix_end_y = co_box.getEndY()
    log.debug("co_box pix_ulx={} pix_end_x={} pix_uly={} pix_end_y={}".format(
        pix_ulx, pix_end_x, pix_uly, pix_end_y))
    if isinstance(src_image, afw_image.ExposureF):
        img = afw_image.ExposureF(src_image, co_box)
        img.setWcs(wcs)
    else:
        # for non-ExposureF, e.g. raw (DecoratedImage)
        img = src_image[pix_ulx:pix_end_x, pix_uly:pix_end_y].clone()
    return img
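# A small, standalone sketch of the clipping geometry used above: build the
# cutout box from its lower-left corner and extent, clip it against the parent
# bounding box, and give up if nothing remains. lsst.geom is assumed to provide
# Box2I/Point2I/Extent2I (older stacks exposed them via lsst.afw.geom, the
# afw_geom alias used in the methods above).
import lsst.geom as geom

src_box = geom.Box2I(geom.Point2I(0, 0), geom.Extent2I(2048, 2048))
corner = geom.Point2I(2000, 1990)                 # lower-left corner of the cutout
co_box = geom.Box2I(corner, geom.Extent2I(100, 100))
co_box.clip(src_box)                              # trims the part outside src_box
print(co_box.isEmpty(), co_box.getDimensions())   # False, (48, 58)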
def _image_cutout(_request, image_db_class, units):
    """Get a raw image response based on input parameters.

    image_db_class should be the appropriate class
    (W13DeepCoadDb, W13RawDb, etc.)
    units should be 'pixel' or 'arcsecond'
    """
    ra = _request.args.get('ra')
    dec = _request.args.get('dec')
    filter = _request.args.get('filter')
    width = _request.args.get('width')
    height = _request.args.get('height')

    # check inputs
    try:
        ra, dec, filter = _assert_ra_dec_filter(ra, dec, filter, 'irg')
        try:
            width = float(width)
            height = float(height)
        except ValueError:
            msg = "INVALID_INPUT width={} height={}".format(width, height)
            raise ValueError(msg)
    except ValueError as e:
        return _error(ValueError.__name__, e.args[0], BAD_REQUEST)

    log.info("raw cutout pixel ra={} dec={} filter={} width={} height={}".format(
        ra, dec, filter, width, height))

    # fetch the image here
    img_getter = image_open(current_app.config["DAX_IMG_DBCONF"], image_db_class)
    img = img_getter.image_cutout(ra, dec, filter, width, height, units)
    if img is None:
        return _image_not_found()
    log.debug("Sub w={} h={}".format(img.getWidth(), img.getHeight()))
    return _file_response(img, "cutout.fits")
def _image_cutout_from_science_id(_request, image_db_class, science_id):
    """Get cutout image from the id given.

    image_db_class should be the appropriate class
    (W13CalexpDb, W13DeepCoadDb, W13RawDb, etc.)
    Units: arcsecond, pixel (request parameters)
    """
    # fetch the parameters of interest
    # Only one of (widthAng, heightAng), (widthPix, heightPix) should be valid
    params = ['ra', 'dec', 'widthAng', 'heightAng', 'widthPix', 'heightPix']
    ra, dec, widthAng, heightAng, widthPix, heightPix = [
        _request.args.get(p) for p in params]
    try:
        if widthAng is not None and heightAng is not None:
            sId, ra, dec, width, height, units = _assert_cutout_parameters(
                science_id, ra, dec, widthAng, heightAng, 'arcsecond')
        elif widthPix is not None and heightPix is not None:
            sId, ra, dec, width, height, units = _assert_cutout_parameters(
                science_id, ra, dec, widthPix, heightPix, 'pixel')
        else:
            msg = "INVALID_INPUT no dimensions for cutout specified"
            raise ValueError(msg)
    except ValueError as e:
        return _error(ValueError.__name__, e.args[0], BAD_REQUEST)

    # fetch the image here
    img_getter = image_open(current_app.config["DAX_IMG_DBCONF"], image_db_class)
    # need to pass the source science id as string
    img = img_getter.imagecutout_from_science_id(science_id, ra, dec, width, height, units)
    if img is None:
        return _image_not_found()
    log.debug("Sub w={} h={}".format(img.getWidth(), img.getHeight()))
    return _file_response(img, "cutout.fits")
def checkConfiguration(self, care=1, issueExc=None):
    """Carry out production-wide configuration checks.

    Parameters
    ----------
    care : `int`
        thoroughness level of the checks
    issueExc : `MultiIssueConfigurationError`
        an instance of MultiIssueConfigurationError to add problems to

    Raises
    ------
    `MultiIssueConfigurationError`
        If issueExc is None, and a configuration error is detected.

    Notes
    -----
    If issueExc is not None, this method will not raise an exception when
    problems are encountered; they will merely be added to the instance.
    It is assumed that the caller will raise the exception as necessary.
    """
    log.debug("checkConfiguration")
    myProblems = issueExc
    if myProblems is None:
        myProblems = MultiIssueConfigurationError(
            "problems encountered while checking configuration")

    for dbconfig in self._databaseConfigurators:
        print("-> dbconfig = ", dbconfig)
        dbconfig.checkConfiguration(care, myProblems)

    if not issueExc and myProblems.hasProblems():
        raise myProblems
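# A brief sketch of the accumulation pattern used by checkConfiguration():
# a caller may pass a single MultiIssueConfigurationError through several
# checks so that every problem is reported in one raise. The import path and
# the problem string are assumptions for illustration; addProblem() and
# hasProblems() are used exactly as in the methods in this module.
from lsst.ctrl.orca.exceptions import MultiIssueConfigurationError  # assumed path

problems = MultiIssueConfigurationError("problems encountered while checking configuration")
problems.addProblem("example problem: database host not specified")  # hypothetical message
if problems.hasProblems():
    raise problems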
def createConfigurator(self, runid, configFile):
    """Create the ProductionRunConfigurator specified in the config file.

    Parameters
    ----------
    runid : `str`
        run id
    configFile : `Config`
        Config specifying which ProductionRunConfigurator to create

    Returns
    -------
    Initialized ProductionRunConfigurator of the type specified in configFile
    """
    log.debug("ProductionRunManager:createConfigurator")

    configuratorClass = ProductionRunConfigurator
    configuratorClassName = None
    if self.config.configurationClass is not None:
        configuratorClassName = self.config.configurationClass
    if configuratorClassName is not None:
        classFactory = NamedClassFactory()
        configuratorClass = classFactory.createClass(configuratorClassName)
    return configuratorClass(runid, configFile, self.repository)
def launch(self, statusListener):
    """Launch this workflow

    Parameters
    ----------
    statusListener : StatusListener
        status listener object
    """
    log.debug("CondorWorkflowLauncher:launch")

    # start the monitor
    # Launch process
    startDir = os.getcwd()
    os.chdir(self.localStagingDir)
    cj = CondorJobs()
    condorDagId = cj.condorSubmitDag(self.dagFile)
    log.debug("Condor dag submitted as job %s", condorDagId)
    os.chdir(startDir)

    # workflow monitor for HTCondor jobs
    self.workflowMonitor = CondorWorkflowMonitor(condorDagId, self.monitorConfig)
    if statusListener is not None:
        self.workflowMonitor.addStatusListener(statusListener)
    self.workflowMonitor.startMonitorThread()

    return self.workflowMonitor
def remoteChmodX(self, remoteName):
    log.debug("VanillaCondorWorkflowConfigurator:remoteChmodX")
    cmd = "gsissh %s chmod +x %s" % (self.remoteLoginName, remoteName)
    pid = os.fork()
    if not pid:
        os.execvp("gsissh", cmd.split())
    os.wait()[0]
def copyToRemote(self, localName, remoteName):
    log.debug("VanillaCondorWorkflowConfigurator:copyToRemote")

    localNameURL = "%s%s" % ("file://", localName)
    remoteNameURL = "%s%s" % (self.transferProtocolPrefix, remoteName)

    cmd = "globus-url-copy -r -vb -cd %s %s " % (localNameURL, remoteNameURL)
    print(cmd)

    # perform this copy from the local machine to the remote machine
    pid = os.fork()
    if not pid:
        # when forking stuff, gotta close *BOTH* the python and C level
        # file descriptors. not strictly needed here, since we're just
        # shutting off stdout and stderr, but a good habit to be in.
        # TODO: Change this to add a check to not close file descriptors
        # if verbosity is set high enough, so you can see the output of
        # the globus-url-copy
        sys.stdin.close()
        sys.stdout.close()
        sys.stderr.close()
        os.close(0)
        os.close(1)
        os.close(2)
        os.execvp("globus-url-copy", cmd.split())
    os.wait()[0]
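# An alternative sketch of the same copy using subprocess instead of
# fork/execvp; this is not the method used above, only an illustration of how
# the child's stdout/stderr can be silenced without closing file descriptors
# by hand. The globus-url-copy arguments mirror the command string built above.
import subprocess


def copy_to_remote_subprocess(local_url, remote_url, quiet=True):
    """Run globus-url-copy and optionally discard its output."""
    out = subprocess.DEVNULL if quiet else None
    subprocess.run(
        ["globus-url-copy", "-r", "-vb", "-cd", local_url, remote_url],
        stdout=out, stderr=out, check=True)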
def run(self):
    """Set Manager and serve requests until complete."""
    self.server.setManager(self._parent)
    self.server.serve()
    log.debug("Everything shutdown - All finished")
def createConfigurator(self, runid, repository, wfName, wfConfig, prodConfig):
    """Create a Workflow configurator for this workflow.

    Parameters
    ----------
    runid : `str`
        the production run id
    repository : `str`
        the directory location of the repository
    wfName : `str`
        the workflow name
    wfConfig : Config
        the config describing the workflow
    prodConfig : Config
        the config describing the overall production.  This provides common
        data that needs to be shared with all pipelines.

    Returns
    -------
    WorkflowConfigurator
    """
    log.debug("WorkflowManager:createConfigurator")

    className = wfConfig.configurationClass
    classFactory = NamedClassFactory()
    configuratorClass = classFactory.createClass(className)
    configurator = configuratorClass(self.runid, repository, prodConfig, wfConfig, wfName)
    return configurator
def __init__(self, name, runid, repository, prodConfig, wfConfig):
    # _locked: a container for data to be shared across threads that
    # have access to this object.
    self._locked = SharedData(False)

    ## workflow name
    self.name = "unnamed"
    if name is not None:
        self.name = name

    ## run id of this workflow
    self.runid = runid

    ## repository where the configuration is kept
    self.repository = repository

    ## workflow configuration
    self.wfConfig = wfConfig

    ## production configuration
    self.prodConfig = prodConfig

    self._workflowConfigurator = None

    log.debug("WorkflowManager:__init__")

    ## the urgency level of how fast to stop the workflow
    self.urgency = 0
    self._launcher = None
    self._monitor = None
def checkConfiguration(self, care=1, issueExc=None):
    # care - level of "care" in checking the configuration to take. In
    # general, the higher the number, the more checks that are made.
    log.debug("checkConfiguration")

    if not self._workflowManagers:
        msg = "%s: production has not been configured yet" % self.runid
        if self._name:
            msg = "%s %s" % (self._name, msg)
        if issueExc is None:
            raise ConfigurationError(msg)
        else:
            issueExc.addProblem(msg)
        return

    myProblems = issueExc
    if myProblems is None:
        myProblems = MultiIssueConfigurationError(
            "problems encountered while checking configuration")

    # check production-wide configuration
    self._productionRunConfigurator.checkConfiguration(care, myProblems)

    # check configuration for each workflow
    for workflow in self._workflowManagers["__order"]:
        workflowMgr = self._workflowManagers[workflow]
        workflowMgr.checkConfiguration(care, myProblems)

    if not issueExc and myProblems.hasProblems():
        raise myProblems
def createDatabaseConfigurator(self, databaseConfig):
    log.debug("ProductionRunConfigurator:createDatabaseConfigurator")
    className = databaseConfig.configurationClass
    classFactory = NamedClassFactory()
    configurationClass = classFactory.createClass(className)
    configurator = configurationClass(self.runid, databaseConfig, self.prodConfig, None)
    return configurator
def waitForFirstFile(self):
    log.debug("GenericFileWaiter:waitForFirstFile")
    print("waiting for log file to be created to confirm launch.")
    while not os.path.exists(self.fileNames[0]):
        time.sleep(1)
    return
def configure(self, workflowVerbosity):
    """Configure this production run

    Parameters
    ----------
    workflowVerbosity : `int`
        verbosity level of the workflows

    Returns
    -------
    mgrs : [ wfMgr1, wfMgr2 ]
        list of workflow managers, one per workflow
    """
    log.debug("ProductionRunConfigurator:configure")

    # TODO - IMPORTANT - NEXT TWO LINES ARE FOR PROVENANCE
    # --------------
    # self._provSetup = ProvenanceSetup()
    # self._provSetup.addAllProductionConfigFiles(self._prodConfigFile, self.repository)
    # --------------

    #
    # setup the database for each database listed in production config.
    # cache the configurators in case we want to check the configuration
    # later.
    #
    databaseConfigs = self.prodConfig.database

    for databaseName in databaseConfigs:
        databaseConfig = databaseConfigs[databaseName]
        cfg = self.createDatabaseConfigurator(databaseConfig)
        cfg.setup(self._provSetup)
        self._databaseConfigurators.append(cfg)

    #
    # do specialized production level configuration, if it exists
    #
    if self.prodConfig.production.configuration.configurationClass is not None:
        specialConfigurationConfig = self.prodConfig.production.configuration
        # XXX - specialConfigurationConfig maybe?
        self.specializedConfigure(specialConfigurationConfig)

    workflowConfigs = self.prodConfig.workflow
    workflowManagers = []
    for wfName in workflowConfigs:
        wfConfig = workflowConfigs[wfName]
        # copy in appropriate production level info into workflow Node -- ?

        workflowManager = self.createWorkflowManager(self.prodConfig, wfName, wfConfig)
        workflowLauncher = workflowManager.configure(self._provSetup, workflowVerbosity)
        if workflowLauncher is None:
            raise MultiIssueConfigurationError("error configuring workflowLauncher")

        workflowManagers.append(workflowManager)

    return workflowManagers
def __init__(self, runid, repository, prodConfig, wfConfig, wfName):
    log.debug("GenericPipelineWorkflowConfigurator:__init__")
    ## run id for this workflow
    self.runid = runid
    ## production configuration
    self.prodConfig = prodConfig
    ## workflow configuration
    self.wfConfig = wfConfig
    ## workflow name
    self.wfName = wfName
    ## repository location
    self.repository = repository
    ## workflow logging verbosity level
    self.wfVerbosity = None
    ## nodes used in this workflow
    self.nodes = None
    ## directories specified
    self.directories = None
    ## @deprecated directories specified
    self.dirs = None
    ## the default directory used at the beginning of the run
    self.defaultRunDir = None
    ## list of log files
    self.logFileNames = []
    ## list of pipeline names
    self.pipelineNames = []
    ## host name of the event broker
    self.eventBrokerHost = None
def configure(self, provSetup=None, workflowVerbosity=None):
    """Prepare a workflow for launching.

    Parameters
    ----------
    provSetup : `object`
        A provenance setup object to pass to Configurator instances.
    workflowVerbosity : `int`
        The logging verbosity level to set for workflows

    Returns
    -------
    WorkflowLauncher
    """
    log.debug("WorkflowManager:configure")
    if self._workflowConfigurator:
        log.info("production has already been configured.")
        return

    # lock this branch of code
    try:
        self._locked.acquire()

        self._workflowConfigurator = self.createConfigurator(
            self.runid, self.repository, self.name, self.wfConfig, self.prodConfig)
        self._workflowLauncher = self._workflowConfigurator.configure(
            provSetup, workflowVerbosity)
    finally:
        self._locked.release()

    # do specialized workflow level configuration here, this may include
    # calling ProvenanceSetup.getWorkflowCommands()
    return self._workflowLauncher
def waitForAllJobsToRun(self, numList):
    log.debug("CondorJobs:waitForAllJobsToRun")
    queueExp = re.compile(r"\S+")
    jobList = list(numList)
    while True:
        pop = os.popen("condor_q", "r")
        while True:
            line = pop.readline()
            if not line:
                break
            values = queueExp.findall(line)
            if len(values) == 0:
                continue
            jobNum = values[0]
            runstate = values[5]
            for jobEntry in jobList:
                jobId = "%s.0" % jobEntry
                if (jobNum == jobId) and (runstate == 'R'):
                    jobList = [job for job in jobList if job[:] != jobEntry]
                    if len(jobList) == 0:
                        return
                    break
                else:
                    continue
                if (jobNum == jobEntry) and (runstate == 'H'):
                    pop.close()
                    # throw exception here
                    return
        pop.close()
        time.sleep(1)
def deployData(self, wfConfig):
    log.debug("GenericPipelineWorkflowConfigurator:deployData")

    # add data deploy here
    if wfConfig.configuration["generic"] is not None:
        configuration = wfConfig.configuration["generic"]
        if configuration.deployData is not None:
            deployConfig = configuration.deployData
            dataRepository = deployConfig.dataRepository
            dataRepository = EnvString.resolve(dataRepository)
            deployScript = deployConfig.script
            deployScript = EnvString.resolve(deployScript)
            collection = deployConfig.collection

            if os.path.isfile(deployScript):
                runDir = self.directories.getDefaultRunDir()
                deployCmd = [deployScript, runDir, dataRepository, collection]
                print(">>> ", deployCmd)
                pid = os.fork()
                if not pid:
                    os.execvp(deployCmd[0], deployCmd)
                os.wait()[0]
            else:
                log.debug("GenericPipelineWorkflowConfigurator:deployData: "
                          "warning: script '%s' doesn't exist" % deployScript)
def __init__(self):
    # _locked: a container for data to be shared across threads that
    # have access to this object.
    self._locked = SharedData.SharedData(False, {"running": False, "done": False})

    log.debug("WorkflowMonitor:__init__")

    self._statusListeners = []
def initAuthInfo(self, dbConfig):
    host = dbConfig.system.authInfo.host
    if host is None:
        raise RuntimeError("database host must be specified in config")
    port = dbConfig.system.authInfo.port
    if port is None:
        raise RuntimeError("database port must be specified in config")

    dbAuthFile = os.path.join(os.environ["HOME"], ".lsst/db-auth.py")

    authConfig = AuthConfig()
    authConfig.load(dbAuthFile)

    # authInfo = authConfig.database.authInfo
    # authNames = authConfig.database.authInfo.active
    for authName in authConfig.database.authInfo:
        auth = authConfig.database.authInfo[authName]
        if (auth.host == host) and (auth.port == port):
            log.debug("using host %s at port %d" % (host, port))
            ## database host name
            self.dbHost = auth.host
            ## database server port number
            self.dbPort = auth.port
            ## database user name
            self.dbUser = auth.user
            ## database authentication
            self.dbPassword = auth.password
            return
    raise RuntimeError("couldn't find any matching authorization "
                       "for host %s and port %d" % (host, port))
def testBasic(self):
    """
    Test basic log output with default configuration.
    Since the default threshold is INFO, the DEBUG or TRACE message is not emitted.
    """
    with TestLog.StdoutCapture(self.outputFilename):
        log.configure()
        log.log(log.getDefaultLogger(), log.INFO, "This is INFO")
        log.info(u"This is unicode INFO")
        log.trace("This is TRACE")
        log.debug("This is DEBUG")
        log.warn("This is WARN")
        log.error("This is ERROR")
        log.fatal("This is FATAL")
        log.critical("This is CRITICAL")
        log.warning("Format %d %g %s", 3, 2.71828, "foo")
    self.check("""
root INFO: This is INFO
root INFO: This is unicode INFO
root WARN: This is WARN
root ERROR: This is ERROR
root FATAL: This is FATAL
root FATAL: This is CRITICAL
root WARN: Format 3 2.71828 foo
""")
def launch(self, statusListener, loggerManagers):
    log.debug("CondorWorkflowLauncher:launch")

    # start the monitor first, because we want to catch any pipeline
    # events that might be sent from expiring pipelines.
    eventBrokerHost = self.prodConfig.production.eventBrokerHost
    shutdownTopic = self.prodConfig.production.productionShutdownTopic

    # Launch process
    startDir = os.getcwd()
    os.chdir(self.localStagingDir)
    cj = CondorJobs()
    condorDagId = cj.condorSubmitDag(self.dagFile)
    print("Condor dag submitted as job ", condorDagId)
    os.chdir(startDir)

    ## workflow monitor for HTCondor jobs
    self.workflowMonitor = CondorWorkflowMonitor(eventBrokerHost, shutdownTopic, self.runid,
                                                 condorDagId, loggerManagers, self.monitorConfig)
    if statusListener is not None:
        self.workflowMonitor.addStatusListener(statusListener)
    self.workflowMonitor.startMonitorThread(self.runid)

    return self.workflowMonitor
def condorSubmitDag(self, filename):
    log.debug("CondorJobs: submitCondorDag " + filename)
    # Just a note about why this was done this way...
    # There's something weird about how "condor_submit_dag" prints its output.
    # If you run it on the command line, it'll print the "1 job(s) submitted"
    # message as one of the last lines of output.
    # If you redirect output, even on the command line, to a file, it will
    # be one of the first lines.
    # In an effort to avoid having to fix any output behavior issues in the
    # future, we just try and match every line of output with "1 job(s) submitted"
    # and if we find it, we grab the cluster id out of that line.
    clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
    cmd = "condor_submit_dag %s" % filename
    print(cmd)
    process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output = []
    line = process.stdout.readline()
    i = 0
    while line != "":
        line = line.strip()
        output.append(line)
        line = process.stdout.readline()
        i += 1
    for line in output:
        num = clusterexp.findall(line)
        if len(num) != 0:
            # read the rest (if any) and terminate
            stdoutdata, stderrdata = process.communicate()
            return num[0]
    # read the rest (if any) and terminate
    stdoutdata, stderrdata = process.communicate()
    return -1
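# A short, self-contained check of the cluster-id pattern used above, applied
# to the example condor_submit output quoted in submitJob()'s docstring
# ("1 job(s) submitted to cluster 1317."). This is only an illustration of the
# regex, not part of the submission code.
import re

clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
sample = "1 job(s) submitted to cluster 1317."
assert clusterexp.findall(sample) == ["1317"]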
def runApVerify(cmdLine=None):
    """Execute the AP pipeline while handling metrics.

    This is the main function for ``ap_verify``, and handles logging,
    command-line argument parsing, pipeline execution, and metrics
    generation.

    Parameters
    ----------
    cmdLine : `list` of `str`
        an optional command line used to execute `runApVerify` from other
        Python code. If `None`, `sys.argv` will be used.
    """
    lsst.log.configure()
    log = lsst.log.Log.getLogger('ap.verify.ap_verify.main')
    # TODO: what is LSST's policy on exceptions escaping into main()?
    args = _ApVerifyParser().parse_args(args=cmdLine)
    log.debug('Command-line arguments: %s', args)

    workspace = Workspace(args.output)
    ingestDataset(args.dataset, workspace)

    log.info('Running pipeline...')
    expandedDataIds = runApPipe(workspace, args)
    computeMetrics(workspace, expandedDataIds, args)
def _scanHdus(self, filename, detectorId):
    """Scan through a file for the HDU containing data from one detector.

    Parameters
    ----------
    filename : `str`
        The file to search through.
    detectorId : `int`
        The detector id to search for.

    Returns
    -------
    index : `int`
        The index of the HDU with the requested data.
    metadata : `lsst.daf.base.PropertyList`
        The metadata read from the header for that detector id.

    Raises
    ------
    ValueError
        Raised if detectorId is not found in any of the file HDUs
    """
    log = lsst.log.Log.getLogger("VircamRawFormatter")
    log.debug("Did not find detector=%s at expected HDU=%s in %s: scanning through all HDUs.",
              detectorId, detector_to_hdu[detectorId], filename)

    fitsData = lsst.afw.fits.Fits(filename, 'r')
    # NOTE: The primary header (HDU=0) does not contain detector data.
    for i in range(1, fitsData.countHdus()):
        fitsData.setHdu(i)
        metadata = fitsData.readMetadata()
        if metadata['ESO DET CHIP NO'] == detectorId:
            return i, metadata
    else:
        raise ValueError(f"Did not find detectorId={detectorId} as CCDNUM in any HDU of {filename}.")
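# An equivalent sketch of the same header scan written with astropy.io.fits
# (an assumption; the formatter above uses lsst.afw.fits). It illustrates the
# idea: skip the primary HDU and return the first extension whose
# 'ESO DET CHIP NO' keyword matches the requested detector.
from astropy.io import fits


def scan_hdus_astropy(filename, detector_id):
    with fits.open(filename) as hdus:
        for index, hdu in enumerate(hdus[1:], start=1):
            if hdu.header.get('ESO DET CHIP NO') == detector_id:
                return index, hdu.header
    raise ValueError(f"detectorId={detector_id} not found in {filename}")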
def checkConfiguration(self, care=1, issueExc=None):
    """Run checks that ensure that the Workflow has been properly set up.

    Parameters
    ----------
    care : `int`
        the thoroughness of the checks. In general, a higher number will
        result in more checks.
    issueExc : `MultiIssueConfigurationError`
        An instance of MultiIssueConfigurationError to add problems to.
        If not None, this function will not raise an exception when
        problems are encountered; they will merely be added to the
        instance. It is assumed that the caller will raise that exception
        as necessary.

    Raises
    ------
    MultiIssueConfigurationError
        if problems are found
    """
    log.debug("WorkflowManager:checkConfiguration")

    myProblems = issueExc
    if myProblems is None:
        myProblems = MultiIssueConfigurationError(
            "problems encountered while checking configuration")

    # do the checks

    # raise exception if problems found
    if not issueExc and myProblems.hasProblems():
        raise myProblems
def runApVerify(cmdLine=None):
    """Execute the AP pipeline while handling metrics.

    This is the main function for ``ap_verify``, and handles logging,
    command-line argument parsing, pipeline execution, and metrics
    generation.

    Parameters
    ----------
    cmdLine : `list` of `str`
        an optional command line used to execute `runApVerify` from other
        Python code. If `None`, `sys.argv` will be used.

    Returns
    -------
    nFailed : `int`
        The number of data IDs that were not successfully processed, up to
        127, or 127 if the task runner framework failed.
    """
    _configure_logger()
    log = _LOG.getChild('main')
    # TODO: what is LSST's policy on exceptions escaping into main()?
    args = _ApVerifyParser().parse_args(args=cmdLine)
    log.debug('Command-line arguments: %s', args)

    workspace = WorkspaceGen3(args.output)
    ingestDatasetGen3(args.dataset, workspace, processes=args.processes)
    log.info('Running pipeline...')
    # Gen 3 pipeline includes both AP and metrics
    return runApPipeGen3(workspace, args, processes=args.processes)
def createDatabaseConfigurator(self, databaseConfig):
    log.debug("WorkflowConfigurator:createDatabaseConfigurator")
    className = databaseConfig.configurationClass
    classFactory = NamedClassFactory()
    configurationClass = classFactory.createClass(className)
    configurator = configurationClass(self.runid, databaseConfig)
    return configurator
def __init__(self, name, runid, repository, prodConfig, wfConfig):
    # _locked: a container for data to be shared across threads that
    # have access to this object.
    self._locked = SharedData.SharedData(False)

    # workflow name
    self.name = "unnamed"
    if name is not None:
        self.name = name

    # run id of this workflow
    self.runid = runid

    # repository where the configuration is kept
    self.repository = repository

    # workflow configuration
    self.wfConfig = wfConfig

    # production configuration
    self.prodConfig = prodConfig

    self._workflowConfigurator = None

    log.debug("WorkflowManager:__init__")

    # the urgency level of how fast to stop the workflow
    self.urgency = 0
    self._launcher = None
    self._monitor = None
def _getIFull(_request, W13db):
    '''Get a full image from the input parameters.
    W13db should be the appropriate class (W13DeepCoadDb, W13RawDb, etc.)
    '''
    raIn = _request.args.get('ra')
    decIn = _request.args.get('dec')
    filt = _request.args.get('filter')

    # check inputs
    valid, ra, dec, filt, msg = checkRaDecFilter(raIn, decIn, filt, 'irg')
    if not valid:
        # TODO: use HTTP errors DM-1980
        resp = "INVALID_INPUT {}".format(msg)
        return resp
    log.info("raw ra={} dec={} filt={}".format(ra, dec, filt))

    # fetch the image here
    w13db = dbOpen("~/.lsst/dbAuth-dbServ.ini", W13db)
    imgFull = w13db.getImageFull(ra, dec)
    if imgFull is None:
        return _imageNotFound()
    log.debug("Full w=%d h=%d", imgFull.getWidth(), imgFull.getHeight())
    tmpPath = tempfile.mkdtemp()
    fileName = os.path.join(tmpPath, "fullImage.fits")
    log.info("temporary fileName=%s", fileName)
    imgFull.writeFits(fileName)
    w13db.closeConnection()
    resp = responseFile(fileName)
    os.remove(fileName)
    os.removedirs(tmpPath)
    return resp
def launch(self, statusListener):
    """Launch this workflow

    Parameters
    ----------
    statusListener : StatusListener
        status listener object
    """
    log.debug("PegasusWorkflowLauncher:launch")

    # start the monitor
    # Launch process
    startDir = os.getcwd()
    os.chdir(self.localStagingDir)
    pj = PegasusJobs()
    condorDagId, statusInfo, removeInfo = pj.pegasusSubmitDax(
        self.sitesXMLFile, self.transformFile, self.daxFile)
    if statusInfo is not None:
        print("Pegasus workspace: %s" % statusInfo[0])
    os.chdir(startDir)

    # workflow monitor for HTCondor jobs
    self.workflowMonitor = CondorWorkflowMonitor(condorDagId, self.monitorConfig)
    if statusListener is not None:
        self.workflowMonitor.addStatusListener(statusListener)
    self.workflowMonitor.startMonitorThread()

    return self.workflowMonitor
def writeCondorFile(self, launchNamePrefix, launchScriptName):
    log.debug("VanillaCondorWorkflowConfigurator:writeCondorFile")

    condorJobFile = os.path.join(self.localWorkDir, launchNamePrefix)
    condorJobFile = os.path.join(condorJobFile, launchNamePrefix + ".condor")

    clist = []
    clist.append("universe=vanilla\n")
    clist.append("executable=%s/%s\n" % (launchNamePrefix, launchScriptName))
    clist.append("transfer_executable=false\n")
    clist.append("output=%s/%s/Condor.out\n" % (self.localWorkDir, launchNamePrefix))
    clist.append("error=%s/%s/Condor.err\n" % (self.localWorkDir, launchNamePrefix))
    clist.append("log=%s/%s/Condor.log\n" % (self.localWorkDir, launchNamePrefix))
    clist.append("should_transfer_files = YES\n")
    clist.append("when_to_transfer_output = ON_EXIT\n")
    clist.append("remote_initialdir=" + self.dirs.get("workDir") + "\n")
    clist.append("Requirements = (FileSystemDomain != \"dummy\") && (Arch != \"dummy\") && "
                 "(OpSys != \"dummy\") && (Disk != -1) && (Memory != -1)\n")
    clist.append("queue\n")

    # Create a file object in "write" mode
    condorFILE = open(condorJobFile, "w")
    condorFILE.writelines(clist)
    condorFILE.close()

    return condorJobFile
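# For illustration, with hypothetical inputs localWorkDir="/scratch/work",
# launchNamePrefix="pipeline1", launchScriptName="launch.sh", and a remote
# workDir of "/remote/work", the submit description written above would read:
#
#   universe=vanilla
#   executable=pipeline1/launch.sh
#   transfer_executable=false
#   output=/scratch/work/pipeline1/Condor.out
#   error=/scratch/work/pipeline1/Condor.err
#   log=/scratch/work/pipeline1/Condor.log
#   should_transfer_files = YES
#   when_to_transfer_output = ON_EXIT
#   remote_initialdir=/remote/work
#   Requirements = (FileSystemDomain != "dummy") && (Arch != "dummy") && (OpSys != "dummy") && (Disk != -1) && (Memory != -1)
#   queue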
def configure(self, workflowVerbosity):
    log.debug("ProductionRunConfigurator:configure")

    # TODO - IMPORTANT - NEXT TWO LINES ARE FOR PROVENANCE
    # --------------
    # self._provSetup = ProvenanceSetup()
    # self._provSetup.addAllProductionConfigFiles(self._prodConfigFile, self.repository)
    # --------------

    #
    # setup the database for each database listed in production config.
    # cache the configurators in case we want to check the configuration
    # later.
    #
    # databaseConfigNames = self.prodConfig.databaseConfigNames
    databaseConfigs = self.prodConfig.database

    # for databaseName in databaseConfigNames:
    for databaseName in databaseConfigs:
        databaseConfig = databaseConfigs[databaseName]
        cfg = self.createDatabaseConfigurator(databaseConfig)
        cfg.setup(self._provSetup)

        dbInfo = cfg.getDBInfo()

        # check to see if we're supposed to launch a logging daemon
        if databaseConfig.logger is not None:
            loggerConfig = databaseConfig.logger
            if loggerConfig.launch is not None:
                launch = loggerConfig.launch
                loggerManager = None
                if launch:
                    loggerManager = LoggerManager(self.eventBrokerHost, self.runid,
                                                  dbInfo["host"], dbInfo["port"],
                                                  dbInfo["dbrun"])
                else:
                    loggerManager = LoggerManager(self.eventBrokerHost, self.runid)
                if loggerManager is not None:
                    self._loggerManagers.append(loggerManager)

        self._databaseConfigurators.append(cfg)

    #
    # do specialized production level configuration, if it exists
    #
    if self.prodConfig.production.configuration.configurationClass is not None:
        specialConfigurationConfig = self.prodConfig.production.configuration
        # XXX - specialConfigurationConfig maybe?
        self.specializedConfigure(specialConfigurationConfig)

    # workflowNames = self.prodConfig.workflowNames
    workflowConfigs = self.prodConfig.workflow
    workflowManagers = []
    for wfName in workflowConfigs:
        wfConfig = workflowConfigs[wfName]
        # copy in appropriate production level info into workflow Node -- ?

        workflowManager = self.createWorkflowManager(self.prodConfig, wfName, wfConfig)
        workflowLauncher = workflowManager.configure(self._provSetup, workflowVerbosity)
        workflowManagers.append(workflowManager)

    return workflowManagers
def waitForFirstFile(self):
    log.debug("FileWaiter:waitForFirstFile")
    print("waiting for log file to be created to confirm launch.")
    cmd = "gsissh %s %s -f %s" % (self.remoteNode, self.remoteFileWaiter, self.fileListName)
    pid = os.fork()
    if not pid:
        os.execvp("gsissh", cmd.split())
    os.wait()[0]
def remoteMkdir(self, remoteDir):
    log.debug("VanillaCondorWorkflowConfigurator:remoteMkdir")
    cmd = "gsissh %s mkdir -p %s" % (self.remoteLoginName, remoteDir)
    print("running: " + cmd)
    pid = os.fork()
    if not pid:
        os.execvp("gsissh", cmd.split())
    os.wait()[0]
def __init__(self, remoteNode, remoteFileWaiter, fileListName, logger=None):
    log.debug("FileWaiter:__init__")
    ## name of the remote node to execute on
    self.remoteNode = remoteNode
    ## name of the remote file list file
    self.fileListName = fileListName
    ## name of the remote file waiter script
    self.remoteFileWaiter = remoteFileWaiter
def stopWorkflow(self, urgency):
    """Stop the workflow

    Parameters
    ----------
    urgency : `int`
        The severity level of how to handle stopping the workflow
    """
    log.debug("WorkflowMonitor:stopWorkflow")
def stopWorkflow(self, urgency):
    """Stop the workflow"""
    log.debug("CondorWorkflowMonitor:stopWorkflow")
    # do a condor_rm on the cluster id for the dag we submitted.
    print("shutdown request received: stopping workflow")
    cj = CondorJobs()
    cj.killCondorId(self.condorDagId)
def handleRequest(self, request):
    """Act on a request

    Parameters
    ----------
    request : dict
        a dictionary containing a REST request
    """
    log.debug("WorkflowMonitor:handleRequest")
def _file_response(img, file_name):
    tmp_path = tempfile.mkdtemp()
    file_path = os.path.join(tmp_path, file_name)
    log.debug("temporary file_path=%s", file_path)
    img.writeFits(file_path)
    resp = _make_file_response(file_path)
    os.remove(file_path)
    os.removedirs(tmp_path)
    return resp
def __init__(self, prodConfig, wfConfig, runid, localStagingDir, dagFile, monitorConfig):
    log.debug("CondorWorkflowLauncher:__init__")
    self.prodConfig = prodConfig
    self.wfConfig = wfConfig
    self.runid = runid
    self.localStagingDir = localStagingDir
    self.dagFile = dagFile
    self.monitorConfig = monitorConfig
def isDone(self):
    """Report if the workflow has completed

    Returns
    -------
    done : `bool`
        True if the workflow being monitored has completed
    """
    log.debug("WorkflowMonitor:isDone")
    return self._locked.done
def addStatusListener(self, statusListener):
    """Add a status listener to this monitor

    Parameters
    ----------
    statusListener : StatusListener
        an object that implements the statusListener interface
    """
    log.debug("WorkflowMonitor:addStatusListener")
    self._statusListeners.append(statusListener)
def launch(self, statusListener):
    log.debug("WorkflowLauncher:launch")

    # monitors status of the workflow
    self.workflowMonitor = WorkflowMonitor()
    if statusListener is not None:
        self.workflowMonitor.addStatusListener(statusListener)

    # returns WorkflowMonitor
    return self.workflowMonitor