def __init__(self, dictName, dictRegistry=NamedDicts):
    """Set up for the protocol by opening the named persistent dictionary.

    Looks up `dictName` in `dictRegistry` to resolve the backing db file,
    port, and log file; creates any missing directories; starts twisted
    logging to the log file; then opens the shelve-backed dict (or the
    in-memory test dict when dictName == 'Test').

    Raises PersistentDictFactoryException if `dictName` is not registered.
    """
    self.dictName = dictName
    try:
        self.dbFile = dictRegistry[dictName]['dbFile']
        self.port = dictRegistry[dictName]['port']
        if self.dbFile:
            dbHome = os.path.split(self.dbFile)[0]
            if not os.path.exists(dbHome):
                os.makedirs(dbHome, 0o777)
            self.dbHome = dbHome
        logFile = dictRegistry[dictName]['logFile']
        if not logFile.startswith('/'):
            # relative log paths are resolved against the db home
            logFile = os.path.join(dbHome, logFile)
        self.logFile = logFile
    except KeyError as e:
        # Narrowed from a bare `except:` so unrelated failures (e.g. an
        # OSError from makedirs) are no longer masked as a bad dict name.
        raise PersistentDictFactoryException(
            'Error, no dict of that name: %s' % dictName) from e
    validateDirectory(os.path.dirname(self.logFile))
    log.startLogging(open(self.logFile, 'w'))
    if dictName == 'Test':
        self.dict = _TestDict
    else:
        self.dict = dbshelve.open(self.dbFile)
        os.chmod(self.dbFile, 0o666)
def setStore(self, wuid, cachedStore=False):
    """Create a per-wuid work-unit store.

    Returns None when wuid is None; otherwise builds the store rooted
    under '<storeArgs[0]>/wuidStores/<wuid>' and returns True.  When
    cachedStore is true the store is kept in self._cachedStore instead
    of self._store.
    """
    if wuid is None:
        return None
    storeHome = os.path.join(self._storeArgs[0], 'wuidStores', wuid)
    validateDirectory(storeHome, noExceptionRaise=True)
    # argument order expected by the store class: name, fields, home, rest
    storeArgs = [self._storeName, self._storeFieldsList, storeHome]
    storeArgs.extend(self._storeArgs[1:])
    store = self._storeClass(*storeArgs)
    if cachedStore:
        self._cachedStore = store
    else:
        self._store = store
    return True
def __init__(self, name, fieldsList, dbHome, dbName):
    """Constructor: open (creating if needed) the bsddb-backed table store."""
    super(BsddbStore, self).__init__(name, fieldsList)
    self._dbHome = dbHome
    self._dbName = dbName
    # the dbHome directory must exist before the db can be opened
    if not validateDirectory(self._dbHome):
        raise BsddbStoreError("Couldn't create dbHome directory: %s." %
                              self._dbHome)
    # only ask for creation when the db file is not already on disk
    createFlag = 0 if os.path.isfile(
        os.path.join(self._dbHome, self._dbName)) else 1
    self._dbHandle = dbtablesCDB.bsdTableDB(
        self._dbName, dbhome=self._dbHome, create=createFlag)
    # ensure our table exists; tolerate a concurrent creator
    if self._name not in self._dbHandle.ListTables():
        try:
            self._dbHandle.CreateTable(self._name, self._fieldsList)
        except dbtablesCDB.TableAlreadyExists:
            pass
def updateJson(jsonFile, obj, stringifyKeys=None, ubt=None,
               publicizeKeys=None, pickleKeys=None):
    """Write obj in JSON format to file or update it.

    When obj is a dict, the listed keys are rewritten first (always on a
    deep copy, so the caller's dict is never mutated):
      - publicizeKeys: run publicizeResultFiles() over them (requires ubt)
      - stringifyKeys: coerce non-None values to str
      - pickleKeys:    pickle non-None values via pickleThis()

    Raises whatever json.dump raises if obj is not serializable (after
    printing the offending object for diagnosis).
    """
    # None sentinels replace the old mutable `=[]` defaults; behavior is
    # identical for callers.
    stringifyKeys = [] if stringifyKeys is None else stringifyKeys
    publicizeKeys = [] if publicizeKeys is None else publicizeKeys
    pickleKeys = [] if pickleKeys is None else pickleKeys
    # publicize
    if isinstance(obj, dict) and ubt is not None and \
            len(publicizeKeys) > 0:
        obj = copy.deepcopy(obj)
        for k in publicizeKeys:
            obj[k] = publicizeResultFiles(obj[k], ubt)
    # make sure result is stringified
    if isinstance(obj, dict) and len(stringifyKeys) > 0:
        obj = copy.deepcopy(obj)
        for k in stringifyKeys:
            if obj.get(k, None) is not None:
                obj[k] = str(obj[k])
    # pickle keys
    if isinstance(obj, dict) and len(pickleKeys) > 0:
        obj = copy.deepcopy(obj)
        for k in pickleKeys:
            if obj.get(k, None) is not None:
                obj[k] = pickleThis(obj[k])
    validateDirectory(os.path.dirname(jsonFile))
    with open(jsonFile, 'w') as f:
        try:
            json.dump(obj, f)
        except Exception:
            # narrowed from bare `except:`; dump the object for
            # diagnosis, then re-raise the original error
            print("Got exception dumping json:\n{}".format(
                pformat(obj, indent=2)))
            raise
def getWorkUnit(wuConfig, configFile=None, configDict=None):
    """Return a WorkUnit object built from wuConfig.

    Localizes any stage files into a freshly generated work directory.

    Raises RuntimeError if the work unit type has no registered
    implementation in WorkUnitTypeMapping.
    """
    if configDict is None:
        configDict = {}  # avoid shared mutable default argument
    workDir = GridServiceConfig(configFile).getWorkUnitWorkDir()
    validateDirectory(workDir)
    procId = wuConfig.getId()
    wuType = wuConfig.getType()
    wuClass = WorkUnitTypeMapping.get(wuType, None)
    wuid = generateWorkUnitId()
    wuWorkDir = os.path.join(workDir, wuid)
    copyToDir(wuConfig.getStageFiles(), wuWorkDir, unpackBundles=True)
    hexDigest = wuConfig.getHexDigest()  # renamed from `hex` (shadowed builtin)
    if wuClass is None:
        raise RuntimeError("Unimplemented WorkUnit subclass: %s" % wuType)
    # NOTE(review): comparing the class object to the string 'sciflo' looks
    # suspicious -- it presumably intends to test the work-unit *type*
    # (wuType == 'sciflo').  Kept as-is to preserve behavior; TODO confirm.
    if wuClass == 'sciflo':
        workUnit = wuClass(wuConfig.getCall(), wuConfig.getArgs(), wuWorkDir,
                           wuid=wuid, procId=procId, hexDigest=hexDigest,
                           scifloid=generateScifloId(), configDict=configDict)
    else:
        workUnit = wuClass(wuConfig.getCall(), wuConfig.getArgs(), wuWorkDir,
                           wuid=wuid, procId=procId, hexDigest=hexDigest,
                           configDict=configDict)
    workUnit.setInfoItem('typ', wuType)
    workUnit.setInfoItem('status', readyStatus)
    workUnit.setInfoItem('procCount', wuConfig.getProcCount())
    return workUnit
def getRootWorkDirFromConfiguration(file=None):
    """Return the dir path for workUnit's work directory."""
    parser = ScifloConfigParser(file)
    # root directory under which per-work-unit dirs are created
    rootWorkDir = parser.getMandatoryParameter('workUnitRootWorkDir')
    if validateDirectory(rootWorkDir):
        return rootWorkDir
    raise RuntimeError("Couldn't access/create workUnitRootWorkDir %s." %
                       rootWorkDir)
def getScheduleConfigFromConfiguration(file=None):
    """Return the ScheduleConfig object as defined by the sciflo
    configuration xml file.
    """
    parser = ScifloConfigParser(file)
    storeType = parser.getMandatoryParameter('scheduleStoreType')
    # unknown store types are rejected up front
    if storeType not in StoreTypeMapping:
        raise RuntimeError("Unknown scheduleStoreType %s in configuration." %
                           storeType)
    # get home and, if bsddb, validate that it is a directory
    storeHome = parser.getMandatoryParameter('scheduleStoreHome')
    if storeType == 'bsddb' and not validateDirectory(storeHome):
        raise RuntimeError("Couldn't access/create bsddb home %s." %
                           storeHome)
    # filename for bsddb and the store's name
    storeDb = parser.getMandatoryParameter('scheduleStoreDb')
    storeName = parser.getMandatoryParameter('scheduleStoreName')
    # store config for workunit schedule store
    return StoreConfig(storeType, storeName, scheduleStoreFieldsList,
                       storeHome, storeDb)
def __init__(self, call, args, workDir, verbose=False, wuid=None,
             procId=None, hexDigest=None, configDict=None):
    """Save call, args, and working directory.

    Creates the work directory (raising WorkUnitError on failure) and
    seeds the json/log/pid file paths plus the workUnitInfo record.
    """
    if configDict is None:
        # avoid the shared-mutable-default pitfall: the dict is stored on
        # the instance, so a `={}` default would be shared across units
        configDict = {}
    self._call = call
    self._args = args
    self._workDir = workDir
    self._verbose = verbose
    self._wuid = wuid
    self._procId = procId
    self._hexDigest = hexDigest
    self._configDict = configDict
    self._cancelFlag = False
    if not validateDirectory(self._workDir):  # make sure workDir exists
        raise WorkUnitError(
            "Couldn't create work unit work directory: %s." % self._workDir)
    self._jsonFile = os.path.join(self._workDir, 'workunit.json')
    self._logFile = os.path.join(self._workDir, 'wu_execution.log')
    self._pidFile = os.path.join(self._workDir, 'workunit.pid')
    self._info = workUnitInfo(None, call=self._call, args=self._args,
                              workDir=self._workDir, wuid=self._wuid,
                              procId=self._procId, hex=self._hexDigest,
                              cancelFlag=self._cancelFlag,
                              jsonFile=self._jsonFile, pidFile=self._pidFile,
                              executionLog=self._logFile)
def _execute(self, result, workDir):
    """Perform the post execution function.

    Pass in the entire result of the work unit and this handler will run
    the conversion function on the specified indexed result or on the
    entire result (if the resultIndex is None). Return the result of
    the function.
    """
    # get the result to perform function on
    if self._resultIndex is None:
        inputResult = result
    else:
        inputResult = result[self._resultIndex]
    # special case conversions: 'xpath:<expr>' runs an xpath query
    xpathMatch = re.search(r'^xpath:(.+)$', self._conversionFuncStr)
    if xpathMatch:
        resultDoc, resultNs = getXmlEtree(inputResult)
        return resultDoc.xpath(xpathMatch.group(1), namespaces=resultNs)
    # eval the conversion function
    convFunc = getFunction(self._conversionFuncStr,
                           addToSysPath=getUserConversionFunctionsDir())
    # Bug fix: singleArgFlag was previously assigned only inside the
    # LocalizingFunctionWrapper branch but read in the
    # FileConversionFunction branch below, which could raise
    # UnboundLocalError.  Compute it up front instead.
    singleArgFlag = not isinstance(inputResult, (list, tuple))
    # if FunctionWrapper, get local files
    if isinstance(convFunc, LocalizingFunctionWrapper):
        tempDir = os.path.join(workDir, 'fileConversions')
        validateDirectory(tempDir)
        tmpInput = [inputResult] if singleArgFlag else inputResult
        tmpInputRes = []
        for ip in tmpInput:
            if not isinstance(ip, str):
                raise PostExecutionHandlerError(
                    "Cannot localize input %s. Please check return value of work unit." % str(ip))
            if re.search(r'^\w*?://', ip):
                # URL: take the basename of the path component
                filebase = urllib.parse.urlparse(ip)[2].split('/')[-1]
            else:
                filebase = os.path.basename(ip)
            tempFile = os.path.join(tempDir, filebase)
            # urlretrieve handles both remote URLs and local paths
            urllib.request.urlretrieve(ip, tempFile)
            tmpInputRes.append(tempFile)
        inputResult = tmpInputRes[0] if singleArgFlag else tmpInputRes
    # do conversion
    if isinstance(convFunc, FileConversionFunction):
        if singleArgFlag:
            retVal = convFunc(inputResult)
        else:
            retVal = [convFunc(ip) for ip in inputResult]
    else:
        retVal = convFunc(inputResult)
    return retVal
def __init__(self, sflString, args=None, workers=4, workerTimeout=None,
             logLevel=logging.DEBUG, workDir=None, cacheName="WorkUnitCache",
             outputDir=None, scifloid=None, publicize=False, configFile=None,
             lookupCache=True, configDict=None, writeGraph=True,
             statusUpdateFunc=None, emailNotify=None, outputUrl=None):
    """Constructor.

    Resolves the sciflo document, sets up the worker pool, logging,
    work/output directories, cache, and per-work-unit bookkeeping, then
    writes the initial sciflo json for monitoring.
    """
    import multiprocessing as mp
    # None sentinels replace the old mutable `={}` defaults; behavior is
    # identical for callers.
    if args is None:
        args = {}
    if configDict is None:
        configDict = {}
    self.sflString = sflString
    self.args = normalizeScifloArgs(args)
    if isinstance(self.args, dict):
        self.sciflo = Sciflo(self.sflString, globalInputDict=self.args)
    elif isinstance(self.args, (list, tuple)):
        self.sciflo = Sciflo(self.sflString, self.args)
    else:
        raise ScifloExecutorError("Unrecognized type for args: %s" %
                                  type(self.args))
    self.sciflo.resolve()
    self.scifloName = self.sciflo.getName()
    self.wuConfigs = self.sciflo.getWorkUnitConfigs()
    if scifloid is None:
        self.scifloid = generateScifloId()
    else:
        self.scifloid = scifloid
    self.manager = mp.Manager()
    self.procIds = []
    self.applyResultsDict = {}
    self.resultsDict = {}
    self.postExecResultsDict = {}
    self.doneDict = {}
    self.executionError = None
    if workers > 50:
        raise ScifloExecutorError("Cannot specify workers > 50.")
    self.workers = workers
    self.pool = ScifloPool(self.workers)
    self.configDict = configDict
    #self.lock = self.manager.RLock()
    self.lock = threading.RLock()
    self.event = self.manager.Event()
    self.waiterProcess = mp.Process(target=waiter, name="waiter",
                                    args=[self.event])
    self.logLevel = logLevel
    if DEBUG_PROCESSING:
        self.logger = mp.process.getLogger()
    else:
        self.logger = logging.getLogger(self.scifloName)
    self.logger.setLevel(self.logLevel)
    self.pid = os.getpid()
    self.writeGraph = writeGraph
    self.statusUpdateFunc = statusUpdateFunc
    self.emailNotify = emailNotify
    # config file and GridServiceConfig
    self.configFile = configFile
    self.gsc = GridServiceConfig(self.configFile)
    # set worker timeout from config file and override
    self.workerTimeout = workerTimeout
    if self.workerTimeout is None:
        self.workerTimeout = self.gsc.getWorkerTimeout()
    # sciflo work dir
    if workDir is None:
        workDir = self.gsc.getWorkUnitWorkDir()
    self.workDir = os.path.abspath(workDir)
    validateDirectory(self.workDir)
    # sciflo output dir
    self.outputDir = outputDir
    if self.outputDir is None:
        self.outputDir = os.path.join(self.workDir, self.scifloid)
    validateDirectory(self.outputDir)
    self.outputUrl = outputUrl
    # send logging messages to an execution log also (in addition to console)
    self.logFile = os.path.join(self.outputDir, 'sfl_execution.log')
    fh = logging.FileHandler(self.logFile)
    fh.setLevel(self.logLevel)
    fh.setFormatter(logging.Formatter(LOG_FMT))
    self.logger.addHandler(fh)
    # cache related attrs
    self.cacheName = cacheName
    self.lookupCache = lookupCache
    if self.cacheName is None:
        self.pdict = None
    else:
        try:
            self.pdict = PersistentDict(self.cacheName, pickleVals=True)
        except Exception as e:
            self.logger.debug("Got exception trying to get PersistentDict \
for sciflo '%s': %s. No cache will be used." % (self.scifloName, e),
                              extra={'id': self.scifloid})
            self.cacheName = None
            self.pdict = None
    self.hexDict = {}
    # annotated doc
    self.annDoc = AnnotatedDoc(self.sciflo, self.outputDir)
    # json file
    self.jsonFile = os.path.join(self.outputDir, 'sciflo.json')
    # svgfile
    self.svgFile = os.path.join(self.outputDir, 'scifloGraph.svg')
    if self.writeGraph:
        self.sciflo.writeGraph(self.svgFile)
    # configFile, publicize, grid service config, base url and url base
    # tracker
    self.publicize = publicize
    self.baseUrl = self.gsc.getBaseUrl()
    if self.baseUrl is None:
        if self.publicize:
            self.baseUrl = self.gsc.getGridBaseUrl()
        else:
            self.baseUrl = "file://%s%s" % (getfqdn(), self.workDir)
    self.ubt = UrlBaseTracker(self.workDir, self.baseUrl)
    if self.publicize:
        self.publicizeUbt = self.ubt
    else:
        self.publicizeUbt = None
    # sciflo procId->wuid map
    self.procIdWuidMap = {}
    # build deferred ids, dict, and results dict
    for w in self.wuConfigs:
        procId = w.getId()
        # check if all args are resolved
        resolved = self.resolveArgs(w)
        # if all args are resolved, get work unit
        if resolved:
            self.hexDict[procId] = w.getHexDigest()
            try:
                wu = getWorkUnit(w, configFile=self.configFile,
                                 configDict=self.configDict)
            except Exception as e:
                raise ScifloExecutorError("Encountered error calling \
getWorkUnit(): %s\n%s" % (str(e), getTb()))
            wuid = wu.getWuid()
            appRes = WuReady(wu)
            # update info in work unit json for monitoring
            updateJson(wu.getJsonFile(), wu.getInfo(),
                       stringifyKeys=STRINGIFY_FIELDS,
                       ubt=self.publicizeUbt,
                       publicizeKeys=WORK_UNIT_PUBLICIZE_FIELDS,
                       pickleKeys=PICKLE_FIELDS)
            self.updateStatus(
                'WorkUnit status for "%s": %s' % (procId, readyStatus),
                wu.getInfo())
        else:
            wuid = None
            appRes = w
            # Bug fix: this branch previously passed wu.getInfo(), but `wu`
            # is unbound here on the first unresolved unit (NameError) or
            # stale from a prior loop iteration.  Pass None instead.
            # NOTE(review): confirm updateStatus() tolerates a None info arg.
            self.updateStatus(
                'WorkUnit status for "%s": %s' % (procId, waitingStatus),
                None)
        self.procIdWuidMap[procId] = wuid
        self.procIds.append(procId)
        self.applyResultsDict[procId] = appRes
        self.resultsDict[procId] = NoResult()
        self.postExecResultsDict[procId] = w.getPostExecutionTypeList()
    self.output = self.sciflo.getFlowOutputConfigs()
    # sciflo info
    self.scifloInfo = scifloInfo(None, scifloid=self.scifloid,
                                 scifloName=self.scifloName,
                                 call=self.sflString, args=self.args,
                                 workDir=self.workDir, status=sentStatus,
                                 pid=self.pid, procIds=self.procIds,
                                 procIdWuidMap=self.procIdWuidMap,
                                 outputDir=self.outputDir,
                                 jsonFile=self.jsonFile,
                                 svgFile=self.svgFile,
                                 executionLog=self.logFile)
    # update json
    updateJson(self.jsonFile, self.scifloInfo,
               stringifyKeys=STRINGIFY_FIELDS, ubt=self.publicizeUbt,
               publicizeKeys=SCIFLO_PUBLICIZE_FIELDS,
               pickleKeys=PICKLE_FIELDS)