def main():
    """Log in to the remote site and download a range of result pages concurrently."""
    app_name = sys.argv[0]  # kept for parity; only referenced by the commented-out log-file name
    logging.basicConfig(
        # filename='./log/' + app_name + '_' + time.strftime("%Y%m%d_%H%M%S") + '.log',
        datefmt='%Y-%m%d %H:%M:%S',
        format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
        level=logging.INFO,
    )
    logger = logging.getLogger(sys._getframe().f_code.co_name)
    logger.info('Started')

    parseArgs()

    logger.debug('requests.session')
    session = requests.session()
    # `mount` a custom adapter that retries failed connections for HTTP requests.
    session.mount("http://", requests.adapters.HTTPAdapter(max_retries=10))

    logger.debug('session.post')
    response = session.post(LOGIN_URL, data=LOGIN_PARAM)

    last_id = idResult
    first_id = idResult - quantResult

    # Fan the downloads out over a fixed-size worker pool and wait for them all.
    workers = ThreadPool(10)
    logger.debug('for idAtual in xrange(%d, %d, -1)' % (last_id, first_id))
    for result_id in xrange(last_id, first_id, -1):
        workers.add_task(downloadResult, session, result_id)
    workers.wait_completion()

    logger.info('Finished')
def main(condicion_venta, estado, threads):
    """Scrape all result pages for the given sale-condition/status filters.

    Returns 1 when the filters or the thread count cannot be parsed;
    otherwise runs the scrape and prints the elapsed time.
    """
    started_at = datetime.now()
    try:
        # Normalize CLI spelling ("foo-bar" -> "foo_bar") and map onto the enums.
        condicion_venta = CondicionVenta[condicion_venta.replace('-', '_')]
        estado = Estado[estado.replace('-', '_')]
        max_threads = int(threads)
    except KeyError:
        print(
            'No fue posible determinar la condicion de venta o estado de medicamentos a procesar'
        )
        return 1
    except ValueError:
        print('No se proporcionó un número de hilos de ejecución válido')
        return 1

    print('Parámetros de búsqueda')
    print('Venta : {0}'.format(condicion_venta.value))
    print('Vigente: {0}'.format(estado.value))

    # One throwaway parser instance only to discover how many pages exist.
    probe = IspParser(sale_terms=condicion_venta, status=estado)
    total_pages = probe.pages_count

    workers = ThreadPool(max_threads, IspParser)
    for page in range(1, total_pages + 1):
        workers.add_task({
            'sale_terms': condicion_venta,
            'status': estado,
            'page_number': page
        })
    workers.wait_completion()

    print('Tiempo transcurrido: {0}'.format(datetime.now() - started_at))
def test_thread_pool(): pool = ThreadPool(2, 0.5) for i in range(2): handle = login.login('ssh', 'root', 'n', '10.20.60.23') task = (handle.execute_cmd, ('uname -a',), {}) pool.add_task(task) print pool.get_result() pool.task_join()
def test_task_execution(self):
    """Verify every queued task is handled within a one-second window."""
    global cnt_handle
    worker_pool = ThreadPool()
    expected_total = 10
    # Build all tasks first, then enqueue them.
    tasks = [TaskTest("task %s" % n, self.task_handle) for n in range(expected_total)]
    for job in tasks:
        worker_pool.add_task(job)
    # Busy-wait until every task was handled or one second has elapsed.
    started = time.time()
    while time.time() - started <= 1.0 and cnt_handle != expected_total:
        pass
    self.assertEqual(cnt_handle, expected_total)
class EventStager(object):
    """Stages event-service output files from a work directory to an objectstore.

    The stager scans ``workDir`` for ``*.dump`` manifests describing produced
    event outputs, uploads them via an S3 site mover, reports the event-range
    status back to the PanDA server, and coordinates a daemon/parent pair
    through two JSON marker files (``EventStagerStatusCan.json`` /
    ``EventStagerStatus.json``).  Python 2 code (``has_key``,
    ``except Exception, e``, ``print`` statements, ``commands`` module).
    """

    def __init__(self, workDir, setup, esPath, token, experiment, userid, sitename, outputDir=None, yodaToOS=False, threads=10, isDaemon=False, process=0, totalProcess=1):
        # process/totalProcess shard the work: each stager instance only claims
        # files whose name hashes to its own slot (see getUnstagedOutputFiles).
        self.__workDir = workDir
        self.__updateEventRangesDir = os.path.join(self.__workDir, 'updateEventRanges_%s' % process)
        if not os.path.exists(self.__updateEventRangesDir):
            os.makedirs(self.__updateEventRangesDir)
        self.__logFile = os.path.join(workDir, 'EventStager.log')
        self.__setup = setup
        self.__siteMover = S3ObjectstoreSiteMover(setup, useTimerCommand=False)
        self.__esPath = esPath
        self.__token = token
        self.__experiment = experiment
        self.__outputDir = outputDir
        self.__userid = userid
        self.__sitename = sitename
        # Tracing report template passed to every put_data call.
        self.__report = getInitialTracingReport(userid=self.__userid, sitename=self.__sitename, dsname=None, eventType="objectstore", analysisJob=False, jobId=None, jobDefId=None, dn=self.__userid)
        self.__num_stagingFile = 0
        # All three dicts are keyed by manifest filename:
        #   __eventRanges        -> {eventRangeID: {'retry': n, 'event': tuple}} still pending
        #   __eventRanges_staged -> list of (jobId, id, status, output) done (ok or gave up)
        #   __eventRanges_faileStaged -> list of tuples awaiting a retry
        self.__eventRanges = {}
        self.__eventRanges_staged = {}
        self.__eventRanges_faileStaged = {}
        self.__eventStager = None          # Popen handle of the child stager process
        self.__canFinish = False           # set by parent (marker file) to allow shutdown
        self.__status = 'new'
        self.__threads = threads
        self.__isDaemon = isDaemon
        self.__startTime = time.time()
        self.__processedJobs = []
        self.__handlingOthers = 0
        self.__otherProcesses = []
        self.__startWait = None
        self.__waitTime = 15 * 60 # 15 minutes
        self.__yodaToOS = yodaToOS
        if not os.environ.has_key('PilotHomeDir'):
            os.environ['PilotHomeDir'] = os.path.dirname(__file__)
        self.__process = process
        self.__totalProcess = totalProcess
        self.__siteMover.setup(experiment)
        self.__threadpool = ThreadPool(self.__threads)
        logging.info("Init EventStager workDir %s setup %s esPath %s token %s experiment %s userid %s sitename %s threads %s outputDir %s isDaemond %s" % (self.__workDir, self.__setup, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, self.__isDaemon))

    def renewEventStagerStatus(self):
        """Exchange state with the peer process through two JSON marker files.

        Daemon side: reads the 'can finish' marker, writes the 'finished' one.
        Parent side: reads 'finished', writes 'can finish'.
        """
        canFinish_file = os.path.join(self.__workDir, 'EventStagerStatusCan.json')
        finished_file = os.path.join(self.__workDir, 'EventStagerStatus.json')
        if self.__isDaemon:
            if os.path.exists(canFinish_file):
                # The marker file's mere existence is the signal; its JSON
                # payload is deliberately ignored (see commented-out parse).
                #with open(canFinish_file) as inputFile:
                #    origin_status = json.load(inputFile)
                #    self.__canFinish = origin_status['canFinish']
                self.__canFinish = True
            if self.__status == "finished":
                status = {'status': self.__status}
                with open(finished_file, 'w') as outputFile:
                    json.dump(status, outputFile)
            elif os.path.exists(finished_file):
                os.remove(finished_file)
        else:
            if os.path.exists(finished_file):
                #with open(finished_file) as inputFile:
                #    origin_status = json.load(inputFile)
                #    self.__status = origin_status['status']
                self.__status = "finished"
            if self.__canFinish:
                status = {'canFinish': self.__canFinish}
                with open(canFinish_file, 'w') as outputFile:
                    json.dump(status, outputFile)
            elif os.path.exists(canFinish_file):
                os.remove(canFinish_file)

    def start(self):
        """Spawn the stager child process (MVEventStager.py when an outputDir is set)."""
        try:
            self.renewEventStagerStatus()
            if self.__outputDir:
                stageCmd = "MVEventStager.py"
            else:
                stageCmd = "EventStager.py"

            yoda_to_os = ''
            if self.__yodaToOS:
                yoda_to_os = '--YodaToOS '

            # Child command line mirrors this object's configuration; output is
            # appended to the stager log file via shell redirection.
            if self.__setup and len(self.__setup.strip()):
                cmd = 'python %s/%s --workDir %s --setup %s --esPath %s --token %s --experiment %s --userid %s --sitename %s --threads %s --outputDir %s %s--isDaemon 2>&1 1>>%s' % (self.__workDir, stageCmd, self.__workDir, self.__setup, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, yoda_to_os, self.__logFile)
            else:
                cmd = 'python %s/%s --workDir %s --esPath %s --token %s --experiment %s --userid %s --sitename %s --threads %s --outputDir %s %s--isDaemon 2>&1 1>>%s' % (self.__workDir, stageCmd, self.__workDir, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, yoda_to_os, self.__logFile)
            pUtil.tolog("Start Event Stager: %s" % cmd)
            self.__eventStager = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout, shell=True)
        except:
            pUtil.tolog("Failed to start Event Stager: %s" % traceback.format_exc())

    def getLog(self):
        """Return the path of the child stager's log file."""
        return self.__logFile

    def monitor(self):
        """Restart the child stager if it is missing or has exited.

        NOTE(review): the condition parses as
        ``(not isFinished() and stager is None) or (stager.poll() is not None)``
        — when the stager is still None the right-hand ``poll()`` would raise
        AttributeError (swallowed by the bare except).  Parentheses around the
        ``or`` arm were probably intended; confirm before changing.
        """
        try:
            self.renewEventStagerStatus()
            if not self.isFinished() and self.__eventStager is None or self.__eventStager.poll() is not None:
                pUtil.tolog("Event Stager failed. Try to start it.")
                self.start()
        except:
            pUtil.tolog("Failed to monitor Event Stager: %s" % traceback.format_exc())

    def finish(self):
        """Signal the child stager to finish once all events are staged out."""
        try:
            pUtil.tolog("Tell Event Stager to finish after finishing staging out all events")
            self.__canFinish = True
            self.renewEventStagerStatus()
        except:
            pUtil.tolog("Failed to monitor Event Stager: %s" % traceback.format_exc())

    def terminate(self):
        """Terminate the child stager process immediately."""
        try:
            pUtil.tolog("Terminate Event Stager")
            self.__eventStager.terminate()
        except:
            pUtil.tolog("Failed to terminate Event Stager: %s" % traceback.format_exc())

    def isFinished(self):
        """True once shutdown was both allowed (canFinish) and reached (finished)."""
        if self.__canFinish and self.__status == 'finished':
            return True
        return False

    def stageOutEvent(self, output_info):
        """Upload one event output file to the objectstore (thread-pool task).

        ``output_info`` is ``(manifest filename, jobId, eventRangeID, status, output path)``.
        Successes move the range into __eventRanges_staged; failures are queued
        in __eventRanges_faileStaged for checkFailedStagingFiles to retry.
        """
        filename, jobId, eventRangeID, status, output = output_info
        try:
            if status == 'failed':
                # The payload already failed upstream: record it as final.
                self.__eventRanges_staged[filename].append((jobId, eventRangeID, status, output))
                if eventRangeID not in self.__eventRanges[filename]:
                    logging.warning("stageOutEvent: %s is not in eventRanges" % eventRangeID)
                else:
                    del self.__eventRanges[filename][eventRangeID]
            if status == 'finished':
                if not os.path.exists(output):
                    # Nothing to upload; just drop the pending entry.
                    if eventRangeID in self.__eventRanges[filename]:
                        del self.__eventRanges[filename][eventRangeID]
                    return
                # NOTE(review): result is unpacked into self.arch_type (an
                # instance attribute) — racy across pool threads; confirm intent.
                ret_status, pilotErrorDiag, surl, size, checksum, self.arch_type = self.__siteMover.put_data(output, os.path.join(self.__esPath, os.path.basename(output)), lfn=os.path.basename(output), report=self.__report, token=self.__token, experiment=self.__experiment)
                if ret_status == 0:
                    try:
                        self.__eventRanges_staged[filename].append((jobId, eventRangeID, status, output))
                        if eventRangeID not in self.__eventRanges[filename]:
                            logging.warning("stageOutEvent: %s is not in eventRanges" % eventRangeID)
                        else:
                            del self.__eventRanges[filename][eventRangeID]
                        # Local file removal is deferred to checkFinishedStagingFiles.
                        #logging.info("Remove staged out output file: %s" % output)
                        #os.remove(output)
                    except Exception, e:
                        logging.info("!!WARNING!!2233!! remove ouput file threw an exception: %s" % (e))
                else:
                    logging.info("!!WARNING!!1164!! Failed to upload file to objectstore: %d, %s" % (ret_status, pilotErrorDiag))
                    self.__eventRanges_faileStaged[filename].append((jobId, eventRangeID, status, output))
        except:
            logging.warning(traceback.format_exc())
            self.__eventRanges_faileStaged[filename].append((jobId, eventRangeID, status, output))

    def sort_file_by_mtime(self, path, files):
        """Return ``files`` sorted oldest-first by modification time under ``path``."""
        mtime = lambda f: os.stat(os.path.join(path, f)).st_mtime
        return list(sorted(files, key=mtime))

    def getUnstagedOutputFiles(self, ext=".dump"):
        """List not-yet-claimed files with the given extension, oldest first.

        ``.dump`` manifests are sharded across processes by filename hash;
        other extensions are handled by process 0 only.
        """
        outputFiles = []
        all_files = os.listdir(self.__workDir)
        if ext == ".dump":
            for file in all_files:
                if file.endswith(ext):
                    if (int(hashlib.sha1(file).hexdigest(), 16) % self.__totalProcess) == self.__process:
                        filename = os.path.join(self.__workDir, file)
                        outputFiles.append(file)
        else:
            for file in all_files:
                if file.endswith(ext):
                    if self.__process == 0:
                        filename = os.path.join(self.__workDir, file)
                        outputFiles.append(file)
        if outputFiles:
            outputFiles = self.sort_file_by_mtime(self.__workDir, outputFiles)
            logging.info("UnStaged Output files: %s" % outputFiles)
        return outputFiles

    def updateEventRange(self, event_range_id, status='finished'):
        """Update a single event range on the Event Server.

        Returns ``(return code, message)``; message is empty on success.
        """
        pUtil.tolog("Updating an event range..")
        message = ""
        # url = "https://aipanda007.cern.ch:25443/server/panda"
        url = "https://pandaserver.cern.ch:25443/server/panda"
        node = {}
        node['eventRangeID'] = event_range_id
        # node['cpu'] = eventRangeList[1]
        # node['wall'] = eventRangeList[2]
        node['eventStatus'] = status
        # tolog("node = %s" % str(node))
        # open connection
        ret = pUtil.httpConnect(node, url, path=self.__updateEventRangesDir, mode="UPDATEEVENTRANGE")
        # response = ret[1]
        if ret[0]: # non-zero return code
            message = "Failed to update event range - error code = %d" % (ret[0])
        else:
            message = ""
        return ret[0], message

    def updateEventRanges(self, event_ranges):
        """Bulk-update event ranges on the Event Server.

        Returns ``(status, message)`` where message is the JSON 'Returns'
        payload on success.
        """
        pUtil.tolog("Updating event ranges..")
        message = ""
        #url = "https://aipanda007.cern.ch:25443/server/panda"
        url = "https://pandaserver.cern.ch:25443/server/panda"
        # eventRanges = [{'eventRangeID': '4001396-1800223966-4426028-1-2', 'eventStatus':'running'}, {'eventRangeID': '4001396-1800223966-4426028-2-2','eventStatus':'running'}]
        node = {}
        node['eventRanges'] = json.dumps(event_ranges)
        # open connection
        ret = pUtil.httpConnect(node, url, path=self.__updateEventRangesDir, mode="UPDATEEVENTRANGES")
        # response = json.loads(ret[1])
        status = ret[0]
        if ret[0]: # non-zero return code
            # NOTE(review): BUG — the format string has one %d placeholder but
            # two arguments, so this line itself raises TypeError on the error
            # path.  Should be "... = %d, error: %s" % (ret[0], ret[1]).
            message = "Failed to update event range - error code = %d, error: " % (ret[0], ret[1])
        else:
            response = json.loads(json.dumps(ret[1]))
            status = int(response['StatusCode'])
            message = json.dumps(response['Returns'])
        return status, message

    def cleanStagingFiles(self, older=None):
        """Reclaim '.staging'/'copying' files back to their unclaimed names.

        With ``older=None`` (startup) all in-memory tracking is reset and every
        stale marker owned by this process is reclaimed; with a number, only
        'cmdcopying' files untouched for more than ``older`` seconds are.
        """
        if older == None:
            self.__eventRanges = {}
            self.__eventRanges_staged = {}
            self.__eventRanges_faileStaged = {}
        all_files = os.listdir(self.__workDir)
        for file in all_files:
            if older == None:
                if file.endswith(".dump.staging"):
                    origin_file = file.replace(".dump.staging", ".dump")
                    # Shard ownership is decided on the *original* name so the
                    # same process reclaims what it claimed.
                    if (int(hashlib.sha1(origin_file).hexdigest(), 16) % self.__totalProcess) == self.__process:
                        filepath = os.path.join(self.__workDir, file)
                        os.rename(filepath, filepath.replace(".dump.staging", ".dump"))
                if file.endswith("cmdcopying"):
                    origin_file = file.replace("cmdcopying", "cmd")
                    if self.__process == 0:
                        filepath = os.path.join(self.__workDir, file)
                        os.rename(filepath, filepath.replace("cmdcopying", "cmd"))
            else:
                if file.endswith("cmdcopying"):
                    present = time.time()
                    origin_file = file.replace("cmdcopying", "cmd")
                    if self.__process == 0:
                        filepath = os.path.join(self.__workDir, file)
                        if (present - os.path.getmtime(filepath)) > older:
                            os.rename(filepath, filepath.replace("cmdcopying", "cmd"))

    def getEventRanges(self):
        """Claim up to ~5 manifests, parse them, and queue their uploads.

        Each manifest line is ``jobId eventRange status output[,...]``; the
        manifest is renamed to '.staging' once all its lines are queued.
        """
        if len(self.__eventRanges.keys()) > 5:
            return
        outputFiles = self.getUnstagedOutputFiles()
        for file in outputFiles:
            if len(self.__eventRanges.keys()) > 5:
                return
            self.__startWait = None
            self.__eventRanges[file] = {}
            self.__eventRanges_staged[file] = []
            self.__eventRanges_faileStaged[file] = []
            filepath = os.path.join(self.__workDir, file)
            handle = open(filepath)
            for line in handle:
                if len(line.strip()) == 0:
                    continue
                # NOTE(review): as written this replace is a no-op (same string
                # both sides); it likely meant to collapse double spaces before
                # the split — confirm against the manifest writer.
                line = line.replace(" ", " ")
                jobId, eventRange, status, output = line.split(" ")
                output = output.split(",")[0]
                self.__eventRanges[file][eventRange] = {'retry': 0, 'event': (jobId, eventRange, status, output)}
                self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRange, status, output))
                if jobId not in self.__processedJobs:
                    self.__processedJobs.append(jobId)
            handle.close()
            os.rename(filepath, filepath + ".staging")

    def checkMissedStagingFiles(self):
        """Restore manifests whose '.staged.reported' marker is empty (0 bytes).

        NOTE(review): BUG — the rename is guarded by ``not os.path.exists(back_file)``
        yet then calls ``os.rename(back_file, origin_file)``, which must raise
        when back_file is absent; the bare except only logs it.  The guard was
        probably meant to require back_file to exist.
        """
        all_files = os.listdir(self.__workDir)
        for file in all_files:
            try:
                if file.endswith(".dump.staged.reported"):
                    origin_file = file.replace(".dump.staged.reported", ".dump")
                    filepath = os.path.join(self.__workDir, file)
                    size = os.path.getsize(filepath)
                    if size == 0:
                        if (int(hashlib.sha1(origin_file).hexdigest(), 16) % self.__totalProcess) == self.__process:
                            back_file = filepath.replace(".dump.staged.reported", ".dump.BAK")
                            origin_file = filepath.replace(".dump.staged.reported", ".dump")
                            staging_file = filepath.replace(".dump.staged.reported", ".dump.staging")
                            if not os.path.exists(back_file) and not os.path.exists(origin_file) and not os.path.exists(staging_file):
                                os.remove(filepath)
                                os.rename(back_file, origin_file)
            except:
                logging.warning("Failed to rename %s to %s: %s" % (back_file, origin_file, traceback.format_exc()))

    def checkFailedStagingFiles(self):
        """Re-queue failed uploads up to 3 times, then mark them 'failed'."""
        for file in self.__eventRanges_faileStaged:
            while self.__eventRanges_faileStaged[file]:
                jobId, eventRangeID, status, output = self.__eventRanges_faileStaged[file].pop()
                if eventRangeID not in self.__eventRanges[file]:
                    logging.warning("checkFailedStagingFiles: %s is not in eventRanges" % eventRangeID)
                else:
                    if self.__eventRanges[file][eventRangeID]['retry'] < 3:
                        self.__eventRanges[file][eventRangeID]['retry'] += 1
                        self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRangeID, status, output))
                    else:
                        self.__eventRanges_staged[file].append((jobId, eventRangeID, 'failed', output))
                        del self.__eventRanges[file][eventRangeID]

    def checkFinishedStagingFiles(self):
        """Report fully-staged manifests to the server and retire them.

        For each manifest with no pending ranges: report staged ranges to the
        server in chunks of 100 (one retry on failure), delete local outputs the
        server acknowledged, write a '.staged.reported' record, and rename the
        '.staging' manifest to '.BAK'.
        """
        finishedFiles = []
        for file in self.__eventRanges:
            try:
                if len(self.__eventRanges[file].keys()) == 0:
                    filepath = os.path.join(self.__workDir, file)
                    handle = open(filepath + ".staged.reported", 'w')
                    finishedEventRanges = []
                    for chunk in pUtil.chunks(self.__eventRanges_staged[file], 100):
                        try:
                            eventRanges = []
                            for outputEvents in chunk:
                                jobId, eventRangeID, status, output = outputEvents
                                if eventRangeID not in finishedEventRanges:
                                    # Report each event range at most once.
                                    finishedEventRanges.append(eventRangeID)
                                    if status == 'finished':
                                        eventRanges.append({"eventRangeID": eventRangeID, "eventStatus": status})
                                    if status.startswith("ERR"):
                                        eventRanges.append({"eventRangeID": eventRangeID, "eventStatus": 'failed'})
                            update_status, update_output = self.updateEventRanges(eventRanges)
                            logging.info("update Event Range: status: %s, output: %s" % (update_status, update_output))
                            if update_status:
                                # One immediate retry on a non-zero server status.
                                update_status, update_output = self.updateEventRanges(eventRanges)
                                logging.info("update Event retry Range: status: %s, output: %s" % (update_status, update_output))
                            if update_status == 0:
                                try:
                                    ret_outputs = json.loads(json.loads(update_output))
                                    if len(ret_outputs) == len(chunk):
                                        for i in range(len(ret_outputs)):
                                            try:
                                                if ret_outputs[i]:
                                                    jobId, eventRangeID, status, output = chunk[i]
                                                    logging.info("Remove %s" % output)
                                                    os.remove(output)
                                                    handle.write('{0} {1} {2} {3}\n'.format(jobId, eventRangeID, status, output))
                                            except:
                                                logging.warning("Failed to remove %s: %s" % (output, traceback.format_exc()))
                                except:
                                    logging.warning(traceback.format_exc())
                        except:
                            logging.warning(traceback.format_exc())
                    handle.close()
                    os.rename(filepath + ".staging", filepath + ".BAK")
                    finishedFiles.append(file)
            except:
                logging.warning(traceback.format_exc())
        # Drop retired manifests from all tracking dicts (outside the iteration).
        for file in finishedFiles:
            del self.__eventRanges[file]
            del self.__eventRanges_staged[file]
            del self.__eventRanges_faileStaged[file]

    def checkLostEvents(self):
        """Re-queue every still-pending event range (pool drained but work remains)."""
        for file in self.__eventRanges:
            for eventRange in self.__eventRanges[file]:
                jobId, eventRange, status, output = self.__eventRanges[file][eventRange]['event']
                self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRange, status, output))

    def handleGfalFile(self, gfalFile):
        """Run the gfal copy command stored in a '.gfalcmd' file (child process).

        Claims the file by renaming to 'copying', shortens the timeout flag,
        retries up to 3 times, then renames to 'finished' or 'failed'.
        """
        try:
            for i in range(3):
                gfalFile = os.path.join(self.__workDir, gfalFile)
                os.rename(gfalFile, gfalFile + "copying")
                handle = open(gfalFile + "copying")
                cmd = handle.read()
                handle.close()
                cmd = cmd.replace(" -t 3600 ", " -t 300 ")
                logging.info("Execute command: %s" % cmd)
                # status, output = commands.getstatusoutput(cmd)
                status, output = TimerCommand(cmd).run(600)
                logging.info("Status %s output %s" % (status, output))
                if status == 0:
                    os.rename(gfalFile + "copying", gfalFile + "finished")
                    return
                else:
                    os.rename(gfalFile + "copying", gfalFile)
            os.rename(gfalFile, gfalFile + "failed")
        except:
            logging.error("handleGfalFile %s" % traceback.format_exc())
        finally:
            self.__handlingOthers -= 1

    def handleS3File(self, s3File):
        """Run the S3 'source destination' copy stored in a '.s3cmd' file (child process)."""
        try:
            s3File = os.path.join(self.__workDir, s3File)
            os.rename(s3File, s3File + "copying")
            handle = open(s3File + "copying")
            cmd = handle.read()
            handle.close()
            source, destination = cmd.split(" ")
            logging.info("S3 stage out from %s to %s" % (source, destination))
            ret_status, pilotErrorDiag, surl, size, checksum, self.arch_type = self.__siteMover.put_data(source, destination, lfn=os.path.basename(destination), report=self.__report, token=self.__token, experiment=self.__experiment, timeout=300)
            logging.info("Status %s output %s" % (ret_status, pilotErrorDiag))
            if ret_status == 0:
                os.rename(s3File + "copying", s3File + "finished")
            else:
                os.rename(s3File + "copying", s3File)
        except:
            logging.error("handleS3File %s" % traceback.format_exc())
        finally:
            self.__handlingOthers -= 1

    def handleOtherFiles(self):
        """Launch one subprocess per pending '.gfalcmd'/'.s3cmd' file; reap finished ones."""
        gfalFiles = self.getUnstagedOutputFiles(".gfalcmd")
        for gfalFile in gfalFiles:
            p = multiprocessing.Process(target=self.handleGfalFile, args=(gfalFile,))
            p.start()
            self.__otherProcesses.append(p)
            self.__handlingOthers += 1
            self.__startWait = None

        s3Files = self.getUnstagedOutputFiles(".s3cmd")
        for s3File in s3Files:
            p = multiprocessing.Process(target=self.handleS3File, args=(s3File,))
            p.start()
            self.__otherProcesses.append(p)
            self.__handlingOthers += 1
            self.__startWait = None

        termProcesses = []
        for p in self.__otherProcesses:
            if not p.is_alive():
                termProcesses.append(p)
        for p in termProcesses:
            self.__otherProcesses.remove(p)

    def killStallProcess(self):
        """Kill other EventStager.py processes stalled for over a minute.

        NOTE(review): the shell pipeline hard-codes the user 'wguan' —
        presumably the author's account; verify before reuse.
        """
        command = "find /proc -maxdepth 1 -user wguan -type d -mmin +1 -exec basename {} \; | xargs ps | grep EventStager.py | awk '{ print $1 }' | grep -v " + str(os.getpid()) + "|xargs kill"
        print command
        status, output = commands.getstatusoutput(command)
        print status
        print output

    def run(self):
        """Main daemon loop: claim, stage out, report, and retire manifests.

        Roughly once a minute it refreshes status, reclaims stale files, pulls
        new manifests and processes retries/completions; every pass it handles
        gfal/S3 command files.  Exits when finish is signalled or after the
        idle wait time expires with nothing left to do.
        """
        logging.info("Start to run")
        self.cleanStagingFiles()
        timeStart = time.time() - 60
        while not self.isFinished():
            try:
                if (time.time() - timeStart) > 60:
                    self.renewEventStagerStatus()
                    self.cleanStagingFiles(20 * 60)
                    # self.checkMissedStagingFiles()
                    self.getEventRanges()
                    self.checkFailedStagingFiles()
                    self.checkFinishedStagingFiles()
                    if self.__canFinish and len(self.__eventRanges.keys()) == 0:
                        self.__status = 'finished'
                        self.renewEventStagerStatus()
                    if self.__threadpool.is_empty():
                        self.checkLostEvents()
                    timeStart = time.time()
                self.handleOtherFiles()
                time.sleep(30)
                logging.debug("len(eventranges:%s)" % len(self.__eventRanges.keys()))
                #logging.debug("%s" % self.__eventRanges)
                logging.debug("otherProcesses:%s" % len(self.__otherProcesses))
                if len(self.__eventRanges.keys()) == 0 and len(self.__otherProcesses) == 0:
                    self.cleanStagingFiles()
                    if self.__startWait == None:
                        # Begin the idle countdown toward shutdown.
                        self.__startWait = time.time()
                    self.killStallProcess()
                if self.__startWait and (time.time() - self.__startWait) > self.__waitTime:
                    break
            except:
                logging.info(traceback.format_exc())
                #sys.exit(1)
        logging.info("Finished to run")
def matchAll(phase, suffix):
    """Match 101meta metadata rules against every file of the 101repo tree.

    Resets the module-level counters/collections (mutated concurrently by
    ``handleFile`` via the thread pool), walks ``contributions/``, queues one
    ``handleFile`` task per file, and returns a results dict; summary counters
    are printed to stdout.  Python 2 code (print statements).
    """
    # All of these module-level names are (re)initialized here and updated by
    # the worker tasks while the pool runs.
    global basics
    global rules
    global matches
    global failures
    global predicates
    global locators
    global noFiles
    global noFilesAffected
    global noUnits
    global noPatternConstraints
    global noPatternConstraintsOk
    global noContentConstraints
    global noContentConstraintsOk
    global noPredicateConstraints
    global noPredicateConstraintsOk
    global noFragments
    # The "basics" snapshot is only refreshed for the non-basics phases.
    if (phase != "basics"):
        basics = tools101.getBasics()
    rules = json.load(open(const101.rulesDump, 'r'))["results"]["rules"]
    matches = list()
    failures = list()
    predicates = set()
    locators = set()
    noFiles = 0
    noUnits = 0
    noFilesAffected = 0
    noPatternConstraints = 0
    noPatternConstraintsOk = 0
    noContentConstraints = 0
    noContentConstraintsOk = 0
    noPredicateConstraints = 0
    noPredicateConstraintsOk = 0
    noFragments = 0
    pool = ThreadPool(4)
    print "Matching 101meta metadata on 101repo (phase \"" + str(phase) + "\")."
    for root, dirs, files in os.walk(os.path.join(const101.sRoot, "contributions"), followlinks=True):
        # Skip anything under the repository's .git directory.
        if not root.startswith(os.path.join(const101.sRoot, ".git") + os.sep):
            for basename in files:
                noFiles += 1
                if not basename in [".gitignore"]:
                    # Path relative to the repo root, as expected by handleFile.
                    dirname = root[len(const101.sRoot) + 1:]
                    pool.add_task(handleFile, phase, dirname, basename, suffix)
                    #handleFile(phase, dirname, basename, suffix)
    sys.stdout.write('\n')
    pool.wait_completion()
    mr = dict()
    mr["matches"] = matches
    mr["failures"] = failures
    mr["rules"] = rules
    # Phase-specific extras collected by the workers.
    if phase == "predicates":
        mr["predicates"] = list(predicates)
    if phase == "fragments":
        mr["locators"] = list(locators)
    print str(noFiles) + " files examined."
    print str(noFilesAffected) + " files affected."
    print str(len(failures)) + " failures encountered."
    print str(noUnits) + " metadata units attached."
    print str(noContentConstraints) + " content constraints checked."
    print str(noContentConstraintsOk) + " content constraints succeeded."
    print str(noPatternConstraints) + " filename-pattern constraints checked."
    print str(noPatternConstraintsOk) + " filename-pattern constraints succeeded."
    if phase == "predicates":
        print str(noPredicateConstraints) + " predicate constraints checked."
        print str(noPredicateConstraintsOk) + " predicate constraints succeeded."
    if phase == "fragments":
        print str(len(locators)) + " fragment locators exercised."
        print str(noFragments) + " fragment descriptions checked."
    return mr
def runHPCEvent(self):
    """Run an HPC event-service job end to end.

    Prepares the HPC job, fetches event ranges from the PanDA server, submits
    via HPCManager, polls until completion while staging outputs out through a
    thread pool, then performs two shrinking retry rounds for failed
    stage-outs and reports the final HPC status.
    """
    tolog("runHPCEvent")
    self.__job.jobState = "running"
    self.__job.setState([self.__job.jobState, 0, 0])
    self.__job.pilotErrorDiag = None
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

    defRes = self.getDefaultResources()
    # Resource flags come back as strings, not booleans.
    if defRes['copy_input_files'] == 'true':
        self.__copyInputFiles = True
    else:
        self.__copyInputFiles = False

    status, output, hpcJob = self.prepareHPCJob()
    if status == 0:
        tolog("HPC Job: %s " % hpcJob)
    else:
        tolog("failed to create the Tag file")
        self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
        return

    self.__hpcStatus = None
    self.__hpcLog = None

    logFileName = None
    tolog("runJobHPCEvent.getPilotLogFilename=%s" % self.getPilotLogFilename())
    if self.getPilotLogFilename() != "":
        logFileName = self.getPilotLogFilename()
    hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)
    self.__hpcManager = hpcManager
    self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
    self.__job.setMode(self.HPCMode)
    self.__job.setHpcStatus('waitingResource')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    hpcManager.getFreeResources(defRes)
    self.__job.coreCount = hpcManager.getCoreCount()
    self.__job.setHpcStatus('gettingEvents')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    # Cap the requested number of event ranges at the configured maximum.
    numRanges = hpcManager.getEventsNumber()
    tolog("HPC Manager needs events: %s, max_events: %s; use the smallest one." % (numRanges, defRes['max_events']))
    if numRanges > int(defRes['max_events']):
        numRanges = int(defRes['max_events'])
    eventRanges = self.getEventRanges(numRanges=numRanges)
    #tolog("Event Ranges: %s " % eventRanges)
    if len(eventRanges) == 0:
        tolog("Get no Event ranges. return")
        return
    for eventRange in eventRanges:
        self.__eventRanges[eventRange['eventRangeID']] = 'new'

    # setup stage out
    self.setupStageOutHPCEvent()

    hpcManager.initJob(hpcJob)
    hpcManager.initEventRanges(eventRanges)
    hpcManager.submit()
    threadpool = ThreadPool(defRes['stageout_threads'])

    # Poll loop: heartbeat at most every 10 minutes (or on state change),
    # staging out whatever outputs have appeared so far.
    old_state = None
    time_start = time.time()
    while not hpcManager.isFinished():
        state = hpcManager.poll()
        self.__job.setHpcStatus(state)
        if old_state is None or old_state != state or time.time() > (time_start + 60 * 10):
            old_state = state
            time_start = time.time()
            tolog("HPCManager Job stat: %s" % state)
            self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
            rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
            self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
        if state and state == 'Complete':
            break
        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)
        time.sleep(30)
        self.updateHPCEventRanges()

    tolog("HPCManager Job Finished")
    self.__job.setHpcStatus('stagingOut')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    # Drain any remaining outputs, then wait for the stage-out pool.
    outputs = hpcManager.getOutputs()
    for output in outputs:
        #self.stageOutHPCEvent(output)
        threadpool.add_task(self.stageOutHPCEvent, output)
    self.updateHPCEventRanges()
    threadpool.wait_completion()
    self.updateHPCEventRanges()

    # Retry round 1: half the threads (integer division; at least one).
    if len(self.__failedStageOuts) > 0:
        tolog("HPC Stage out retry 1")
        half_stageout_threads = defRes['stageout_threads'] / 2
        if half_stageout_threads < 1:
            half_stageout_threads = 1
        threadpool = ThreadPool(half_stageout_threads)
        failedStageOuts = self.__failedStageOuts
        self.__failedStageOuts = []
        for failedStageOut in failedStageOuts:
            threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
        threadpool.wait_completion()
        self.updateHPCEventRanges()

    # Retry round 2: serial, one thread.
    if len(self.__failedStageOuts) > 0:
        tolog("HPC Stage out retry 2")
        threadpool = ThreadPool(1)
        failedStageOuts = self.__failedStageOuts
        self.__failedStageOuts = []
        for failedStageOut in failedStageOuts:
            threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
        threadpool.wait_completion()
        self.updateHPCEventRanges()

    self.__job.setHpcStatus('finished')
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
    self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
    tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
def main(argv):
    """Walk all configured version-control repos and store per-file knowledge.

    ``-u True`` switches to a synchronous in-place update (plus a prune pass);
    otherwise work is fanned out over a thread pool backed by a database
    consumer thread.  Python 2 code (print statements).
    """
    #Parse commandline arguments
    #http://www.tutorialspoint.com/python/python_command_line_arguments.htm
    update_existing = False
    try:
        # NOTE(review): "h:" declares -h as *requiring* an argument, which is
        # unusual for a help flag — plain "hu:" was probably intended; confirm.
        opts, args = getopt.getopt(argv, "h:u:")
    except getopt.GetoptError:
        print "run_processing.py -u <True/False>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print "run_processing.py -u <True/False>"
        elif opt == "-u":
            if arg == "True":
                update_existing = True

    # Every directory under the repos root is a candidate project.
    projects = [d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS) if os.path.isdir(os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))]

    if update_existing:
        print "Running in-place update."
        manager = None
    else:
        manager = consumerDatabaseThreadManager("knowledge")
    pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)
    tip_hashes = {}   # per-project head/tip hash cache
    all_files = []
    last_line = []
    #iterate through all projects
    for project in projects:
        if project in devknowledge.settings.PROJECT_FOLDERS:
            all_files, last_lines = Util.returnFilesLastLine(project)
            if update_existing:
                #there were issues with threads duplicating authors/files so we just mass add them at the beginning
                print "Starting initial processing of authors/files."
                Git.createFilesAuthorsInIndex(project, all_files)
                print "Finished creating authors/files."
            number_to_process = len(all_files)
            print "Number of files to process: ", number_to_process
            processed_files = 0
            for i in range(0, len(all_files)):
                file = all_files[i]
                last_line = last_lines[i]
                print "Project: ", project, " ", file, " number lines: ", last_line
                project_type = Util.returnProjectType(devknowledge.settings.VERSION_CONTROL_REPOS + project)
                if project_type == "git":
                    #run Git analysis
                    if project not in tip_hashes:
                        #create new head hash cache
                        tip_hashes[project] = Util.returnHeadHash(devknowledge.settings.VERSION_CONTROL_REPOS + project)
                    if update_existing:
                        # In-place mode runs synchronously to avoid duplicates.
                        Git.storeKnowledge(manager, project, file, 1, last_line, update_existing, tip_hashes[project])
                    else:
                        pool.add_task(Git.storeKnowledge, manager, project, file, 1, last_line, update_existing, tip_hashes[project])
                elif project_type == "hg":
                    #run Mercurial analysis
                    if project not in tip_hashes:
                        #create new tip hash cache
                        tip_hashes[project] = Mercurial.returnTipHash(devknowledge.settings.VERSION_CONTROL_REPOS + project)
                    pool.add_task(Mercurial.storeKnowledge, manager, project, file, 1, last_line, tip_hashes[project])
                processed_files += 1
                print "Percent done: %.2f %%" % float(float(processed_files) / float(number_to_process) * 100)
            Util.checkDatabaseFolderSize()

    print "Finishing up writing data to database."
    pool.wait_completion()

    if update_existing:
        #prune database of stale file and author nodes
        print "Starting prune of database."
        # NOTE(review): project_type/all_files/project here carry whatever the
        # *last* loop iteration left behind (and project_type is unbound if no
        # project matched) — so only the final project is pruned; confirm intent.
        if project_type == "git":
            Git.pruneDatabaseStaleFiles(all_files, project)
            Git.pruneDatabaseStaleAuthors()
            Git.pruneAllOtherNodes()
        elif project_type == "hg":
            print "Not yet implemented."

    if manager:
        manager.markForFinish()
    print "Done. Exiting."
def compare_site_thread(old_url, new_url, progress_var=None, step=100.0, thread_pool_csv=None):
    """Compare an old and a new website page-by-page using a thread pool.

    Normalizes both URLs, discovers the old site's sitemap and blog page,
    then queues one comparison task per page (homepage, sitemap pages, and
    optionally the blog) on the pool.

    Parameters:
        old_url / new_url: site roots; "http://" is prepended when missing.
        progress_var: optional tk-style variable advanced via .set()/.get().
        step: total progress budget for this site (percent).
        thread_pool_csv: shared pool when checking multiple sites from CSV;
            when None a private pool is created and torn down here.

    Returns:
        True on success, False when no sitemap was found, -1 when the new
        site requires login, None on early aborts (interface stop or a
        sitemap lookup error).
    """
    # check program status (interface may have cancelled the run)
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    # checking multiple sites mode: reuse the caller's pool
    if thread_pool_csv:
        thread_pool = thread_pool_csv
    else:
        thread_pool = ThreadPool(settings["THREADPOOL_SIZE"])
        create_path()
    old_url = old_url.strip()
    new_url = new_url.strip()
    # remove the "/" at the end of the url
    # (endswith instead of [-1] so an empty string cannot raise IndexError)
    if old_url.endswith('/'):
        old_url = old_url[:-1]
    if new_url.endswith('/'):
        new_url = new_url[:-1]
    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url
    # print out the information for old and new sites
    entry_print("-----------------------------------------------------", True)
    entry_print("Old URL: " + old_url, True)
    entry_print("New URL: " + new_url, True)
    entry_print("-----------------------------------------------------", True)
    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # check if the new site needs login
    new_test = get_soup(new_url)
    if new_test:
        title = new_test.find("title")
        if title and title.get_text().strip() == "Login":
            entry_print(
                "New site needs login. Please use login mode to check this site!\n",
                True)
            return -1
    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # get the subpages of old and new sites
    try:
        sites = get_sites(old_url)
    except AttributeError:
        entry_print(
            "Can't find the site map from " + old_url +
            ". Please check if the url is valid!", True)
        thread_pool.destroy()
        return
    old_blog = get_blog_site(old_url)
    new_blog = get_blog_site(new_url)
    # check program status again before queuing any work
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        thread_pool.destroy()
        return
    blog_exists = False
    if old_blog and new_blog:
        blog_exists = True
    # if urls for subpages are not found
    if sites is None:
        record_error(new_url, "sites")
        if progress_var:
            progress_var.set(progress_var.get() + step)
        return False
    # if blog page is not found on exactly one side, record it
    if old_blog is not None and new_blog is None:
        record_error(new_url, "blog")
    elif old_blog is None and new_blog is not None:
        record_error(old_url, "blog")
    setup_step = step * 0.02
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # print out site information
    entry_print("Site Information: ", True)
    # calculate the step for each page (96% of the budget is spent on pages;
    # with a blog, half of it goes to the blog comparison)
    step *= 0.96
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
        entry_print("Old Blog: " + old_blog, True)
        entry_print("New Blog: " + new_blog, True)
    else:
        page_step = step / (len(sites) + 1)
    entry_print("Number of non-blog pages: " + str(len(sites)), True)
    # check the homepage
    thread_pool.add_task(compare_homepage, old_url=old_url, new_url=new_url,
                         progress_var=progress_var, step=page_step)
    # check all the sites in sitemap
    for site in sites:
        # home/main pages are already covered by the homepage comparison
        if site.startswith("/home") or site.startswith("/main"):
            continue
        old_link = old_url + site
        new_link = new_url + site
        thread_pool.add_task(compare_page, old_url=old_link, new_url=new_link,
                             progress_var=progress_var, step=page_step)
    # check all the blog pages
    if blog_exists:
        old_blog_soup = get_soup(old_blog)
        new_blog_soup = get_soup(new_blog)
        compare_blog(old_blog_soup, new_blog_soup, old_blog, new_blog,
                     progress_var=progress_var, step=step / 2)
    # single site mode: this function owns the pool, so drain and destroy it
    if not thread_pool_csv:
        thread_pool.wait_completion()
        thread_pool.destroy()
    entry_print("-----------------------------------------------------\n")
    return True
def main(argv):
    """Entry point for the dependency-processing run (Python 2 script).

    Parses ``-u <True/False>`` (in-place update) and ``-h`` from *argv*,
    then, for each configured project, extracts C/C++ file dependencies
    either inline (update mode) or through a thread pool.
    """
    #Parse commandline arguments
    #http://www.tutorialspoint.com/python/python_command_line_arguments.htm
    update_existing = False
    try:
        opts, args = getopt.getopt(argv, "h:u:")
    except getopt.GetoptError:
        print "run_dependencies.py -u <True/False>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print "run_dependencies.py -u <True/False>"
        elif opt == "-u":
            # any value other than the literal string "True" leaves the flag off
            if arg == "True":
                update_existing = True

    # every directory directly under the repo root is a candidate project
    projects = [
        d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS)
        if os.path.isdir(
            os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))
    ]

    if update_existing:
        print "Running in-place update."
        # in-place mode bypasses the consumer DB thread entirely
        manager = None
    else:
        manager = consumerDatabaseThreadManager("dependencies")

    pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

    #iterate through all projects
    for project in projects:
        if project in devknowledge.settings.PROJECT_FOLDERS:
            all_files, last_lines = Util.returnFilesLastLine(project)
            # only C/C++ files participate in dependency extraction
            c_files = Util.returnCandCPlusPlusFiles(all_files, project)
            number_to_process = len(c_files)
            print "Number of files to process: ", number_to_process
            processed_files = 0
            for file in c_files:
                # NOTE(review): last_lines is aligned with all_files, but it is
                # indexed here by the position within the filtered c_files list,
                # so last_line may not belong to `file`. The value is currently
                # only used by the commented-out print below — confirm intent.
                last_line = last_lines[processed_files]
                #print "Project: ", project, " ", file, " number lines: ", last_line
                # in-place updates run synchronously; fresh runs go through the pool
                if update_existing:
                    FileDependencies.parseFileDependencies(
                        manager, project, file, c_files, update_existing)
                else:
                    pool.add_task(FileDependencies.parseFileDependencies,
                                  manager, project, file, c_files, update_existing)
                processed_files += 1
                print "Percent done: %.2f %%" % float(
                    float(processed_files) / float(number_to_process) * 100)
                Util.checkDatabaseFolderSize()

            print "Finishing up writing data to database."
            pool.wait_completion()

    # manager only exists for fresh runs (it is None in update mode)
    if not update_existing:
        manager.markForFinish()

    print "Done. Exiting."
def runHPCEvent(self):
    """Drive one HPC event-service job from submission to final stage-out.

    Prepares the HPC job, acquires resources and event ranges through the
    HPCManager, submits, then polls until completion while staging out
    produced outputs via a thread pool. Failed stage-outs are retried twice
    (with half the threads, then single-threaded). Job state is reported to
    the pilot server and the Panda server at each transition.
    """
    tolog("runHPCEvent")
    self.__job.jobState = "running"
    self.__job.setState([self.__job.jobState, 0, 0])
    self.__job.pilotErrorDiag = None
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

    defRes = self.getDefaultResources()
    # resource flag arrives as the string 'true'/'false', not a bool
    if defRes['copy_input_files'] == 'true':
        self.__copyInputFiles = True
    else:
        self.__copyInputFiles = False

    status, output, hpcJob = self.prepareHPCJob()
    if status == 0:
        tolog("HPC Job: %s " % hpcJob)
    else:
        tolog("failed to create the Tag file")
        self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
        return

    self.__hpcStatus = None
    self.__hpcLog = None

    logFileName = None
    tolog("runJobHPCEvent.getPilotLogFilename=%s" % self.getPilotLogFilename())
    if self.getPilotLogFilename() != "":
        logFileName = self.getPilotLogFilename()
    hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)
    self.__hpcManager = hpcManager

    self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
    self.__job.setMode(self.HPCMode)
    self.__job.setHpcStatus('waitingResource')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    hpcManager.getFreeResources(defRes)

    self.__job.coreCount = hpcManager.getCoreCount()
    self.__job.setHpcStatus('gettingEvents')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    # cap the requested event count at the configured maximum
    numRanges = hpcManager.getEventsNumber()
    tolog(
        "HPC Manager needs events: %s, max_events: %s; use the smallest one."
        % (numRanges, defRes['max_events']))
    if numRanges > int(defRes['max_events']):
        numRanges = int(defRes['max_events'])
    eventRanges = self.getEventRanges(numRanges=numRanges)
    #tolog("Event Ranges: %s " % eventRanges)
    if len(eventRanges) == 0:
        tolog("Get no Event ranges. return")
        return
    # track per-range stage-out state, starting at 'new'
    for eventRange in eventRanges:
        self.__eventRanges[eventRange['eventRangeID']] = 'new'

    # setup stage out
    self.setupStageOutHPCEvent()

    hpcManager.initJob(hpcJob)
    hpcManager.initEventRanges(eventRanges)

    hpcManager.submit()
    threadpool = ThreadPool(defRes['stageout_threads'])

    old_state = None
    time_start = time.time()
    while not hpcManager.isFinished():
        state = hpcManager.poll()
        self.__job.setHpcStatus(state)
        # re-report on every state change, or at least every 10 minutes
        if old_state is None or old_state != state or time.time() > (
                time_start + 60 * 10):
            old_state = state
            time_start = time.time()
            tolog("HPCManager Job stat: %s" % state)
            self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
            rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
            self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        if state and state == 'Complete':
            break

        # stage out any outputs produced so far while the job is still running
        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)

        time.sleep(30)
        self.updateHPCEventRanges()

    tolog("HPCManager Job Finished")
    self.__job.setHpcStatus('stagingOut')
    rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

    # final stage-out pass for outputs that appeared after the loop exited
    outputs = hpcManager.getOutputs()
    for output in outputs:
        #self.stageOutHPCEvent(output)
        threadpool.add_task(self.stageOutHPCEvent, output)

    self.updateHPCEventRanges()
    threadpool.wait_completion()
    self.updateHPCEventRanges()

    # retry 1: re-run failed stage-outs with half the threads (minimum 1)
    if len(self.__failedStageOuts) > 0:
        tolog("HPC Stage out retry 1")
        half_stageout_threads = defRes['stageout_threads'] / 2
        if half_stageout_threads < 1:
            half_stageout_threads = 1
        threadpool = ThreadPool(half_stageout_threads)
        failedStageOuts = self.__failedStageOuts
        self.__failedStageOuts = []
        for failedStageOut in failedStageOuts:
            threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
        threadpool.wait_completion()
        self.updateHPCEventRanges()

        # retry 2: last attempt, single-threaded
        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 2")
            threadpool = ThreadPool(1)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

    self.__job.setHpcStatus('finished')
    self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
    self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
    tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
class DroidStager(threading.Thread):
    """Background thread that stages out event-service outputs for one rank.

    Pulls output messages from the ES job manager and, per configuration,
    either zips them, pushes them to an objectstore, or copies them to an
    output/global directory, then reports each result on the outputs queue.

    Fix: ``stageOut`` previously called ``self.copyOutputs(...)``, but the
    method is defined as ``copyOutput`` — the plain-copy branch always raised
    AttributeError. The call site now uses the correct name.
    """

    def __init__(self, globalWorkingDir, localWorkingDir, outputs=None,
                 job=None, esJobManager=None, outputDir=None, rank=None,
                 logger=None):
        threading.Thread.__init__(self)
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__rank = rank
        # NOTE(review): `and False` makes this branch unreachable, so the
        # passed-in logger is always ignored and a fresh file logger is
        # created instead — confirm whether that is intentional.
        if logger and False:
            self.__tmpLog = logger
        else:
            curdir = _abspath(self.__localWorkingDir)
            wkdirname = "rank_%s" % str(self.__rank)
            wkdir = _abspath(_join(curdir, wkdirname))
            self.__tmpLog = Logger.Logger(
                filename=os.path.join(wkdir, 'Droid.log'))
        self.__job = job
        self.__esJobManager = esJobManager
        self.__stop = threading.Event()
        self.__isFinished = False
        self.__tmpLog.info("Rank %s: Global working dir: %s" %
                           (self.__rank, self.__globalWorkingDir))
        os.environ['PilotHomeDir'] = os.path.dirname(self.__globalWorkingDir)
        self.__jobId = None
        self.__copyOutputToGlobal = False
        self.__outputDir = outputDir
        self.__hostname = socket.getfqdn()
        self.__outputs = outputs
        self.__threadpool = None
        self.setup(job)

    def setup(self, job):
        """Read stage-out configuration from the job dict; never raises."""
        try:
            self.__jobId = job.get("JobId", None)
            self.__yodaToOS = job.get('yodaToOS', False)
            self.__yodaToZip = job.get('yodaToZip', False)
            self.__zipFileName = job.get('zipFileName', None)
            self.__zipEventRangesName = job.get('zipEventRangesName', None)
            self.__tmpLog.debug("Rank %s: zip file %s" %
                                (self.__rank, self.__zipFileName))
            self.__tmpLog.debug("Rank %s: zip event range file %s" %
                                (self.__rank, self.__zipEventRangesName))
            # zip mode needs both the archive name and the range-list file
            if self.__zipFileName is None or self.__zipEventRangesName is None:
                self.__tmpLog.debug(
                    "Rank %s: either zipFileName(%s) is None or zipEventRanagesName(%s) is None, will not use zip output"
                    % (self.__rank, self.__zipFileName, self.__zipEventRangesName))
                self.__yodaToZip = False
            self.__copyOutputToGlobal = job.get('copyOutputToGlobal', False)

            if self.__yodaToOS:
                # objectstore mode: build a site mover and a stage-out pool
                setup = job.get('setup', None)
                self.__esPath = job.get('esPath', None)
                self.__os_bucket_id = job.get('os_bucket_id', None)
                self.__report = getInitialTracingReport(
                    userid='Yoda', sitename='Yoda', dsname=None,
                    eventType="objectstore", analysisJob=False, jobId=None,
                    jobDefId=None, dn='Yoda')
                self.__siteMover = objectstoreSiteMover(setup,
                                                        useTimerCommand=False)
                self.__cores = int(job.get('ATHENA_PROC_NUMBER', 1))
                self.__tmpLog.debug("Rank %s: start threadpool" %
                                    (self.__rank))
                # one stage-out thread per 8 Athena cores (Python 2 integer
                # division — presumably intentional; verify on Python 3)
                self.__threadpool = ThreadPool(self.__cores / 8)
        except:
            self.__tmpLog.error("Failed to setup Droid stager: %s" %
                                str(traceback.format_exc()))

    def copyOutput(self, output, outputs):
        """Move output files to the output dir, the global dir, or leave them.

        Returns (0, output) with each moved filename rewritten inside the
        *output* message string. Falls back to copy+remove when a cross-device
        rename fails.
        """
        if self.__outputDir:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__outputDir, base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        elif self.__copyOutputToGlobal:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__globalWorkingDir,
                                             base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        else:
            # same directory: nothing to do
            if self.__localWorkingDir == self.__globalWorkingDir:
                return 0, output
            # mirror the local tree into the global working dir
            for filename in outputs:
                #filename = output.split(",")[0]
                new_file_name = filename.replace(self.__localWorkingDir,
                                                 self.__globalWorkingDir)
                dirname = os.path.dirname(new_file_name)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                shutil.copy(filename, new_file_name)
                os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output

    def stageOutToOS(self, outputs):
        """Stage files to the objectstore via the site mover.

        Returns (0, surls) on success, (status, pilotErrorDiag) on a mover
        failure, or (-1, []) on an unexpected exception.
        """
        ret_status = 0
        ret_outputs = []
        try:
            for filename in outputs:
                ret_status, pilotErrorDiag, surl, size, checksum, arch_type = \
                    self.__siteMover.put_data(filename, self.__esPath,
                                              lfn=os.path.basename(filename),
                                              report=self.__report,
                                              token=None, experiment='ATLAS')
                if ret_status == 0:
                    os.remove(filename)
                    ret_outputs.append(surl)
                else:
                    self.__tmpLog.debug("Failed to stageout %s: %s %s" %
                                        (filename, ret_status, pilotErrorDiag))
                    return ret_status, pilotErrorDiag
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when staging out: %s" %
                (self.__rank, traceback.format_exc()))
            ret_status = -1
        return ret_status, ret_outputs

    def zipOutputs(self, eventRangeID, eventStatus, outputs):
        """Append files to the rank's tar archive and log the event range.

        Uses the Python 2 `commands` module. Returns (0, outputs) on success,
        (status, message) on failure; each archived file is deleted.
        """
        try:
            for filename in outputs:
                # -r appends to the existing archive; --directory strips the path
                command = "tar -rf " + self.__zipFileName + \
                    " --directory=%s %s" % (os.path.dirname(filename),
                                            os.path.basename(filename))
                status, ret = commands.getstatusoutput(command)
                if status:
                    self.__tmpLog.debug("Failed to zip %s: %s, %s" %
                                        (filename, status, ret))
                    return status, ret
                else:
                    os.remove(filename)
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when zipping out: %s" %
                (self.__rank, traceback.format_exc()))
            return -1, "Failed to zip outputs"
        else:
            # record which event range the archived files belong to
            handler = open(self.__zipEventRangesName, "a")
            handler.write("%s %s %s\n" % (eventRangeID, eventStatus, outputs))
            handler.close()
        return 0, outputs

    def stageOut(self, eventRangeID, eventStatus, output, retries=0):
        """Dispatch one output message to zip/objectstore/copy handling.

        Builds a result request dict and puts it on the outputs queue.
        Objectstore failures are retried once via recursion (retries+1).
        """
        if eventStatus.startswith("ERR"):
            # error ranges are reported as-is, without staging anything
            request = {
                "eventRangeID": eventRangeID,
                'eventStatus': eventStatus,
                "output": output
            }
        else:
            # the message is "<file1>,...,<fileN>,<3 trailing fields>"
            outputs = output.split(",")[:-3]
            if self.__yodaToZip:
                self.__tmpLog.debug("Rank %s: start to zip outputs: %s" %
                                    (self.__rank, outputs))
                retStatus, retOutput = self.zipOutputs(eventRangeID,
                                                       eventStatus, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'zipped',
                        "output": retOutput
                    }
            elif self.__yodaToOS:
                self.__tmpLog.debug(
                    "Rank %s: start to stage out outputs to objectstore: %s" %
                    (self.__rank, outputs))
                retStatus, retOutput = self.stageOutToOS(outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to stagout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    if retries < 1:
                        # one immediate retry; no request is emitted this round
                        self.stageOut(eventRangeID, eventStatus, output,
                                      retries=retries + 1)
                        request = None
                    else:
                        request = {
                            "jobId": self.__jobId,
                            "eventRangeID": eventRangeID,
                            'eventStatus': eventStatus,
                            "output": output
                        }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to stageout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'stagedOut',
                        "output": retOutput,
                        'objstoreID': self.__os_bucket_id
                    }
            else:
                self.__tmpLog.debug("Rank %s: start to copy outputs: %s" %
                                    (self.__rank, outputs))
                # BUGFIX: was self.copyOutputs(...) — no such method exists;
                # the defined method is copyOutput.
                retStatus, retOutput = self.copyOutput(output, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": retOutput
                    }
        if request:
            self.__outputs.put(request)

    def stop(self):
        """Signal the run loop to drain and exit."""
        self.__stop.set()

    def isFinished(self):
        """Return True once the run loop has fully exited."""
        return self.__isFinished

    def run(self):
        """Main loop: poll the ES job manager and stage out each output.

        Per-message failures are logged and skipped. On stop(), waits for any
        threadpool tasks to finish before exiting.
        """
        while True:
            try:
                outputs = self.__esJobManager.getOutputs()
                if outputs:
                    self.__tmpLog.debug("Rank %s: getOutputs: %s" %
                                        (self.__rank, outputs))
                for outputMsg in outputs:
                    try:
                        eventRangeID, eventStatus, output = outputMsg
                        if self.__threadpool:
                            self.__tmpLog.debug(
                                "Rank %s: add event output to threadpool: %s" %
                                (self.__rank, outputMsg))
                            self.__threadpool.add_task(self.stageOut,
                                                       eventRangeID,
                                                       eventStatus,
                                                       output,
                                                       retries=0)
                        else:
                            self.stageOut(eventRangeID, eventStatus, output,
                                          retries=0)
                    except:
                        self.__tmpLog.warning("Rank %s: error message: %s" %
                                              (self.__rank,
                                               traceback.format_exc()))
                        continue
            except:
                self.__tmpLog.error("Rank %s: Stager Thread failed: %s" %
                                    (self.__rank, traceback.format_exc()))

            if self.__stop.isSet():
                if self.__threadpool:
                    self.__tmpLog.warning(
                        "Rank %s: wait threadpool to finish" % (self.__rank))
                    self.__threadpool.wait_completion()
                    self.__tmpLog.warning("Rank %s: threadpool finished" %
                                          (self.__rank))
                break
            time.sleep(1)
        self.__isFinished = True