Example #1
import logging
import sys
import time

import requests

# LOGIN_URL, LOGIN_PARAM, idResult, quantResult, parseArgs, downloadResult
# and the ThreadPool helper are defined elsewhere in the source project.
def main():
	appName = sys.argv[0]
	logging.basicConfig(
#		filename = './log/' + appName + '_' + time.strftime("%Y%m%d_%H%M%S") + '.log',
		datefmt = '%Y-%m-%d %H:%M:%S',
		format = '%(asctime)s | %(levelname)s | %(name)s | %(message)s',
		level = logging.INFO
	)
	func_name = sys._getframe().f_code.co_name
	logger = logging.getLogger(func_name)
	logger.info('Started')

	parseArgs()

	logger.debug('requests.session')
	session = requests.session()
	# `mount` a custom adapter that retries failed connections for HTTP and HTTPS requests.
	session.mount("http://", requests.adapters.HTTPAdapter(max_retries=10))
	session.mount("https://", requests.adapters.HTTPAdapter(max_retries=10))

	logger.debug('session.post')
	r = session.post(LOGIN_URL, data = LOGIN_PARAM)

	idResultEnd = idResult
	idResultBegin = idResult - quantResult
	# 1) Init a Thread pool with the desired number of threads
	logger.debug('ThreadPool')
	pool = ThreadPool(10)
	logger.debug('for idAtual in xrange(%d, %d, -1)' % (idResultEnd, idResultBegin))
	for idAtual in xrange(idResultEnd, idResultBegin, -1):
		# 2) Add the task to the queue
		pool.add_task(downloadResult, session, idAtual)
	# 3) Wait for completion
	pool.wait_completion()
	###
	logger.info('Finished')
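
A note on the shared helper: none of these snippets define ThreadPool themselves, and it is not the standard library's multiprocessing.pool.ThreadPool; each source project ships its own. Below is a minimal sketch, assuming the add_task(func, *args) / wait_completion() interface that Example #1 and most of the later examples use; the actual implementations vary per project.

import threading
try:
    from queue import Queue          # Python 3
except ImportError:
    from Queue import Queue          # Python 2

class ThreadPool(object):
    """Run queued tasks on a fixed number of daemon worker threads."""

    def __init__(self, num_threads):
        self.tasks = Queue()
        for _ in range(num_threads):
            worker = threading.Thread(target=self._work)
            worker.daemon = True
            worker.start()

    def _work(self):
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            except Exception:
                pass                 # a real pool would log the traceback here
            finally:
                self.tasks.task_done()

    def add_task(self, func, *args, **kwargs):
        """Queue a callable; an idle worker picks it up."""
        self.tasks.put((func, args, kwargs))

    def is_empty(self):
        """True when no tasks are waiting in the queue (used in Example #5)."""
        return self.tasks.empty()

    def wait_completion(self):
        """Block until every queued task has been processed."""
        self.tasks.join()

Read this way, Example #1 creates ten workers, queues one downloadResult call per result id, and blocks in wait_completion() until the queue drains.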
Example #2
def main(condicion_venta, estado, threads):
    start = datetime.now()
    try:
        condicion_venta = condicion_venta.replace('-', '_')
        estado = estado.replace('-', '_')
        condicion_venta = CondicionVenta[condicion_venta]
        estado = Estado[estado]
        max_threads = int(threads)
        print('Parámetros de búsqueda')
        print('Venta : {0}'.format(condicion_venta.value))
        print('Vigente: {0}'.format(estado.value))
    except KeyError:
        print(
            'No fue posible determinar la condicion de venta o estado de medicamentos a procesar'
        )
        return 1
    except ValueError:
        print('No se proporcionó un número de hilos de ejecución válido')
        return 1

    thread = IspParser(sale_terms=condicion_venta, status=estado)
    max_pages = thread.pages_count

    pool = ThreadPool(max_threads, IspParser)
    for i in range(1, max_pages + 1):
        pool.add_task({
            'sale_terms': condicion_venta,
            'status': estado,
            'page_number': i
        })
    pool.wait_completion()
    end = datetime.now()
    print('Tiempo transcurrido: {0}'.format(end - start))
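
Note that this project's ThreadPool differs from the sketch above: its constructor takes a worker class (IspParser) in addition to the thread count, and add_task receives a dict of keyword arguments for that class rather than a callable.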
Example #3
def test_thread_pool():
    pool = ThreadPool(2, 0.5)
    for i in range(2):
        handle = login.login('ssh', 'root', 'n', '10.20.60.23')
        task = (handle.execute_cmd, ('uname -a',), {})
        pool.add_task(task)
    print pool.get_result()
    pool.task_join()
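
Another interface variant: here a task is queued as a prebuilt (callable, args, kwargs) tuple, results are collected via get_result(), and task_join() plays the role of wait_completion(); the second constructor argument (0.5) is presumably a polling interval or timeout.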
Example #4
    def test_task_execution(self):
        global cnt_handle
        thread_pool = ThreadPool()
        task_list = []
        total_cnt_expected = 10
        for i in range(total_cnt_expected):
            task_list.append(TaskTest("task %s" % i, self.task_handle))
        for task in task_list:
            thread_pool.add_task(task)

        time_start = time.time()
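        # Busy-wait (no sleep) for up to one second, until every queued task
        # has run task_handle and incremented the global cnt_handle counter.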
        while True:
            if time.time() - time_start > 1.0:
                break
            if cnt_handle == total_cnt_expected:
                break

        self.assertEqual(cnt_handle, total_cnt_expected)
Example #5
class EventStager(object):
    def __init__(self, workDir, setup, esPath, token, experiment, userid, sitename, outputDir=None, yodaToOS=False, threads=10, isDaemon=False, process=0, totalProcess=1):
        self.__workDir = workDir
        self.__updateEventRangesDir = os.path.join(self.__workDir, 'updateEventRanges_%s' % process)
        if not os.path.exists(self.__updateEventRangesDir):
            os.makedirs(self.__updateEventRangesDir)
        self.__logFile = os.path.join(workDir, 'EventStager.log')
        self.__setup = setup
        self.__siteMover = S3ObjectstoreSiteMover(setup, useTimerCommand=False)
        self.__esPath = esPath
        self.__token = token
        self.__experiment = experiment
        self.__outputDir = outputDir

        self.__userid = userid
        self.__sitename = sitename

        self.__report = getInitialTracingReport(userid=self.__userid, sitename=self.__sitename, dsname=None, eventType="objectstore", analysisJob=False, jobId=None, jobDefId=None, dn=self.__userid)

        self.__num_stagingFile = 0
        self.__eventRanges = {}
        self.__eventRanges_staged = {}
        self.__eventRanges_faileStaged = {}

        self.__eventStager = None
        self.__canFinish = False
        self.__status = 'new'
        self.__threads = threads
        self.__isDaemon = isDaemon
        self.__startTime = time.time()

        self.__processedJobs = []
        self.__handlingOthers = 0
        self.__otherProcesses = []
        self.__startWait = None
        self.__waitTime = 15 * 60 # 15 minutes

        self.__yodaToOS = yodaToOS

        if 'PilotHomeDir' not in os.environ:
            os.environ['PilotHomeDir'] = os.path.dirname(__file__)

        self.__process = process
        self.__totalProcess = totalProcess

        self.__siteMover.setup(experiment)

        self.__threadpool = ThreadPool(self.__threads)
        logging.info("Init EventStager workDir %s setup %s esPath %s token %s experiment %s userid %s sitename %s threads %s outputDir %s isDaemond %s" % (self.__workDir, self.__setup, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, self.__isDaemon))

    def renewEventStagerStatus(self):
        canFinish_file = os.path.join(self.__workDir, 'EventStagerStatusCan.json')
        finished_file = os.path.join(self.__workDir, 'EventStagerStatus.json')
        if self.__isDaemon:
            if os.path.exists(canFinish_file):
                #with open(canFinish_file) as inputFile:
                #    origin_status = json.load(inputFile)
                #    self.__canFinish = origin_status['canFinish']
                self.__canFinish = True
            if self.__status == "finished":
                status = {'status': self.__status}
                with open(finished_file, 'w') as outputFile:
                    json.dump(status, outputFile)
            elif os.path.exists(finished_file):
                os.remove(finished_file)
        else:
            if os.path.exists(finished_file):
                #with open(finished_file) as inputFile:
                #    origin_status = json.load(inputFile)
                #    self.__status = origin_status['status']
                self.__status = "finished"
            if self.__canFinish:
                status = {'canFinish': self.__canFinish}
                with open(canFinish_file, 'w') as outputFile:
                    json.dump(status, outputFile)
            elif os.path.exists(canFinish_file):
                os.remove(canFinish_file)

    def start(self):
        try:
            self.renewEventStagerStatus()
            if self.__outputDir:
                stageCmd = "MVEventStager.py"
            else:
                stageCmd = "EventStager.py"

            yoda_to_os = ''
            if self.__yodaToOS:
                yoda_to_os = '--YodaToOS '
            if self.__setup and len(self.__setup.strip()):
                cmd = 'python %s/%s --workDir %s --setup %s --esPath %s --token %s --experiment %s --userid %s --sitename %s --threads %s --outputDir %s %s--isDaemon 2>&1 1>>%s' % (self.__workDir, stageCmd, self.__workDir, self.__setup, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, yoda_to_os, self.__logFile)
            else:
                cmd = 'python %s/%s --workDir %s --esPath %s --token %s --experiment %s --userid %s --sitename %s --threads %s --outputDir %s %s--isDaemon 2>&1 1>>%s' % (self.__workDir, stageCmd, self.__workDir, self.__esPath, self.__token, self.__experiment, self.__userid, self.__sitename, self.__threads, self.__outputDir, yoda_to_os, self.__logFile)
            pUtil.tolog("Start Event Stager: %s" % cmd)
            self.__eventStager = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout, shell=True)
        except:
            pUtil.tolog("Failed to start Event Stager: %s" % traceback.format_exc())

    def getLog(self):
        return self.__logFile

    def monitor(self):
        try:
            self.renewEventStagerStatus()
            if not self.isFinished() and (self.__eventStager is None or self.__eventStager.poll() is not None):
                pUtil.tolog("Event Stager failed. Try to start it.")
                self.start()
        except:
            pUtil.tolog("Failed to monitor Event Stager: %s" % traceback.format_exc())

    def finish(self):
        try:
            pUtil.tolog("Tell Event Stager to finish after finishing staging out all events")
            self.__canFinish = True
            self.renewEventStagerStatus()
        except:
            pUtil.tolog("Failed to monitor Event Stager: %s" % traceback.format_exc())

    def terminate(self):
        try:
            pUtil.tolog("Terminate Event Stager")
            self.__eventStager.terminate()
        except:
            pUtil.tolog("Failed to terminate Event Stager: %s" % traceback.format_exc())

    def isFinished(self):
        if self.__canFinish and self.__status == 'finished':
            return True
        return False

    def stageOutEvent(self, output_info):
        filename, jobId, eventRangeID, status, output = output_info

        try:
            if status == 'failed':
                self.__eventRanges_staged[filename].append((jobId, eventRangeID, status, output))
                if eventRangeID not in self.__eventRanges[filename]:
                    logging.warning("stageOutEvent: %s is not in eventRanges" % eventRangeID)
                else:
                    del self.__eventRanges[filename][eventRangeID]
            if status == 'finished':
                if not os.path.exists(output):
                    if eventRangeID in self.__eventRanges[filename]:
                        del self.__eventRanges[filename][eventRangeID]
                        return

                ret_status, pilotErrorDiag, surl, size, checksum, self.arch_type = self.__siteMover.put_data(output, os.path.join(self.__esPath, os.path.basename(output)), lfn=os.path.basename(output), report=self.__report, token=self.__token, experiment=self.__experiment)
                if ret_status == 0:
                    try:
                        self.__eventRanges_staged[filename].append((jobId, eventRangeID, status, output))
                
                        if eventRangeID not in self.__eventRanges[filename]:
                            logging.warning("stageOutEvent: %s is not in eventRanges" % eventRangeID)
                        else:
                            del self.__eventRanges[filename][eventRangeID]
                        #logging.info("Remove staged out output file: %s" % output)
                        #os.remove(output)
                    except Exception, e:
                        logging.info("!!WARNING!!2233!! remove ouput file threw an exception: %s" % (e))
                else:
                    logging.info("!!WARNING!!1164!! Failed to upload file to objectstore: %d, %s" % (ret_status, pilotErrorDiag))
                    self.__eventRanges_faileStaged[filename].append((jobId, eventRangeID, status, output))
        except:
            logging.warning(traceback.format_exc())
            self.__eventRanges_faileStaged[filename].append((jobId, eventRangeID, status, output))

    def sort_file_by_mtime(self, path, files):
        mtime = lambda f: os.stat(os.path.join(path, f)).st_mtime
        return list(sorted(files, key=mtime))

    def getUnstagedOutputFiles(self, ext=".dump"):
        outputFiles = []
        all_files = os.listdir(self.__workDir)
        if ext == ".dump":
            for file in all_files:
                if file.endswith(ext):
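                    # Shard the files across cooperating processes: a file
                    # belongs to this process when sha1(name) mod totalProcess
                    # equals this process's index.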
                    if (int(hashlib.sha1(file).hexdigest(),16) % self.__totalProcess) == self.__process:
                        filename = os.path.join(self.__workDir, file)
                        outputFiles.append(file)
        else:
            for file in all_files:
                if file.endswith(ext):
                    if self.__process == 0:
                        filename = os.path.join(self.__workDir, file)
                        outputFiles.append(file)
        if outputFiles:
            outputFiles = self.sort_file_by_mtime(self.__workDir, outputFiles)
            logging.info("UnStaged Output files: %s" % outputFiles)
        return outputFiles

    def updateEventRange(self, event_range_id, status='finished'):
        """ Update an event range on the Event Server """
        pUtil.tolog("Updating an event range..")

        message = ""
        # url = "https://aipanda007.cern.ch:25443/server/panda"
        url = "https://pandaserver.cern.ch:25443/server/panda"
        node = {}
        node['eventRangeID'] = event_range_id

        # node['cpu'] =  eventRangeList[1]
        # node['wall'] = eventRangeList[2]
        node['eventStatus'] = status
        # tolog("node = %s" % str(node))

        # open connection
        ret = pUtil.httpConnect(node, url, path=self.__updateEventRangesDir, mode="UPDATEEVENTRANGE")
        # response = ret[1]

        if ret[0]: # non-zero return code
            message = "Failed to update event range - error code = %d" % (ret[0])
        else:
            message = ""

        return ret[0], message

    def updateEventRanges(self, event_ranges):
        """ Update an event range on the Event Server """
        pUtil.tolog("Updating event ranges..")

        message = ""
        #url = "https://aipanda007.cern.ch:25443/server/panda"
        url = "https://pandaserver.cern.ch:25443/server/panda"
        # eventRanges = [{'eventRangeID': '4001396-1800223966-4426028-1-2', 'eventStatus':'running'}, {'eventRangeID': '4001396-1800223966-4426028-2-2','eventStatus':'running'}]

        node={}
        node['eventRanges']=json.dumps(event_ranges)

        # open connection
        ret = pUtil.httpConnect(node, url, path=self.__updateEventRangesDir, mode="UPDATEEVENTRANGES")
        # response = json.loads(ret[1])

        status = ret[0]
        if ret[0]: # non-zero return code
            message = "Failed to update event range - error code = %d, error: " % (ret[0], ret[1])
        else:
            response = json.loads(json.dumps(ret[1]))
            status = int(response['StatusCode'])
            message = json.dumps(response['Returns'])

        return status, message

    def cleanStagingFiles(self, older=None):
        if older is None:
            self.__eventRanges = {}
            self.__eventRanges_staged = {}
            self.__eventRanges_faileStaged = {}
        all_files = os.listdir(self.__workDir)
        for file in all_files:
            if older is None:
                if file.endswith(".dump.staging"):
                    origin_file = file.replace(".dump.staging", ".dump")
                    if (int(hashlib.sha1(origin_file).hexdigest(),16) % self.__totalProcess) == self.__process:
                        filepath = os.path.join(self.__workDir, file)
                        os.rename(filepath, filepath.replace(".dump.staging", ".dump"))

                if file.endswith("cmdcopying"):
                    origin_file = file.replace("cmdcopying", "cmd")
                    if self.__process == 0:
                        filepath = os.path.join(self.__workDir, file)
                        os.rename(filepath, filepath.replace("cmdcopying", "cmd"))
            else:
                if file.endswith("cmdcopying"):
                    present = time.time()
                    origin_file = file.replace("cmdcopying", "cmd")
                    if self.__process == 0:
                        filepath = os.path.join(self.__workDir, file)
                        if (present - os.path.getmtime(filepath)) > older:
                            os.rename(filepath, filepath.replace("cmdcopying", "cmd"))


    def getEventRanges(self):
        if len(self.__eventRanges.keys()) > 5:
            return
        outputFiles = self.getUnstagedOutputFiles()
        for file in outputFiles:
            if len(self.__eventRanges.keys()) > 5:
                return
            self.__startWait = None
            self.__eventRanges[file] = {}
            self.__eventRanges_staged[file] = []
            self.__eventRanges_faileStaged[file] = []

            filepath = os.path.join(self.__workDir, file)
            handle = open(filepath)
            for line in handle:
                if len(line.strip()) == 0:
                    continue
                line = line.replace("  ", " ")
                jobId, eventRange, status, output = line.split(" ")
                output = output.split(",")[0]
                self.__eventRanges[file][eventRange] = {'retry':0, 'event': (jobId, eventRange, status, output)}
                self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRange, status, output))
                if jobId not in self.__processedJobs:
                    self.__processedJobs.append(jobId)
            handle.close()
            os.rename(filepath, filepath + ".staging")

    def checkMissedStagingFiles(self):
        all_files = os.listdir(self.__workDir)
        for file in all_files:
            try:
                if file.endswith(".dump.staged.reported"):
                    origin_file = file.replace(".dump.staged.reported", ".dump")
                    filepath = os.path.join(self.__workDir, file)
                    size = os.path.getsize(filepath)
                    if size == 0:
                        if (int(hashlib.sha1(origin_file).hexdigest(),16) % self.__totalProcess) == self.__process:
                            back_file = filepath.replace(".dump.staged.reported", ".dump.BAK")
                            origin_file = filepath.replace(".dump.staged.reported", ".dump")
                            staging_file = filepath.replace(".dump.staged.reported", ".dump.staging")
                            if not os.path.exists(back_file) and not os.path.exists(origin_file) and not os.path.exists(staging_file):
                                os.remove(filepath)
                            os.rename(back_file, origin_file)
            except:
                logging.warning("Failed to rename %s to %s: %s" % (back_file, origin_file, traceback.format_exc()))

    def checkFailedStagingFiles(self):
        for file in self.__eventRanges_faileStaged:
            while self.__eventRanges_faileStaged[file]:
                jobId, eventRangeID, status, output = self.__eventRanges_faileStaged[file].pop()
                if eventRangeID not in self.__eventRanges[file]:
                    logging.warning("checkFailedStagingFiles: %s is not in eventRanges" % eventRangeID)
                else:
                    if self.__eventRanges[file][eventRangeID]['retry'] < 3:
                        self.__eventRanges[file][eventRangeID]['retry'] += 1
                        self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRangeID, status, output))
                    else:
                        self.__eventRanges_staged[file].append((jobId, eventRangeID, 'failed', output))
                        del self.__eventRanges[file][eventRangeID]

    def checkFinishedStagingFiles(self):
        finishedFiles = []
        for file in self.__eventRanges:
            try:
                if len(self.__eventRanges[file].keys()) == 0:
                    filepath = os.path.join(self.__workDir, file)
                    handle = open(filepath + ".staged.reported", 'w')
                    finishedEventRanges = []
                    for chunk in pUtil.chunks(self.__eventRanges_staged[file], 100):
                        try:
                            eventRanges = []
                            for outputEvents in chunk:
                                jobId, eventRangeID, status, output = outputEvents
                                if eventRangeID not in finishedEventRanges:
                                    finishedEventRanges.append(eventRangeID)
                                    if status == 'finished':
                                        eventRanges.append({"eventRangeID": eventRangeID, "eventStatus": status})
                                    if status.startswith("ERR"):
                                        eventRanges.append({"eventRangeID": eventRangeID, "eventStatus": 'failed'})

                            update_status, update_output = self.updateEventRanges(eventRanges)
                            logging.info("update Event Range: status: %s, output: %s" % (update_status, update_output))
                            if update_status:
                                update_status, update_output = self.updateEventRanges(eventRanges)
                                logging.info("update Event retry Range: status: %s, output: %s" % (update_status, update_output))
                            if update_status == 0:
                                try:
                                    ret_outputs = json.loads(json.loads(update_output))
                                    if len(ret_outputs) == len(chunk):
                                        for i in range(len(ret_outputs)):
                                            try:
                                                if ret_outputs[i]:
                                                    jobId, eventRangeID, status, output = chunk[i]
                                                    logging.info("Remove %s" % output)
                                                    os.remove(output)
                                                    handle.write('{0} {1} {2} {3}\n'.format(jobId, eventRangeID, status, output))

                                            except:
                                                logging.warning("Failed to remove %s: %s" % (output, traceback.format_exc()))
                                except:
                                    logging.warning(traceback.format_exc())
                        except:
                            logging.warning(traceback.format_exc())
                    handle.close()
                    os.rename(filepath + ".staging", filepath + ".BAK")
                    finishedFiles.append(file)
            except:
                logging.warning(traceback.format_exc())
        for file in finishedFiles:
            del self.__eventRanges[file]
            del self.__eventRanges_staged[file]
            del self.__eventRanges_faileStaged[file]

    def checkLostEvents(self):
        for file in self.__eventRanges:
            for eventRange in self.__eventRanges[file]:
                jobId, eventRange, status, output = self.__eventRanges[file][eventRange]['event']
                self.__threadpool.add_task(self.stageOutEvent, (file, jobId, eventRange, status, output))

    def handleGfalFile(self, gfalFile):
        try:
            for i in range(3):
                gfalFile = os.path.join(self.__workDir, gfalFile)
                os.rename(gfalFile, gfalFile + "copying")
                handle = open(gfalFile + "copying")
                cmd = handle.read()
                handle.close()
                cmd = cmd.replace(" -t 3600 ", " -t 300 ")
                logging.info("Execute command: %s" % cmd)
                # status, output = commands.getstatusoutput(cmd)
                status, output = TimerCommand(cmd).run(600)
                logging.info("Status %s output %s" % (status, output))
                if status == 0:
                    os.rename(gfalFile + "copying", gfalFile + "finished")
                    return
                else:
                    os.rename(gfalFile + "copying", gfalFile)

            os.rename(gfalFile, gfalFile + "failed")
        except:
            logging.error("handleGfalFile %s" % traceback.format_exc())
        finally:
            self.__handlingOthers -= 1

    def handleS3File(self, s3File):
        try:
            s3File = os.path.join(self.__workDir, s3File)
            os.rename(s3File, s3File + "copying")
            handle = open(s3File + "copying")
            cmd = handle.read()
            handle.close()
            source, destination = cmd.split(" ")
            logging.info("S3 stage out from %s to %s" % (source, destination))
            ret_status, pilotErrorDiag, surl, size, checksum, self.arch_type = self.__siteMover.put_data(source, destination, lfn=os.path.basename(destination), report=self.__report, token=self.__token, experiment=self.__experiment, timeout=300)
            logging.info("Status %s output %s" % (ret_status, pilotErrorDiag))
            if ret_status == 0:
                os.rename(s3File + "copying", s3File + "finished")
            else:
                os.rename(s3File + "copying", s3File)
        except:
            logging.error("handleS3File %s" % traceback.format_exc())
        finally:
            self.__handlingOthers -= 1

    def handleOtherFiles(self):
        gfalFiles = self.getUnstagedOutputFiles(".gfalcmd")
        for gfalFile in gfalFiles:
            p = multiprocessing.Process(target=self.handleGfalFile, args=(gfalFile,))
            p.start()
            self.__otherProcesses.append(p)
            self.__handlingOthers += 1
            self.__startWait = None

        s3Files = self.getUnstagedOutputFiles(".s3cmd")
        for s3File in s3Files:
            p = multiprocessing.Process(target=self.handleS3File, args=(s3File,))
            p.start()
            self.__otherProcesses.append(p)
            self.__handlingOthers += 1
            self.__startWait = None

        termProcesses = []
        for p in self.__otherProcesses:
            if not p.is_alive():
                termProcesses.append(p)
        for p in termProcesses:
            self.__otherProcesses.remove(p)

    def killStallProcess(self):
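        # Find /proc entries owned by user 'wguan' whose directories were last
        # modified more than a minute ago, keep only EventStager.py processes
        # other than this one, and kill them.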
        command = "find /proc -maxdepth 1 -user wguan -type d -mmin +1 -exec basename {} \; | xargs ps | grep EventStager.py | awk '{ print $1 }' | grep -v " + str(os.getpid()) + "|xargs kill"
        print command
        status, output = commands.getstatusoutput(command)
        print status
        print output

    def run(self):
        logging.info("Start to run")
        self.cleanStagingFiles()
        timeStart = time.time() - 60
        while not self.isFinished():
            try:
                if (time.time() - timeStart) > 60:
                    self.renewEventStagerStatus()
                    self.cleanStagingFiles(20*60)
                    # self.checkMissedStagingFiles()
                    self.getEventRanges()
                    self.checkFailedStagingFiles()
                    self.checkFinishedStagingFiles()
                    if self.__canFinish and len(self.__eventRanges.keys()) == 0:
                        self.__status = 'finished'
                        self.renewEventStagerStatus()
                    if self.__threadpool.is_empty():
                        self.checkLostEvents()
                    timeStart = time.time()
                self.handleOtherFiles()
                time.sleep(30)
                logging.debug("len(eventranges:%s)" % len(self.__eventRanges.keys()))
                #logging.debug("%s" % self.__eventRanges)
                logging.debug("otherProcesses:%s" % len(self.__otherProcesses))
                if len(self.__eventRanges.keys()) == 0 and len(self.__otherProcesses) == 0:
                    self.cleanStagingFiles()
                    if self.__startWait is None:
                        self.__startWait = time.time()
                self.killStallProcess()
                if self.__startWait and (time.time() - self.__startWait) > self.__waitTime:
                    break
            except:
                logging.info(traceback.format_exc())
                #sys.exit(1)
        logging.info("Finished to run")
Example #6
def matchAll(phase, suffix):
    global basics
    global rules
    global matches
    global failures
    global predicates
    global locators
    global noFiles
    global noFilesAffected
    global noUnits
    global noPatternConstraints
    global noPatternConstraintsOk
    global noContentConstraints
    global noContentConstraintsOk
    global noPredicateConstraints
    global noPredicateConstraintsOk
    global noFragments
    if phase != "basics":
        basics = tools101.getBasics()
    rules = json.load(open(const101.rulesDump, 'r'))["results"]["rules"]
    matches = list()
    failures = list()
    predicates = set()
    locators = set()
    noFiles = 0
    noUnits = 0
    noFilesAffected = 0
    noPatternConstraints = 0
    noPatternConstraintsOk = 0
    noContentConstraints = 0
    noContentConstraintsOk = 0
    noPredicateConstraints = 0
    noPredicateConstraintsOk = 0
    noFragments = 0

    pool = ThreadPool(4)

    print "Matching 101meta metadata on 101repo (phase \"" + str(phase)+ "\")."
    for root, dirs, files in os.walk(os.path.join(const101.sRoot, "contributions"), followlinks=True):
        if not root.startswith(os.path.join(const101.sRoot, ".git")+os.sep):
            for basename in files:
                noFiles += 1
                if basename not in [".gitignore"]:
                    dirname = root[len(const101.sRoot)+1:]
                    pool.add_task(handleFile, phase, dirname, basename, suffix)
                    #handleFile(phase, dirname, basename, suffix)

    sys.stdout.write('\n')

    pool.wait_completion()

    mr = dict()
    mr["matches"] = matches
    mr["failures"] = failures
    mr["rules"] = rules
    if phase=="predicates":
        mr["predicates"] = list(predicates)
    if phase=="fragments":
        mr["locators"] = list(locators)
    print str(noFiles) + " files examined."
    print str(noFilesAffected) + " files affected."
    print str(len(failures)) + " failures encountered."
    print str(noUnits) + " metadata units attached."
    print str(noContentConstraints) + " content constraints checked."
    print str(noContentConstraintsOk) + " content constraints succeeded."
    print str(noPatternConstraints) + " filename-pattern constraints checked."
    print str(noPatternConstraintsOk) + " filename-pattern constraints succeeded."
    if phase=="predicates":
        print str(noPredicateConstraints) + " predicate constraints checked."
        print str(noPredicateConstraintsOk) + " predicate constraints succeeded."
    if phase=="fragments":
        print str(len(locators)) + " fragment locators exercised."
        print str(noFragments) + " fragment descriptions checked."
    return mr
Example #7
    def runHPCEvent(self):
        tolog("runHPCEvent")
        self.__job.jobState = "running"
        self.__job.setState([self.__job.jobState, 0, 0])
        self.__job.pilotErrorDiag = None
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

        defRes = self.getDefaultResources()
        if defRes['copy_input_files'] == 'true':
            self.__copyInputFiles = True
        else:
            self.__copyInputFiles = False

        status, output, hpcJob = self.prepareHPCJob()
        if status == 0:
            tolog("HPC Job: %s " % hpcJob)
        else:
            tolog("failed to create the Tag file")
            self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
            return

        self.__hpcStatus = None
        self.__hpcLog = None

        logFileName = None
        tolog("runJobHPCEvent.getPilotLogFilename=%s"% self.getPilotLogFilename())
        if self.getPilotLogFilename() != "":
            logFileName = self.getPilotLogFilename()
        hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)

        self.__hpcManager = hpcManager
        self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
        self.__job.setMode(self.HPCMode)
        self.__job.setHpcStatus('waitingResource')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        hpcManager.getFreeResources(defRes)
        self.__job.coreCount = hpcManager.getCoreCount()
        self.__job.setHpcStatus('gettingEvents')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        numRanges = hpcManager.getEventsNumber()
        tolog("HPC Manager needs events: %s, max_events: %s; use the smallest one." % (numRanges, defRes['max_events']))
        if numRanges > int(defRes['max_events']):
            numRanges = int(defRes['max_events'])
        eventRanges = self.getEventRanges(numRanges=numRanges)
        #tolog("Event Ranges: %s " % eventRanges)
        if len(eventRanges) == 0:
            tolog("Get no Event ranges. return")
            return
        for eventRange in eventRanges:
            self.__eventRanges[eventRange['eventRangeID']] = 'new'

        # setup stage out
        self.setupStageOutHPCEvent()

        hpcManager.initJob(hpcJob)
        hpcManager.initEventRanges(eventRanges)
        
        hpcManager.submit()
        threadpool = ThreadPool(defRes['stageout_threads'])

        old_state = None
        time_start = time.time()
        while not hpcManager.isFinished():
            state = hpcManager.poll()
            self.__job.setHpcStatus(state)
            if old_state is None or old_state != state or time.time() > (time_start + 60*10):
                old_state = state
                time_start = time.time()
                tolog("HPCManager Job stat: %s" % state)
                self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
                rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
                self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

            if state and state == 'Complete':
                break
            outputs = hpcManager.getOutputs()
            for output in outputs:
                #self.stageOutHPCEvent(output)
                threadpool.add_task(self.stageOutHPCEvent, output)

            time.sleep(30)
            self.updateHPCEventRanges()

        tolog("HPCManager Job Finished")
        self.__job.setHpcStatus('stagingOut')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)

        self.updateHPCEventRanges()
        threadpool.wait_completion()
        self.updateHPCEventRanges()


        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 1")
            half_stageout_threads = defRes['stageout_threads'] / 2
            if half_stageout_threads < 1:
                half_stageout_threads = 1
            threadpool = ThreadPool(half_stageout_threads)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 2")
            threadpool = ThreadPool(1)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        self.__job.setHpcStatus('finished')
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
        self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
        tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
Example #8
def main(argv):
	#Parse commandline arguments
	#http://www.tutorialspoint.com/python/python_command_line_arguments.htm
	update_existing = False
	try:
		opts, args = getopt.getopt(argv, "hu:")
	except getopt.GetoptError:
		print "run_processing.py -u <True/False>"
		sys.exit(2)
	for opt, arg in opts:
		if opt == "-h":
			print "run_processing.py -u <True/False>"
		elif opt == "-u":
			if arg == "True":
				update_existing = True

	projects = [d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS) if os.path.isdir(os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))]

	if update_existing:
		print "Running in-place update."
		manager = None
	else:
		manager = consumerDatabaseThreadManager("knowledge")

	pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

	tip_hashes = {}

	all_files = []
	last_line = []

	#iterate through all projects
	for project in projects:
		if project in devknowledge.settings.PROJECT_FOLDERS:
			all_files, last_lines = Util.returnFilesLastLine(project)
			if update_existing:
				#there were issues with threads duplicating authors/files so we just mass add them at the beginning
				print "Starting initial processing of authors/files."
				Git.createFilesAuthorsInIndex(project, all_files)
				print "Finished creating authors/files."
			number_to_process = len(all_files)
			print "Number of files to process: ", number_to_process
			processed_files = 0
			for i in range(0, len(all_files)):
				file = all_files[i]
				last_line = last_lines[i]
				print "Project: ", project, " ", file, " number lines: ", last_line
				project_type = Util.returnProjectType(devknowledge.settings.VERSION_CONTROL_REPOS+project)
				if project_type == "git":
					#run Git analysis
					if project not in tip_hashes:
						#create new head hash cache
						tip_hashes[project] = Util.returnHeadHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)

					if update_existing:
						Git.storeKnowledge(manager, project, file, 1, last_line, update_existing, tip_hashes[project])
					else:
						pool.add_task(Git.storeKnowledge, manager, project, file, 1, last_line, update_existing, tip_hashes[project])
				elif project_type == "hg":
					#run Mercurial analysis
					if project not in tip_hashes:
						#create new tip hash cache
						tip_hashes[project] = Mercurial.returnTipHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)

					pool.add_task(Mercurial.storeKnowledge, manager, project, file, 1, last_line, tip_hashes[project])

				processed_files += 1
				print "Percent done: %.2f %%" % float(float(processed_files)/float(number_to_process) * 100)
				Util.checkDatabaseFolderSize()
			print "Finishing up writing data to database."
			pool.wait_completion()
			if update_existing:
				#prune database of stale file and author nodes
				print "Starting prune of database."
				if project_type == "git":
					Git.pruneDatabaseStaleFiles(all_files, project)
					Git.pruneDatabaseStaleAuthors()
					Git.pruneAllOtherNodes()
				elif project_type == "hg":
					print "Not yet implemented."

	if manager:
		manager.markForFinish()

	print "Done.  Exiting."
Example #9
def compare_site_thread(old_url,
                        new_url,
                        progress_var=None,
                        step=100.0,
                        thread_pool_csv=None):
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # checking multiple sites mode
    if thread_pool_csv:
        thread_pool = thread_pool_csv
    else:
        thread_pool = ThreadPool(settings["THREADPOOL_SIZE"])
    create_path()
    ind = 0

    old_url = old_url.strip()
    new_url = new_url.strip()

    # remove the "/" at the end of the url
    if old_url.endswith('/'):
        old_url = old_url[:-1]
    if new_url.endswith('/'):
        new_url = new_url[:-1]

    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url

    # print out the information for old and new sites
    entry_print("-----------------------------------------------------", True)
    entry_print("Old URL: " + old_url, True)
    entry_print("New URL: " + new_url, True)
    entry_print("-----------------------------------------------------", True)

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # check if the new site needs login
    new_test = get_soup(new_url)
    if new_test:
        title = new_test.find("title")
        if title and title.get_text().strip() == "Login":
            entry_print(
                "New site needs login. Please use login mode to check this site!\n",
                True)
            return -1

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # get the subpages of old and new sites
    try:
        sites = get_sites(old_url)
    except AttributeError:
        entry_print(
            "Can't find the site map from " + old_url +
            ". Please check if the url is valid!", True)
        thread_pool.destroy()
        return
    old_blog = get_blog_site(old_url)
    new_blog = get_blog_site(new_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        thread_pool.destroy()
        return

    blog_exists = False
    if old_blog and new_blog:
        blog_exists = True

    # if urls for subpages are not found
    if sites is None:
        record_error(new_url, "sites")
        if progress_var:
            progress_var.set(progress_var.get() + step)
        return False

    # if blog page is not found
    if old_blog is not None and new_blog is None:
        record_error(new_url, "blog")
    elif old_blog is None and new_blog is not None:
        record_error(old_url, "blog")

    setup_step = step * 0.02
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # print out site information
    entry_print("Site Information: ", True)

    # calculate the step for each page
    step *= 0.96
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
        entry_print("Old Blog: " + old_blog, True)
        entry_print("New Blog: " + new_blog, True)
    else:
        page_step = step / (len(sites) + 1)

    entry_print("Number of non-blog pages: " + str(len(sites)), True)

    # check the homepage
    thread_pool.add_task(compare_homepage,
                         old_url=old_url,
                         new_url=new_url,
                         progress_var=progress_var,
                         step=page_step)

    # check all the sites in sitemap
    for site in sites:
        ind += 1
        if site.startswith("/home") or site.startswith("/main"):
            continue

        old_link = old_url + site
        new_link = new_url + site

        thread_pool.add_task(compare_page,
                             old_url=old_link,
                             new_url=new_link,
                             progress_var=progress_var,
                             step=page_step)

    # check all the blog pages
    if blog_exists:
        old_blog_soup = get_soup(old_blog)
        new_blog_soup = get_soup(new_blog)
        compare_blog(old_blog_soup,
                     new_blog_soup,
                     old_blog,
                     new_blog,
                     progress_var=progress_var,
                     step=step / 2)

    # single site mode
    if not thread_pool_csv:
        thread_pool.wait_completion()
        thread_pool.destroy()

    entry_print("-----------------------------------------------------\n")

    return True
Example #10
def main(argv):
    #Parse commandline arguments
    #http://www.tutorialspoint.com/python/python_command_line_arguments.htm
    update_existing = False
    try:
        opts, args = getopt.getopt(argv, "hu:")
    except getopt.GetoptError:
        print "run_dependencies.py -u <True/False>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print "run_dependencies.py -u <True/False>"
        elif opt == "-u":
            if arg == "True":
                update_existing = True

    projects = [
        d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS)
        if os.path.isdir(
            os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))
    ]

    if update_existing:
        print "Running in-place update."
        manager = None
    else:
        manager = consumerDatabaseThreadManager("dependencies")

    pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

    #iterate through all projects
    for project in projects:
        if project in devknowledge.settings.PROJECT_FOLDERS:
            all_files, last_lines = Util.returnFilesLastLine(project)
            c_files = Util.returnCandCPlusPlusFiles(all_files, project)
            number_to_process = len(c_files)
            print "Number of files to process: ", number_to_process
            processed_files = 0
            for file in c_files:
                last_line = last_lines[processed_files]
                #print "Project: ", project, " ", file, " number lines: ", last_line

                if update_existing:
                    FileDependencies.parseFileDependencies(
                        manager, project, file, c_files, update_existing)
                else:
                    pool.add_task(FileDependencies.parseFileDependencies,
                                  manager, project, file, c_files,
                                  update_existing)

                processed_files += 1

                print "Percent done: %.2f %%" % float(
                    float(processed_files) / float(number_to_process) * 100)
                Util.checkDatabaseFolderSize()
            print "Finishing up writing data to database."
            pool.wait_completion()

    if not update_existing:
        manager.markForFinish()
    print "Done.  Exiting."
Example #11
class DroidStager(threading.Thread):
    def __init__(self,
                 globalWorkingDir,
                 localWorkingDir,
                 outputs=None,
                 job=None,
                 esJobManager=None,
                 outputDir=None,
                 rank=None,
                 logger=None):
        threading.Thread.__init__(self)
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__rank = rank
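        # NOTE: "and False" makes this branch unreachable, so the passed-in
        # logger is ignored and a per-rank Droid.log file logger is always used.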
        if logger and False:
            self.__tmpLog = logger
        else:
            curdir = _abspath(self.__localWorkingDir)
            wkdirname = "rank_%s" % str(self.__rank)
            wkdir = _abspath(_join(curdir, wkdirname))
            self.__tmpLog = Logger.Logger(
                filename=os.path.join(wkdir, 'Droid.log'))
        self.__job = job
        self.__esJobManager = esJobManager
        self.__stop = threading.Event()
        self.__isFinished = False
        self.__tmpLog.info("Rank %s: Global working dir: %s" %
                           (self.__rank, self.__globalWorkingDir))
        os.environ['PilotHomeDir'] = os.path.dirname(self.__globalWorkingDir)

        self.__jobId = None
        self.__copyOutputToGlobal = False
        self.__outputDir = outputDir

        self.__hostname = socket.getfqdn()

        self.__outputs = outputs
        self.__threadpool = None
        self.setup(job)

    def setup(self, job):
        try:
            self.__jobId = job.get("JobId", None)
            self.__yodaToOS = job.get('yodaToOS', False)
            self.__yodaToZip = job.get('yodaToZip', False)
            self.__zipFileName = job.get('zipFileName', None)
            self.__zipEventRangesName = job.get('zipEventRangesName', None)
            self.__tmpLog.debug("Rank %s: zip file %s" %
                                (self.__rank, self.__zipFileName))
            self.__tmpLog.debug("Rank %s: zip event range file %s" %
                                (self.__rank, self.__zipEventRangesName))
            if self.__zipFileName is None or self.__zipEventRangesName is None:
                self.__tmpLog.debug(
                    "Rank %s: either zipFileName(%s) is None or zipEventRanagesName(%s) is None, will not use zip output"
                    % (self.__rank, self.__zipFileName,
                       self.__zipEventRangesName))
                self.__yodaToZip = False
            self.__copyOutputToGlobal = job.get('copyOutputToGlobal', False)

            if self.__yodaToOS:
                setup = job.get('setup', None)
                self.__esPath = job.get('esPath', None)
                self.__os_bucket_id = job.get('os_bucket_id', None)
                self.__report = getInitialTracingReport(
                    userid='Yoda',
                    sitename='Yoda',
                    dsname=None,
                    eventType="objectstore",
                    analysisJob=False,
                    jobId=None,
                    jobDefId=None,
                    dn='Yoda')
                self.__siteMover = objectstoreSiteMover(setup,
                                                        useTimerCommand=False)
                self.__cores = int(job.get('ATHENA_PROC_NUMBER', 1))

                self.__tmpLog.debug("Rank %s: start threadpool" %
                                    (self.__rank))
                # Guard against a zero-size pool when fewer than eight cores are available.
                self.__threadpool = ThreadPool(max(1, self.__cores // 8))

        except:
            self.__tmpLog.error("Failed to setup Droid stager: %s" %
                                str(traceback.format_exc()))

    def copyOutput(self, output, outputs):
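        # Three possible destinations: an explicit output directory, the
        # global working directory, or a path-for-path copy from the local
        # into the global working tree. For the first two, a rename is
        # attempted before falling back to copy-and-remove (e.g. for
        # cross-device moves).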
        if self.__outputDir:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__outputDir, base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        elif self.__copyOutputToGlobal:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__globalWorkingDir,
                                             base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        else:
            if self.__localWorkingDir == self.__globalWorkingDir:
                return 0, output

            for filename in outputs:
                #filename = output.split(",")[0]
                new_file_name = filename.replace(self.__localWorkingDir,
                                                 self.__globalWorkingDir)
                dirname = os.path.dirname(new_file_name)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                shutil.copy(filename, new_file_name)
                os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output

    def stageOutToOS(self, outputs):
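        # Upload each file to the objectstore through the site mover,
        # deleting the local copy on success; stop at the first failure.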
        ret_status = 0
        ret_outputs = []
        try:
            for filename in outputs:
                ret_status, pilotErrorDiag, surl, size, checksum, arch_type = self.__siteMover.put_data(
                    filename,
                    self.__esPath,
                    lfn=os.path.basename(filename),
                    report=self.__report,
                    token=None,
                    experiment='ATLAS')
                if ret_status == 0:
                    os.remove(filename)
                    ret_outputs.append(surl)
                else:
                    self.__tmpLog.debug("Failed to stageout %s: %s %s" %
                                        (filename, ret_status, pilotErrorDiag))
                    return ret_status, pilotErrorDiag
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when staging out: %s" %
                (self.__rank, traceback.format_exc()))
            ret_status = -1
        return ret_status, ret_outputs

    def zipOutputs(self, eventRangeID, eventStatus, outputs):
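        # "tar -rf" appends each file to the uncompressed archive named by
        # zipFileName; archived files are removed locally and the event
        # range is recorded in the zip manifest (zipEventRangesName).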
        try:
            for filename in outputs:
                command = "tar -rf " + self.__zipFileName + " --directory=%s %s" % (
                    os.path.dirname(filename), os.path.basename(filename))
                status, ret = commands.getstatusoutput(command)
                if status:
                    self.__tmpLog.debug("Failed to zip %s: %s, %s" %
                                        (filename, status, ret))
                    return status, ret
                else:
                    os.remove(filename)
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when zipping out: %s" %
                (self.__rank, traceback.format_exc()))
            return -1, "Failed to zip outputs"
        else:
            with open(self.__zipEventRangesName, "a") as handler:
                handler.write("%s %s %s\n" % (eventRangeID, eventStatus, outputs))
        return 0, outputs

    def stageOut(self, eventRangeID, eventStatus, output, retries=0):
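        # Dispatch one finished event range: error statuses are reported
        # unchanged; otherwise the output files (all comma-separated fields
        # of `output` except the last three) are zipped, staged out to the
        # objectstore (with a single retry), or copied, and the resulting
        # request is queued for Yoda.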
        if eventStatus.startswith("ERR"):
            request = {
                "eventRangeID": eventRangeID,
                'eventStatus': eventStatus,
                "output": output
            }
        else:
            outputs = output.split(",")[:-3]
            if self.__yodaToZip:
                self.__tmpLog.debug("Rank %s: start to zip outputs: %s" %
                                    (self.__rank, outputs))
                retStatus, retOutput = self.zipOutputs(eventRangeID,
                                                       eventStatus, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'zipped',
                        "output": retOutput
                    }
            elif self.__yodaToOS:
                self.__tmpLog.debug(
                    "Rank %s: start to stage out outputs to objectstore: %s" %
                    (self.__rank, outputs))
                retStatus, retOutput = self.stageOutToOS(outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to stagout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    if retries < 1:
                        self.stageOut(eventRangeID,
                                      eventStatus,
                                      output,
                                      retries=retries + 1)
                        request = None
                    else:
                        request = {
                            "jobId": self.__jobId,
                            "eventRangeID": eventRangeID,
                            'eventStatus': eventStatus,
                            "output": output
                        }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to stageout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'stagedOut',
                        "output": retOutput,
                        'objstoreID': self.__os_bucket_id
                    }
            else:
                self.__tmpLog.debug("Rank %s: start to copy outputs: %s" %
                                    (self.__rank, outputs))
                retStatus, retOutput = self.copyOutput(output, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": retOutput
                    }
        if request:
            self.__outputs.put(request)

    def stop(self):
        self.__stop.set()

    def isFinished(self):
        return self.__isFinished

    def run(self):
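        # Poll the ES job manager once per second; hand each finished event
        # range to the thread pool when objectstore stage-out is enabled,
        # otherwise stage it out inline. On stop, drain the pool and exit.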
        while True:
            try:
                outputs = self.__esJobManager.getOutputs()
                if outputs:
                    self.__tmpLog.debug("Rank %s: getOutputs: %s" %
                                        (self.__rank, outputs))
                    for outputMsg in outputs:
                        try:
                            eventRangeID, eventStatus, output = outputMsg
                            if self.__threadpool:
                                self.__tmpLog.debug(
                                    "Rank %s: add event output to threadpool: %s"
                                    % (self.__rank, outputMsg))
                                self.__threadpool.add_task(self.stageOut,
                                                           eventRangeID,
                                                           eventStatus,
                                                           output,
                                                           retries=0)
                            else:
                                self.stageOut(eventRangeID,
                                              eventStatus,
                                              output,
                                              retries=0)
                        except:
                            self.__tmpLog.warning(
                                "Rank %s: error message: %s" %
                                (self.__rank, traceback.format_exc()))
                            continue
            except:
                self.__tmpLog.error("Rank %s: Stager Thread failed: %s" %
                                    (self.__rank, traceback.format_exc()))
            if self.__stop.isSet():
                if self.__threadpool:
                    self.__tmpLog.warning(
                        "Rank %s: wait threadpool to finish" % (self.__rank))
                    self.__threadpool.wait_completion()
                    self.__tmpLog.warning("Rank %s: threadpool finished" %
                                          (self.__rank))
                break
            time.sleep(1)
        self.__isFinished = True
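
A minimal consumer sketch for the `outputs` queue that `stageOut` fills above. Assumptions: the queue is a standard `Queue.Queue` (compatible with the `.put(request)` call above), the request keys match the dictionaries built in `stageOut`, and `drain_requests` is a hypothetical helper, not part of the original code.

import Queue  # Python 2 stdlib queue, matching the examples above


def drain_requests(outputs, timeout=5):
    # Pop stage-out reports until the queue stays empty for `timeout` seconds.
    while True:
        try:
            request = outputs.get(timeout=timeout)
        except Queue.Empty:
            break
        print("event %s -> %s (status: %s)" % (request["eventRangeID"],
                                               request["output"],
                                               request["eventStatus"]))

# Usage: create the queue, pass it as DroidStager(..., outputs=queue, ...),
# start() the thread, then periodically call drain_requests(queue).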