def trigger_stage_out(self, jobspec):
    """Trigger the stage-out procedure for the job.
    Create a dummy output file to force harvester to wait until the aCT job is done.

    :param jobspec: job specifications
    :type jobspec: JobSpec
    :return: A tuple of return code (True: success, False: fatal failure, None: temporary failure)
             and error dialog
    :rtype: (bool, string)
    """
    fileSpec = FileSpec()
    fileSpec.PandaID = jobspec.PandaID
    fileSpec.taskID = jobspec.taskID
    fileSpec.lfn = 'dummy.{0}'.format(jobspec.PandaID)
    fileSpec.scope = 'dummy'
    fileSpec.fileType = 'output'
    # the dummy file is an output file, so register it as such
    jobspec.add_out_file(fileSpec)
    return True, ''
def run(self):
    while True:
        mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()),
                                   method_name='run')
        mainLog.debug('getting number of jobs to be fetched')
        # get number of jobs to be fetched
        nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                           harvester_config.jobfetcher.lookupTime)
        mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
        # loop over all queues
        for queueName, nJobs in iteritems(nJobsPerQueue):
            # check queue
            if not self.queueConfigMapper.has_queue(queueName):
                continue
            tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                      method_name='run')
            # get queue
            queueConfig = self.queueConfigMapper.get_queue(queueName)
            # upper limit
            if nJobs > harvester_config.jobfetcher.maxJobs:
                nJobs = harvester_config.jobfetcher.maxJobs
            # get jobs
            tmpLog.debug('getting {0} jobs'.format(nJobs))
            sw = core_utils.get_stopwatch()
            siteName = queueConfig.siteName
            jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                      queueConfig.get_source_label(),
                                                      self.nodeName, nJobs,
                                                      queueConfig.getJobCriteria)
            tmpLog.info('got {0} jobs with {1} {2}'.format(len(jobs), errStr, sw.get_elapsed_time()))
            # convert to JobSpec
            if len(jobs) > 0:
                jobSpecs = []
                fileStatMap = dict()
                sw_startconvert = core_utils.get_stopwatch()
                for job in jobs:
                    timeNow = datetime.datetime.utcnow()
                    jobSpec = JobSpec()
                    jobSpec.convert_job_json(job)
                    jobSpec.computingSite = queueName
                    jobSpec.status = 'starting'
                    jobSpec.subStatus = 'fetched'
                    jobSpec.creationTime = timeNow
                    jobSpec.stateChangeTime = timeNow
                    jobSpec.configID = queueConfig.configID
                    jobSpec.set_one_attribute('schedulerID',
                                              'harvester-{0}'.format(harvester_config.master.harvester_id))
                    if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                        jobSpec.zipPerMB = queueConfig.zipPerMB
                    for tmpLFN, fileAttrs in iteritems(jobSpec.get_input_file_attributes()):
                        # check file status
                        if tmpLFN not in fileStatMap:
                            fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                               queueConfig.ddmEndpointIn,
                                                                               'starting')
                        # make file spec
                        fileSpec = FileSpec()
                        fileSpec.PandaID = jobSpec.PandaID
                        fileSpec.taskID = jobSpec.taskID
                        fileSpec.lfn = tmpLFN
                        fileSpec.endpoint = queueConfig.ddmEndpointIn
                        fileSpec.scope = fileAttrs['scope']
                        # set preparing to skip stage-in if the file is (being) taken care of by another job
                        if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                or 'to_prepare' in fileStatMap[tmpLFN]:
                            fileSpec.status = 'preparing'
                        else:
                            fileSpec.status = 'to_prepare'
                        if fileSpec.status not in fileStatMap[tmpLFN]:
                            fileStatMap[tmpLFN][fileSpec.status] = 0
                        fileStatMap[tmpLFN][fileSpec.status] += 1
                        fileSpec.fileType = 'input'
                        jobSpec.add_in_file(fileSpec)
                    jobSpec.trigger_propagation()
                    jobSpecs.append(jobSpec)
                # insert to DB
                tmpLog.debug('Converting of {0} jobs {1}'.format(len(jobs), sw_startconvert.get_elapsed_time()))
                sw_insertdb = core_utils.get_stopwatch()
                self.dbProxy.insert_jobs(jobSpecs)
                tmpLog.debug('Insert of {0} jobs {1}'.format(len(jobSpecs), sw_insertdb.get_elapsed_time()))
        mainLog.debug('done')
        # check if being terminated
        if self.terminated(harvester_config.jobfetcher.sleepTime):
            mainLog.debug('terminated')
            return
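# Standalone sketch (not harvester code) distilling the fileStatMap bookkeeping
# used in run() above: a file that is already 'ready', 'preparing', or
# 'to_prepare' for another job gets marked 'preparing' here, so this job does
# not trigger a second stage-in for the same LFN. The function and LFN names
# below are illustrative only.
def choose_file_status(file_stat_map, lfn, db_lookup):
    if lfn not in file_stat_map:
        # first time this LFN is seen in the cycle: ask the DB for its status counts
        file_stat_map[lfn] = db_lookup(lfn)
    counts = file_stat_map[lfn]
    if 'ready' in counts or 'preparing' in counts or 'to_prepare' in counts:
        status = 'preparing'
    else:
        status = 'to_prepare'
    counts[status] = counts.get(status, 0) + 1
    return status


# usage with a dummy DB lookup: the first job triggers stage-in, later jobs piggy-back on it
stat_map = {}
assert choose_file_status(stat_map, 'EVNT.pool.root', lambda lfn: {}) == 'to_prepare'
assert choose_file_status(stat_map, 'EVNT.pool.root', lambda lfn: {}) == 'preparing'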
queueConfig = queueConfigMapper.get_queue(queueName)

print queueConfig.stager
print queueConfig.stager['Globus_srcPath']
print queueConfig.stager['srcEndpoint']
print queueConfig.stager['Globus_dstPath']
print queueConfig.stager['dstEndpoint']
print queueConfig.stager['zipDir']

print "Initial queueConfig.stager = ", queueConfig.stager
queueConfig.stager['module'] = 'pandaharvester.harvesterstager.go_stager'
queueConfig.stager['name'] = 'GlobusStager'
print "Modified queueConfig.stager = ", queueConfig.stager

scope = 'panda'

fileSpec = FileSpec()
fileSpec.fileType = 'es_output'
fileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {}
assFileSpec = FileSpec()
assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
# create source file
hash = hashlib.md5()
hash.update('%s:%s' % (scope, fileSpec.lfn))
hash_hex = hash.hexdigest()
correctedscope = "/".join(scope.split('.'))
assFileSpec.path = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
    endPoint=queueConfig.stager['Globus_srcPath'],
    scope=correctedscope,
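# Separate illustration, not the continuation of the truncated .format() call
# above: deterministic paths of this Rucio-like kind are typically built from
# the md5 of "scope:lfn", with the first two hex-digit pairs used as directory
# levels. The base path and file name below are placeholder assumptions.
import hashlib

scope = 'panda'
lfn = 'panda.sgotest.deadbeef.gz'
hash_hex = hashlib.md5(('%s:%s' % (scope, lfn)).encode()).hexdigest()
path = '/base/path/{scope}/{hash1}/{hash2}/{lfn}'.format(
    scope=scope, hash1=hash_hex[0:2], hash2=hash_hex[2:4], lfn=lfn)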
import atexit
import os
import random
import string
import sys
import uuid

from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper

file_prefix = 'panda.sgotest.'


def exit_func():
    # clean up the dummy test files on exit
    for f in os.listdir('.'):
        if f.startswith(file_prefix):
            os.remove(f)


atexit.register(exit_func)

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

fileSpec = FileSpec()
fileSpec.fileType = 'output'
fileSpec.lfn = file_prefix + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)

jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
jobSpec.jobParams = {
    'scopeLog': 'panda',
    'logFile': 'log',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointOut = 'BNL-OSG2_DATADISK'
outFiles_scope_str = ''
outFiles_str = ''
realDatasets_str = ''
ddmEndPointOut_str = ''
# create up to 5 files for output
for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'es_output'
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn + '.gz'
    fileSpec.scope = 'panda'
    outFiles_scope_str += 'panda,'
    outFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    ddmEndPointOut_str += ddmEndPointOut + ","
    assFileSpec.fileType = 'es_output'
    assFileSpec.fsize = random.randint(10, 100)
    # create source file
    hash = hashlib.md5()
    hash.update('%s:%s' % (scope, fileSpec.lfn))
    hash_hex = hash.hexdigest()
def run(self):
    while True:
        mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()),
                                   method_name='run')
        mainLog.debug('getting number of jobs to be fetched')
        # get number of jobs to be fetched
        nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                           harvester_config.jobfetcher.lookupTime)
        mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
        # loop over all queues
        for queueName, nJobs in iteritems(nJobsPerQueue):
            # check queue
            if not self.queueConfigMapper.has_queue(queueName):
                continue
            tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                      method_name='run')
            # get queue
            queueConfig = self.queueConfigMapper.get_queue(queueName)
            # upper limit
            if nJobs > harvester_config.jobfetcher.maxJobs:
                nJobs = harvester_config.jobfetcher.maxJobs
            # get jobs
            default_prodSourceLabel = queueConfig.get_source_label()
            pdpm = getattr(queueConfig, 'prodSourceLabelRandomWeightsPermille', {})
            choice_list = core_utils.make_choice_list(pdpm=pdpm, default=default_prodSourceLabel)
            prodSourceLabel = random.choice(choice_list)
            tmpLog.debug('getting {0} jobs for prodSourceLabel {1}'.format(nJobs, prodSourceLabel))
            sw = core_utils.get_stopwatch()
            siteName = queueConfig.siteName
            jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                      prodSourceLabel,
                                                      self.nodeName, nJobs,
                                                      queueConfig.getJobCriteria)
            tmpLog.info('got {0} jobs with {1} {2}'.format(len(jobs), errStr, sw.get_elapsed_time()))
            # convert to JobSpec
            if len(jobs) > 0:
                # get extractor plugin
                if hasattr(queueConfig, 'extractor'):
                    extractorCore = self.pluginFactory.get_plugin(queueConfig.extractor)
                else:
                    extractorCore = None
                jobSpecs = []
                fileStatMap = dict()
                sw_startconvert = core_utils.get_stopwatch()
                for job in jobs:
                    timeNow = datetime.datetime.utcnow()
                    jobSpec = JobSpec()
                    jobSpec.convert_job_json(job)
                    jobSpec.computingSite = queueName
                    jobSpec.status = 'starting'
                    jobSpec.subStatus = 'fetched'
                    jobSpec.creationTime = timeNow
                    jobSpec.stateChangeTime = timeNow
                    jobSpec.configID = queueConfig.configID
                    jobSpec.set_one_attribute('schedulerID',
                                              'harvester-{0}'.format(harvester_config.master.harvester_id))
                    if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                        jobSpec.zipPerMB = queueConfig.zipPerMB
                    fileGroupDictList = [jobSpec.get_input_file_attributes()]
                    if extractorCore is not None:
                        fileGroupDictList.append(extractorCore.get_aux_inputs(jobSpec))
                    for fileGroupDict in fileGroupDictList:
                        for tmpLFN, fileAttrs in iteritems(fileGroupDict):
                            # check file status
                            if tmpLFN not in fileStatMap:
                                fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                                   queueConfig.ddmEndpointIn,
                                                                                   'starting')
                            # make file spec
                            fileSpec = FileSpec()
                            fileSpec.PandaID = jobSpec.PandaID
                            fileSpec.taskID = jobSpec.taskID
                            fileSpec.lfn = tmpLFN
                            fileSpec.endpoint = queueConfig.ddmEndpointIn
                            fileSpec.scope = fileAttrs['scope']
                            # set preparing to skip stage-in if the file is (being) taken care of by another job
                            if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                    or 'to_prepare' in fileStatMap[tmpLFN]:
                                fileSpec.status = 'preparing'
                            else:
                                fileSpec.status = 'to_prepare'
                            if fileSpec.status not in fileStatMap[tmpLFN]:
                                fileStatMap[tmpLFN][fileSpec.status] = 0
                            fileStatMap[tmpLFN][fileSpec.status] += 1
                            if 'INTERNAL_FileType' in fileAttrs:
                                fileSpec.fileType = fileAttrs['INTERNAL_FileType']
                                jobSpec.auxInput = JobSpec.AUX_hasAuxInput
                            else:
                                fileSpec.fileType = 'input'
                            if 'INTERNAL_URL' in fileAttrs:
                                fileSpec.url = fileAttrs['INTERNAL_URL']
                            jobSpec.add_in_file(fileSpec)
                    jobSpec.trigger_propagation()
                    jobSpecs.append(jobSpec)
                # insert to DB
                tmpLog.debug('Converting of {0} jobs {1}'.format(len(jobs), sw_startconvert.get_elapsed_time()))
                sw_insertdb = core_utils.get_stopwatch()
                self.dbProxy.insert_jobs(jobSpecs)
                tmpLog.debug('Insert of {0} jobs {1}'.format(len(jobSpecs), sw_insertdb.get_elapsed_time()))
        mainLog.debug('done')
        # check if being terminated
        if self.terminated(harvester_config.jobfetcher.sleepTime):
            mainLog.debug('terminated')
            return
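# Hedged sketch of the mapping shape that run() above expects from
# extractorCore.get_aux_inputs(jobSpec): LFN -> attribute dict, optionally
# carrying INTERNAL_FileType and INTERNAL_URL. The concrete values below are
# illustrative assumptions, not the output of a real extractor plugin.
aux_inputs_example = {
    'container_image.sif': {
        'scope': 'panda',                   # used for FileSpec.scope
        'INTERNAL_FileType': 'aux_input',   # assumed fileType label; also flags jobSpec.auxInput
        'INTERNAL_URL': 'https://example.org/images/container_image.sif',
    },
}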
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
inFiles_scope_str = ''
inFiles_str = ''
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
fsize_str = ''
checksum_str = ''
scope_in_str = ''
# create up to 5 files for input
for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'input'
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn
    fileSpec.scope = 'panda'
    inFiles_scope_str += 'panda,'
    inFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    realDatasetsIn_str += realDataset + ","
    ddmEndPointIn_str += ddmEndPointIn + ","
    # some dummy inputs
    GUID_str += 'd82e8e5e301b77489fd4da04bcdd6565,'
    fsize_str += '3084569129,'
    checksum_str += 'ad:9f60d29f,'
    scope_in_str += 'panda,'
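# Hedged sketch (not part of the original test): folding the comma-separated
# accumulators built above back into the job parameters. The key names simply
# mirror the variable names, and stripping the trailing comma here is an
# assumption about how the real script finishes this block.
jobSpec.jobParams = dict(jobSpec.jobParams or {})
jobSpec.jobParams.update({
    'inFiles': inFiles_str.rstrip(','),
    'scopeIn': scope_in_str.rstrip(','),
    'realDatasetsIn': realDatasetsIn_str.rstrip(','),
    'ddmEndPointIn': ddmEndPointIn_str.rstrip(','),
    'GUID': GUID_str.rstrip(','),
    'fsize': fsize_str.rstrip(','),
    'checksum': checksum_str.rstrip(','),
})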
import atexit
import os
import random
import string
import sys
import uuid

from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper

file_prefix = 'panda.sgotest.'


def exit_func():
    # clean up the dummy test files on exit
    for f in os.listdir('.'):
        if f.startswith(file_prefix):
            os.remove(f)


atexit.register(exit_func)

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

fileSpec = FileSpec()
fileSpec.fileType = 'output'
fileSpec.lfn = file_prefix + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)
def run(self):
    while True:
        mainLog = self.make_logger(_logger, 'id={0}'.format(self.ident),
                                   method_name='run')
        mainLog.debug('getting number of jobs to be fetched')
        # get number of jobs to be fetched
        nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                           harvester_config.jobfetcher.lookupTime)
        mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
        # loop over all queues
        for queueName, nJobs in iteritems(nJobsPerQueue):
            # check queue
            if not self.queueConfigMapper.has_queue(queueName):
                continue
            tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                      method_name='run')
            # get queue
            queueConfig = self.queueConfigMapper.get_queue(queueName)
            # upper limit
            if nJobs > harvester_config.jobfetcher.maxJobs:
                nJobs = harvester_config.jobfetcher.maxJobs
            # get jobs
            tmpLog.debug('getting {0} jobs'.format(nJobs))
            siteName = queueConfig.siteName
            jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                      queueConfig.get_source_label(),
                                                      self.nodeName, nJobs,
                                                      queueConfig.getJobCriteria)
            tmpLog.info('got {0} jobs with {1}'.format(len(jobs), errStr))
            # convert to JobSpec
            if len(jobs) > 0:
                jobSpecs = []
                fileStatMap = dict()
                for job in jobs:
                    timeNow = datetime.datetime.utcnow()
                    jobSpec = JobSpec()
                    jobSpec.convert_job_json(job)
                    jobSpec.computingSite = queueName
                    jobSpec.status = 'starting'
                    jobSpec.subStatus = 'fetched'
                    jobSpec.creationTime = timeNow
                    jobSpec.stateChangeTime = timeNow
                    jobSpec.configID = queueConfig.configID
                    jobSpec.set_one_attribute('schedulerID',
                                              'harvester-{0}'.format(harvester_config.master.harvester_id))
                    if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                        jobSpec.zipPerMB = queueConfig.zipPerMB
                    for tmpLFN, fileAttrs in iteritems(jobSpec.get_input_file_attributes()):
                        # check file status
                        if tmpLFN not in fileStatMap:
                            fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                               queueConfig.ddmEndpointIn,
                                                                               'starting')
                        # make file spec
                        fileSpec = FileSpec()
                        fileSpec.PandaID = jobSpec.PandaID
                        fileSpec.taskID = jobSpec.taskID
                        fileSpec.lfn = tmpLFN
                        fileSpec.endpoint = queueConfig.ddmEndpointIn
                        fileSpec.scope = fileAttrs['scope']
                        # set preparing to skip stage-in if the file is (being) taken care of by another job
                        if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                or 'to_prepare' in fileStatMap[tmpLFN]:
                            fileSpec.status = 'preparing'
                        else:
                            fileSpec.status = 'to_prepare'
                        if fileSpec.status not in fileStatMap[tmpLFN]:
                            fileStatMap[tmpLFN][fileSpec.status] = 0
                        fileStatMap[tmpLFN][fileSpec.status] += 1
                        fileSpec.fileType = 'input'
                        jobSpec.add_in_file(fileSpec)
                    jobSpec.trigger_propagation()
                    jobSpecs.append(jobSpec)
                # insert to DB
                self.dbProxy.insert_jobs(jobSpecs)
        mainLog.debug('done')
        # check if being terminated
        if self.terminated(harvester_config.jobfetcher.sleepTime):
            mainLog.debug('terminated')
            return
import sys
import uuid

from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.event_spec import EventSpec
from pandaharvester.harvestercore.communicator_pool import CommunicatorPool

rID = sys.argv[1]
taskid = rID.split('-')[0]
pandaid = long(rID.split('-')[1])

job = JobSpec()
job.PandaID = pandaid
event = EventSpec()
file = FileSpec()
file.status = 'finished'
file.objstoreID = 9575
file.pathConvention = 1000
file.lfn = str(uuid.uuid4().hex) + '.zip'
file.fsize = 555
file.chksum = '0d2a9dc9'
event.eventRangeID = rID
event.eventStatus = 'finished'
job.zipEventMap = {1: {'events': [event],
                       'zip': file}}

a = CommunicatorPool()
a.update_jobs([job])
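# Usage note (assumed, based on how rID is parsed above): the script takes a
# PanDA event range ID on the command line whose first two dash-separated
# fields are the task ID and the PanDA ID, e.g.
#   python updateEventTest.py 1234567-8765432109-3-4-5
# The script name and the ID layout beyond the first two fields are
# illustrative assumptions, not taken from the sources.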