Example #1
import sys
import time
import os
from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec

queueName = sys.argv[1]

queueConfigMapper = QueueConfigMapper()

queueConfig = queueConfigMapper.get_queue(queueName)

jobSpec = JobSpec()
jobSpec.jobParams = {'inFiles': 'DAOD_STDM4.09596175._000008.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '658906675',
                     'GUID': '7e3776f9bb0af341b03e59d3de895a13',
                     'checksum': 'ad:3734bdd9',
                     'ddmEndPointIn': 'BNL-OSG2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV.363638.MGPy8EG_N30NLO_Wmunu_Ht500_700_BFilter.merge.DAOD_STDM4.e4944_s2726_r7772_r7676_p2842_tid09596175_00',
                     }
jobSpec.computingSite = queueName

from pandaharvester.harvestercore.plugin_factory import PluginFactory

pluginFactory = PluginFactory()

# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print ("plugin={0}".format(preparatorCore.__class__.__name__))

print ("testing stagein:")
print ("BasePath from preparator configuration: %s " % preparatorCore.basePath)
Example #2
import sys

queueName = sys.argv[1]

from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper

queueConfigMapper = QueueConfigMapper()

queueConfig = queueConfigMapper.get_queue(queueName)

from pandaharvester.harvestercore.job_spec import JobSpec

jobSpec = JobSpec()
jobSpec.computingSite = queueName
jobSpec.jobParams = {'inFiles': 'EVNT.06820166._000001.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '196196765',
                     'GUID': 'B7F387CD-1F97-1C47-88BD-D8785442C49D',
                     'checksum': 'ad:326e445d',
                     'ddmEndPointIn': 'MWT2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV:mc15_13TeV.301042.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_250M400.evgen.EVNT.e3649_tid06820166_00',
                     }

from pandaharvester.harvestercore.plugin_factory import PluginFactory

pluginFactory = PluginFactory()

# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print ("plugin={0}".format(preparatorCore.__class__.__name__))

print ("testing preparation")
tmpStat, tmpOut = preparatorCore.trigger_preparation(jobSpec)
if tmpStat:
Example #3
    def make_worker(self, jobspec_list, queue_config, resource_type):
        tmpLog = self.make_logger(_logger,
                                  'queue={0}'.format(queue_config.queueName),
                                  method_name='make_worker')

        tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

        workSpec = WorkSpec()
        workSpec.creationTime = datetime.datetime.utcnow()

        # get the queue configuration from the DB
        panda_queues_dict = PandaQueuesDict()
        queue_dict = panda_queues_dict.get(queue_config.queueName, {})

        unified_queue = queue_dict.get('capability', '') == 'ucore'
        # case of traditional (non-unified) queue: look at the queue configuration
        if not unified_queue:
            workSpec.nCore = queue_dict.get('corecount', 1) or 1
            workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

        # case of unified queue: look at the resource type and queue configuration
        else:
            catchall = queue_dict.get('catchall', '')
            if 'useMaxRam' in catchall or queue_config.queueName in (
                    'Taiwan-LCG2-HPC2_Unified', 'Taiwan-LCG2-HPC_Unified',
                    'DESY-ZN_UCORE'):
                # temporary hack to debug killed workers in Taiwan queues
                site_corecount = queue_dict.get('corecount', 1) or 1
                site_maxrss = queue_dict.get('maxrss', 1) or 1

                # some cases need to overwrite those values
                if 'SCORE' in resource_type:
                    # the usual pilot streaming use case
                    workSpec.nCore = 1
                    workSpec.minRamCount = int(
                        math.ceil(site_maxrss / site_corecount))
                else:
                    # default values
                    workSpec.nCore = site_corecount
                    workSpec.minRamCount = site_maxrss
            else:
                workSpec.nCore, workSpec.minRamCount = self.rt_mapper.calculate_worker_requirements(
                    resource_type, queue_dict)

        # parameters that are independent of traditional vs. unified queues
        workSpec.maxWalltime = queue_dict.get('maxtime', 1)
        workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)
        walltimeLimit_default = getattr(queue_config, 'walltimeLimit', 0)

        if len(jobspec_list) > 0:
            # get info from jobs
            nCore = 0
            minRamCount = 0
            maxDiskCount = 0
            maxWalltime = 0
            ioIntensity = 0
            for jobSpec in jobspec_list:
                job_corecount, job_memory = self.get_job_core_and_memory(
                    queue_dict, jobSpec)
                nCore += job_corecount
                minRamCount += job_memory
                try:
                    maxDiskCount += jobSpec.jobParams['maxDiskCount']
                except Exception:
                    pass
                try:
                    ioIntensity += jobSpec.jobParams['ioIntensity']
                except Exception:
                    pass
            try:
                # maxWallTime from AGIS or qconf, not trusting job currently
                maxWalltime = queue_dict.get('maxtime', walltimeLimit_default)
            except Exception:
                pass

            if (nCore > 0 and 'nCore' in self.jobAttributesToUse) \
               or unified_queue:
                workSpec.nCore = nCore
            if (minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse) \
               or unified_queue:
                workSpec.minRamCount = minRamCount
            if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
                workSpec.maxDiskCount = maxDiskCount
            if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
                workSpec.maxWalltime = maxWalltime
            if ioIntensity > 0 and 'ioIntensity' in self.jobAttributesToUse:
                workSpec.ioIntensity = ioIntensity
            workSpec.pilotType = jobspec_list[0].get_pilot_type()
        else:
            # when no job
            # randomize pilot type with weighting
            pdpm = getattr(queue_config,
                           'prodSourceLabelRandomWeightsPermille', {})
            choice_list = core_utils.make_choice_list(pdpm=pdpm,
                                                      default='managed')
            tmp_prodsourcelabel = random.choice(choice_list)
            fake_job = JobSpec()
            fake_job.jobParams = {}
            fake_job.jobParams['prodSourceLabel'] = tmp_prodsourcelabel
            workSpec.pilotType = fake_job.get_pilot_type()
            del fake_job
            if workSpec.pilotType in ['RC', 'ALRB', 'PT']:
                tmpLog.info('a worker has pilotType={0}'.format(
                    workSpec.pilotType))
        # TODO: this needs to be improved with real resource types
        if resource_type and resource_type != 'ANY':
            workSpec.resourceType = resource_type
        elif workSpec.nCore == 1:
            workSpec.resourceType = 'SCORE'
        else:
            workSpec.resourceType = 'MCORE'

        return workSpec
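For a unified queue advertising corecount=8 and maxrss=16000, the SCORE branch above yields a single-core worker with minRamCount = ceil(16000 / 8) = 2000. A minimal driver sketch follows; the class name SimpleWorkerMaker and the call site are assumptions for illustration (in Harvester this method is invoked by the worker-making agent with a real queue configuration and job list):

worker_maker = SimpleWorkerMaker()  # assumed host class of make_worker
work_spec = worker_maker.make_worker(jobspec_list=[],
                                     queue_config=queue_config,
                                     resource_type='SCORE')
print('nCore={0} minRamCount={1} resourceType={2}'.format(
    work_spec.nCore, work_spec.minRamCount, work_spec.resourceType))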
Example #4
    lfn=assFileSpec.lfn)
if not os.path.exists(os.path.dirname(assFileSpec.path)):
    print "os.makedirs({})".format(os.path.dirname(assFileSpec.path))
    os.makedirs(os.path.dirname(assFileSpec.path))
oFile = open(assFileSpec.path, 'w')
oFile.write(''.join(
    random.choice(string.ascii_uppercase + string.digits)
    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)

jobSpec = JobSpec()
jobSpec.jobParams = {
    'outFiles': fileSpec.lfn + ',log',
    'scopeOut': 'panda',
    'scopeLog': 'panda',
    'logFile': 'log',
    'realDatasets': 'panda.' + fileSpec.lfn,
    'ddmEndPointOut': 'BNL-OSG2_DATADISK',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.add_out_file(fileSpec)

print "file to transfer - {}".format(assFileSpec.path)
print "dump(jobSpec)"
#dump(jobSpec)

pluginFactory = PluginFactory()

# get stage-out plugin
stagerCore = pluginFactory.get_plugin(queueConfig.stager)
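With the stager plugin in hand, the usual sequence mirrors Example #5: zip the outputs, then trigger the transfer. A minimal sketch, assuming the standard stager interface where each call returns a (status, message) pair:

print("plugin={0}".format(stagerCore.__class__.__name__))
print("testing stage-out")
tmpStat, tmpOut = stagerCore.trigger_stage_out(jobSpec)  # interface assumed
if tmpStat:
    print(" OK")
else:
    print(" NG {0}".format(tmpOut))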
Example #5
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.path, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)
jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeOut': 'panda',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': 'panda.' + fileSpec.lfn,
                     'ddmEndPointOut': 'BNL-OSG2_DATADISK',
                     }
jobSpec.add_out_file(fileSpec)

pluginFactory = PluginFactory()

# get stage-out plugin
stagerCore = pluginFactory.get_plugin(queueConfig.stager)
print ("plugin={0}".format(stagerCore.__class__.__name__))

print ("testing zip")
tmpStat, tmpOut = stagerCore.zip_output(jobSpec)
if tmpStat:
    print (" OK")
locked = stagerCore.dbInterface.get_object_lock('dummy_id_for_out_0',lock_interval=120)
if not locked:
   tmpLog.debug('DB Already locked by another thread')
# now unlock db
unlocked = stagerCore.dbInterface.release_object_lock('dummy_id_for_out_0')
if unlocked :
   tmpLog.debug('unlocked db')
else:
   tmpLog.debug(' Could not unlock db')

# loop over the job ids creating various JobSpecs
jobSpec_list = []
for job_id in range(begin_job_id, end_job_id + 1):
    jobSpec = JobSpec()
    jobSpec.jobParams = {'scopeLog': 'panda',
                         'logFile': 'log',
                         }
    jobSpec.computingSite = queueName
    jobSpec.PandaID = job_id
    jobSpec.modificationTime = datetime.datetime.now()
    realDataset = 'panda.sgotest.' + uuid.uuid4().hex
    ddmEndPointOut = 'BNL-OSG2_DATADISK'
    outFiles_scope_str = ''
    outFiles_str = ''
    realDatasets_str = ''
    ddmEndPointOut_str = ''
    # create up to 5 files for output
    for index in range(random.randint(1, 5)):
        fileSpec = FileSpec()
        assFileSpec = FileSpec()
        fileSpec.fileType = 'es_output'
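The snippet is cut off inside the per-file loop; the accumulator strings it initializes are normally joined into the comma-separated jobParams fields shown earlier in this example. An assumed sketch of that assembly, mirroring the jobParams layout above:

# inside the loop: append one comma-separated entry per output file (assumed)
outFiles_str += fileSpec.lfn + ','
outFiles_scope_str += 'panda,'
realDatasets_str += realDataset + ','
ddmEndPointOut_str += ddmEndPointOut + ','
# after the loop: strip trailing commas and fill the output-side jobParams (assumed)
jobSpec.jobParams = {'outFiles': outFiles_str[:-1] + ',log',
                     'scopeOut': outFiles_scope_str[:-1],
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': realDatasets_str[:-1],
                     'ddmEndPointOut': ddmEndPointOut_str[:-1],
                     }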
Example #7
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # We are sending test files from our destination machine to the source machine
    # both endpoints are activated; now prepare the transfer task
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except Exception:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)
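# assumed continuation of the Globus flow (the paths are hypothetical): add one
# test file to the task and submit it; submit_transfer returns the new task's id
tdata.add_item('/~/harvester-test-src.txt', '/~/harvester-test-dst.txt')
transfer_result = tc.submit_transfer(tdata)
tmpLog.debug('submitted Globus transfer, task_id={0}'.format(transfer_result['task_id']))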

# create JobSpec
jobSpec = JobSpec()
jobSpec.jobParams = {
    'scopeLog': 'panda',
    'logFile': 'log',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
inFiles_scope_str = ''
inFiles_str = ''
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
fsize_str = ''
checksum_str = ''
scope_in_str = ''
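The example is truncated while initializing these per-file accumulators; they mirror the input-side jobParams keys used in Examples #1 and #2. An assumed sketch of the eventual assembly, with illustrative values only:

# per input file, inside a loop (assumed; lfn and realDataset are placeholders)
inFiles_str += lfn + ','
inFiles_scope_str += 'panda,'
realDatasetsIn_str += realDataset + ','
ddmEndPointIn_str += ddmEndPointIn + ','
GUID_str += str(uuid.uuid4()) + ','
# after the loop: strip trailing commas and fill the input-side jobParams,
# following the key layout of Examples #1 and #2 (assumed)
jobSpec.jobParams = {'inFiles': inFiles_str[:-1],
                     'scopeIn': inFiles_scope_str[:-1],
                     'realDatasetsIn': realDatasetsIn_str[:-1],
                     'ddmEndPointIn': ddmEndPointIn_str[:-1],
                     'GUID': GUID_str[:-1],
                     }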