Example #1
    def test_j_Queues(self):
        from Ganga.GPI import queues, Job, GenericSplitter

        queues  # in an interactive GPI session, evaluating 'queues' displays the thread pool status

        # -- QUEUES EXAMPLE START
        for i in range(1, 10):
            j = Job()
            queues.add(j.submit)
        # -- QUEUES EXAMPLE STOP

        # -- QUEUES FUNCTION START
        def f(x):
            print(x)

        queues.add(f, args=(123,))
        # -- QUEUES FUNCTION STOP

        # -- QUEUES SPLIT START
        j = Job()
        j.splitter = GenericSplitter()
        j.splitter.attribute = 'application.args'
        j.splitter.values = [i for i in range(0, 10)]
        j.parallel_submit = True
        j.submit()
        # -- QUEUES SPLIT STOP
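A minimal follow-up sketch, assuming an interactive GPI session: queues.add() returns immediately, so you can poll queues.totalNumUserThreads() (the same call used in the test examples further down) to wait for the queued submissions to drain.

# Sketch: block until all queued user tasks have been picked up
# (assumes 'queues' from the GPI session above)
import time
while queues.totalNumUserThreads() > 0:
    time.sleep(1)
print('all queued submissions dispatched')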
Example #2
def submit_job(brunel_app, reco_type, input_files=None, local=RUN_LOCAL):
    # Set EvtMax depending on whether this is a local job:
    # 2 * int(local) - 1 gives 1 for a local test and -1 (all events) otherwise
    brunel_app.extraOpts += 'from Configurables import Brunel\n'
    brunel_app.extraOpts += 'Brunel().EvtMax = {}'.format(2 * int(local) - 1)

    # Configure the corresponding Job
    job = Job(name='VP hybrid distortions',
              comment='{reco_type} reconstruction {suffix}'.format(
                  reco_type=reco_type, suffix=['', '(local)'][local]),
              application=brunel_app,
              splitter=SplitByFiles(filesPerJob=1, ignoremissing=True),
              parallel_submit=True)

    if local:
        job.backend = Local()
        job.outputfiles = [LocalFile('*.xdst'), LocalFile('*.root')]
        job.inputdata = dataset[:1]
    else:
        job.backend = Dirac()
        job.outputfiles = [DiracFile('*.xdst'), DiracFile('*.root')]
        job.inputdata = dataset

    job.inputfiles = input_files or []

    queues.add(job.submit)
    return True
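A hypothetical call site for submit_job(); the Brunel application setup, RUN_LOCAL, and the dataset global are assumptions defined outside this snippet.

# Sketch: one quick local test run, then the full grid submission
# (Brunel() and its 'version' attribute are illustrative, not confirmed API)
app = Brunel(version='v52r6')
submit_job(app, 'nominal', local=True)
submit_job(app, 'nominal', local=False)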
Example #3
def diracAPI_interactive(connection_attempts=5):
    '''
    Run an interactive server within the DIRAC environment.
    '''
    import os
    import sys
    import time
    import inspect
    import traceback
    from GangaDirac.Lib.Server.InspectionClient import runClient
    from GangaDirac.Lib.Utilities.DiracUtilities import execute
    from Ganga.GPI import queues
    serverpath = os.path.join(
        os.path.dirname(inspect.getsourcefile(runClient)),
        'InspectionServer.py')
    # queue the call itself (not its result) so the server runs on a worker thread
    queues.add(execute,
               args=("execfile('%s')" % serverpath,),
               kwargs={'timeout': None, 'shell': False})

    #time.sleep(1)
    sys.stdout.write(
        "\nType 'q' or 'Q' or 'exit' or 'exit()' to quit but NOT ctrl-D")
    i = 0
    excpt = None
    while i < connection_attempts:
        try:
            runClient()
            break
        except Exception:
            if i == (connection_attempts - 1):
                excpt = traceback.format_exc()
        finally:
            i += 1
    return excpt
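Usage note: the function returns a formatted traceback string if every connection attempt failed, and None on success, so a caller can do the following (a sketch assuming a GPI session):

# Sketch: start the inspection server and surface any connection failure
err = diracAPI_interactive(connection_attempts=3)
if err is not None:
    print(err)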
Example #4
    def testSetParentOnLoad(self):
        """
        Test that the parents are set correctly on load
        """
        from Ganga.GPI import jobs, queues, Executable, Local
        from Ganga.GPIDev.Base.Proxy import isType

        def flush_full_job():
            mj = jobs(0)
            mj.comment = "Make sure I'm dirty " + ''.join(random.choice(string.ascii_uppercase) for _ in range(5))
            # flush the job we just dirtied (not the outer proxy 'j')
            mj._impl._getRegistry()._flush([mj._impl])

        # Make sure the main job is fully loaded
        j = jobs(0)
        assert isType(j.application, Executable)
        assert isType(j.backend, Local)
        assert j.application.exe == "sleep"

        # fire off a load of threads to flush
        for i in range(0, 20):
            queues.add(flush_full_job)

        # Now loop over and force the load of all the subjobs
        for sj in j.subjobs:
            assert sj.splitter is None
            assert isType(sj.application, Executable)
            assert isType(sj.backend, Local)
            assert sj.application.exe == "sleep"
            assert sj.application.args == ['400']
            assert sj._impl._getRoot() is j._impl
Example #5
    def submit(self, **opts):  # called on client, so job_info is Job object
        """Log submit event on client."""
        # if this job has a master and this is the first subjob, send
        # a 'submitted' message for the master job as well
        if self.job_info.master is not None:
            if self.job_info.id == 0:
                masterjob_msg = self.getMessage('submitted')
                masterjob_msg['subjobs'] = len(self.job_info.master.subjobs)
                masterjob_msg['ganga_job_id'] = str(
                    masterjob_msg['ganga_job_id']).split('.')[0]
                # override ganga_job_uuid as the message 'from the master' is
                # really sent from the subjob
                masterjob_msg['ganga_job_uuid'] = masterjob_msg[
                    'ganga_master_uuid']
                masterjob_msg['ganga_master_uuid'] = 0
                self.send(masterjob_msg)

        from Ganga.Utility import Config
        gangausername = Config.getConfig('Configuration')['user']
        self.job_info.info.monitoring_links.append(
            ('http://gangamon.cern.ch/ganga/#user=%s' % gangausername, 'dashboard'))

        # send submitted for this job
        msg = self.getMessage('submitted')
        from Ganga.GPI import queues
        queues.add(self.send, (msg,))  # args must be a tuple
Example #6
    def testSetParentOnLoad(self):
        """
        Test that the parents are set correctly on load
        """
        from Ganga.GPI import jobs, queues, Executable, Local
        from Ganga.GPIDev.Base.Proxy import isType, stripProxy

        def flush_full_job():
            mj = jobs(0)
            mj.comment = "Make sure I'm dirty " + ''.join(
                random.choice(string.ascii_uppercase) for _ in range(5))
            stripProxy(mj)._getRegistry()._flush([stripProxy(mj)])

        # Make sure the main job is fully loaded
        j = jobs(0)
        assert isType(j.application, Executable)
        assert isType(j.backend, Local)
        assert j.application.exe == "sleep"

        # fire off a load of threads to flush
        for i in range(0, 20):
            queues.add(flush_full_job)

        # Now loop over and force the load of all the subjobs
        for sj in j.subjobs:
            assert sj.splitter is None
            assert isType(sj.application, Executable)
            assert isType(sj.backend, Local)
            assert sj.application.exe == "sleep"
            assert sj.application.args == ['400']
            assert stripProxy(sj)._getRoot() is stripProxy(j)
            assert stripProxy(sj.application)._getRoot() is stripProxy(j)
Example #7
    def stop(self, exitcode, **opts):
        """Log stop event on worker node."""
        if exitcode == 0:
            event = 'finished'
        else:
            event = 'failed'
        message = self.getMessage(event)
        from Ganga.GPI import queues
        queues.add(self.send, (message,))  # args must be a tuple
Example #8
def _get_publisher(server, port, username, password):
    # FIXME: this assumes server/port/username/password cannot change and
    # caches a singleton publisher
    global _publisher
    if _publisher is None:
        _publisher = MSGUtil.createPublisher(server, port, username, password)
        from Ganga.GPI import queues
        queues.add(_publisher.start)
    return _publisher
Example #9
def queueDownload(joblist, targetDirBase=None, force_redownload=False):
    '''joblist may be a jobs.select result, a list of jobs, or a list of job ids'''
    for job in joblist:
        job, joblist = getJobList(job)
        targetdir = None
        if isinstance(targetDirBase, str):
            targetdir = targetDirBase + "/" + job.name
        queues.add(download,
                   kwargs={
                       "job": job,
                       "targetDir": targetdir,
                       "force_redownload": force_redownload
                   })
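A possible invocation, assuming the download and getJobList helpers referenced above exist in the session; the job selection is illustrative.

# Sketch: queue downloads for all completed jobs into per-job directories
queueDownload(jobs.select(status='completed'),
              targetDirBase='/tmp/ganga_output',
              force_redownload=False)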
Example #10
    def send(self, message):
        """Send the message to the configured destination."""
        # get publisher
        p = _get_publisher(
            self.config_info['server'],
            self.config_info['port'],
            self.config_info['username'],
            self.config_info['password'],
        )
        # send message
        headers = {'persistent': 'true'}
        from Ganga.GPI import queues
        queues.add(
            p.send, (self.config_info['message_destination'], repr(message), headers))
Example #11
    def test_c_QueueSubmits(self):
        from Ganga.GPI import jobs, queues

        for j in jobs:
            print('adding job', j.id, 'to queue for submission')
            queues.add(j.submit)

        while queues.totalNumUserThreads() > 0:
            print('remaining threads:', queues.totalNumUserThreads())
            time.sleep(1)

        print('remaining threads:', queues.totalNumUserThreads())

        # All user threads should have terminated by now
        for j in jobs:
            print('checking job', j.id)
            assert j.status != 'new'
Example #12
    def _auto_remove(self):
        """
        Called when the job is removed, as long as the config option allows it.
        """
        if self.lfn != '':
            queues.add(self.remove)
Example #13
def diracAPI_async(cmd, timeout=120):
    '''
    Execute DIRAC API commands from within Ganga.
    '''
    # queue the call itself (not its result) so it runs on a worker thread
    return queues.add(execute, args=(cmd,), kwargs={'timeout': timeout})
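For example, as in the sketch below; the command string is illustrative, and execute() runs it inside the DIRAC environment on a worker thread.

# Sketch: query DIRAC job status asynchronously (the job id is made up)
cmd = "from DIRAC.Interfaces.API.Dirac import Dirac; print(Dirac().status([1234]))"
diracAPI_async(cmd, timeout=60)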
Example #14
    def start(self, **opts):
        """Log start event on worker node."""
        message = self.getMessage('running')
        from Ganga.GPI import queues
        queues.add(self.send, (message,))  # args must be a tuple
Example #15
def queueresubmission(joblist):
    for j in joblist:
        queues.add(resub, kwargs={"j": j})
Example #16
def fixMyJobs(joblist):
    queueresubmission(joblist)
    for j in joblist:
        queues.add(resubmitStrangeJobs, kwargs={"j": j})
Example #17
def GangaDiracSplitter(inputs, filesPerJob, maxFiles, ignoremissing):
    """
    Generator that yields datasets for DIRAC split jobs
    """

    split_files = []

    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError(
            "Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata")

    file_replicas = {}

    from Ganga.GPI import queues

    for i in inputs:
        #logging.debug( "getting metadata: %s" % str(i.lfn) )
        queues.add(i.getReplicas)

    logger.info("Requesting LFN replica info")

    # This finds all replicas for all LFNs...
    # This will probably struggle for LFNs which don't exist
    import time
    all_lfns = [i.locations for i in inputs]
    while [] in all_lfns:
        time.sleep(0.5)
        all_lfns = [i.locations for i in inputs]

    logger.info("Got replicas")

    for i in inputs:
        file_replicas[i.lfn] = i.locations
        #logger.info( "%s" % str( i.accessURL() ) )

    logger.debug("found all replicas")

    super_dict = dict()
    for lfn, repz in file_replicas.items():
        sitez = set([])
        for i in repz:
            # print i
            sitez.add(i)
        super_dict[lfn] = sitez

    allSubSets = []
    allChosenSets = {}

    logger.info("Determining overlap")

    import random
    for i in super_dict.keys():

        # Randomly select 2 SEs as the starting point for splitting jobs
        if len(super_dict[i]) > 2:
            req_sitez = set([])
            chosen = random.sample(super_dict[i], 2)
            for s in chosen:
                req_sitez.add(s)
        # Keep the 2 or fewer SEs as the SEs of choice
        else:
            req_sitez = set([])
            for s in super_dict[i]:
                req_sitez.add(s)

        allChosenSets[i] = req_sitez

    logger.debug("Found all SE in use")

    # iterate over snapshots of the keys, since entries are popped during iteration
    for i in list(super_dict.keys()):

        req_sitez = allChosenSets[i]
        _this_subset = []

        # Starting with i, populate subset with LFNs which have an
        # overlap of at least 2 SE

        for k in list(super_dict.keys()):
            if req_sitez.issubset(super_dict[k]):
                if len(_this_subset) >= filesPerJob:
                    break
                _this_subset.append(str(k))
                super_dict.pop(k)

        if len(_this_subset) > 0:
            allSubSets.append(_this_subset)

    split_files = allSubSets

    logger.info("Created %s subsets" % str(len(split_files)))

    #logger.info( "Split Files: %s" % str(split_files) )

    for dataset in split_files:
        yield dataset
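A sketch of how the generator might be consumed; rebuilding a dataset from each subset via inputs.__class__ and DiracFile is an assumption based on the surrounding code, not a confirmed pattern.

# Sketch: turn each yielded list of LFN strings back into a dataset object
subsets = []
for lfns in GangaDiracSplitter(inputs, filesPerJob=5, maxFiles=None, ignoremissing=False):
    subsets.append(inputs.__class__([DiracFile(lfn=lfn) for lfn in lfns]))
logger.info("built %d subjob datasets" % len(subsets))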