Example 1
    def insertJobAndDescription(self, jobdesc, proxyid, siteName, lazy=False):
        """
        Insert job into clientjobs and job description into jobdescriptions.

        This function also inserts job description. It is meant for clients
        that can insert everything at the same time.

        Args:
            jobdesc: A string with xRSL job description.
            proxyid: ID from proxies table of a proxy that job will
                be submitted with.
            siteName: A string with name of a site in configuration
                that job will be submitted to.
            lazy: A boolean that determines whether the transaction should be
                committed after the operation.

        Returns:
            ID of inserted job.
        """
        c = self.db.getCursor()

        # first, insert the job description and retrieve its ID
        try:
            query = 'INSERT INTO jobdescriptions (jobdescription) VALUES (%s)'
            c.execute(query, [jobdesc])
            c.execute('SELECT LAST_INSERT_ID()')
            jobdescid = c.fetchone()['LAST_INSERT_ID()']
        except:
            self.log.exception('Error inserting job description')
            raise

        # get job name from xRSL
        jobdescs = arc.JobDescriptionList()
        arc.JobDescription_Parse(str(jobdesc), jobdescs)
        jobname = jobdescs[0].Identification.JobName

        # insert job
        query = """
            INSERT INTO clientjobs (created, jobname, jobdesc, siteName, proxyid)
            VALUES (%s, %s, %s, %s, %s)
        """
        c = self.db.getCursor()
        try:
            c.execute(
                query,
                [self.getTimeStamp(), jobname, jobdescid, siteName, proxyid])
            c.execute('SELECT LAST_INSERT_ID()')
            jobid = c.fetchone()['LAST_INSERT_ID()']
        except:
            self.log.exception('Error while inserting new job')
            raise
        else:
            if not lazy:
                self.Commit()
            return jobid
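A minimal usage sketch of the call above; the clientdb instance, proxy ID and site name are hypothetical placeholders, not part of the original module:

# Hypothetical caller: insert a job together with its xRSL description.
xrsl = '&(executable=/bin/hostname)(stdout=stdout)'
jobid = clientdb.insertJobAndDescription(xrsl, proxyid=1, siteName='mysite')

# With lazy=True the commit is deferred, so several inserts can share one
# transaction and be committed explicitly at the end:
ids = [clientdb.insertJobAndDescription(xrsl, 1, 'mysite', lazy=True)
       for _ in range(10)]
clientdb.Commit()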
Example 2
def Submit(id, appjobid, jobdescstr, ucproxy, timeout):

    global queuelist
    global usercred

    # get the submission logger
    #log = logger()
    log = logging.getLogger()

    if len(queuelist) == 0:
        log.error("%s: no cluster free for submission" % appjobid)
        return None

    #cred_type=arc.initializeCredentialsType(arc.initializeCredentialsType.SkipCredentials)
    #uc=arc.UserConfig(cred_type)
    uc = usercred

    uc.CredentialString(ucproxy)

    jobdescs = arc.JobDescriptionList()
    if not jobdescstr or not arc.JobDescription_Parse(jobdescstr, jobdescs):
        log.error("%s: Failed to prepare job description" % appjobid)
        return None

    # Do brokering among the available queues
    jobdesc = jobdescs[0]
    broker = arc.Broker(uc, jobdesc, "Random")
    targetsorter = arc.ExecutionTargetSorter(broker)
    for target in queuelist:
        log.debug("%s: considering target %s:%s" %
                  (appjobid, target.ComputingService.Name,
                   target.ComputingShare.Name))

        # Adding an entity performs matchmaking and brokering
        targetsorter.addEntity(target)

    if len(targetsorter.getMatchingTargets()) == 0:
        log.error("%s: no clusters satisfied job description requirements" %
                  appjobid)
        return None

    targetsorter.reset()  # required to reset the iterator, otherwise we get a seg fault
    selectedtarget = targetsorter.getCurrentTarget()
    # Job object will contain the submitted job
    job = arc.Job()
    submitter = arc.Submitter(uc)
    if submitter.Submit(selectedtarget, jobdesc,
                        job) != arc.SubmissionStatus.NONE:
        log.error("%s: Submission failed" % appjobid)
        return None

    jconv = JobConv()
    return jconv.job2db(job)
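Example 13 below drives this function through a multiprocessing pool, one argument tuple per job; in isolation the call looks roughly like this (the proxy string and 60-second timeout are placeholders):

# Each task tuple matches the Submit() signature above:
# (id, appjobid, jobdescstr, ucproxy, timeout)
task = (j['id'], j['appjobid'], jobdescstr, proxystring, 60)
jobdb = Submit(*task)  # a DB-ready dict from JobConv.job2db(), or None on failure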
Example 3
    def insertArcJob(self,
                     jobdesc,
                     jobdescid,
                     proxyid='',
                     maxattempts=0,
                     clusterlist='',
                     appjobid='',
                     downloadfiles='',
                     fairshare=''):
        '''
        Insert job into arcjobs table.

        This function is a modified version of insertArcJobDescription from the
        aCTDBArc module. Since the client engine uses the jobdescriptions table
        to store job descriptions, it cannot reuse the job insertion functions
        from aCTDBArc to pass jobs to the ARC engine: those insert the job
        description themselves, which would duplicate it.

        The function is deliberately kept similar to the original for now, even
        though it violates some conventions (e.g. exception handling) of the
        other functions in this module (clientdb).
        '''
        # extract priority from job desc (also checks if desc is valid)
        jobdescs = arc.JobDescriptionList()
        if not arc.JobDescription_Parse(str(jobdesc), jobdescs):
            self.log.error("%s: Failed to prepare job description" % appjobid)
            return None
        priority = jobdescs[0].Application.Priority
        if priority == -1:  # use nicer default priority
            priority = 50

        c = self.db.getCursor()

        desc = {}
        desc['created'] = self.getTimeStamp()
        desc['arcstate'] = "tosubmit"
        desc['tarcstate'] = desc['created']
        desc['tstate'] = desc['created']
        desc['cluster'] = ''
        desc['clusterlist'] = clusterlist
        desc['jobdesc'] = jobdescid
        desc['attemptsleft'] = maxattempts
        desc['proxyid'] = proxyid
        desc['appjobid'] = appjobid
        desc['downloadfiles'] = downloadfiles
        desc['priority'] = priority
        desc['fairshare'] = fairshare
        s="insert into arcjobs" + " ( " + \
            ",".join(["%s" % (k) for k in desc.keys()]) + \
            " ) " + " values " + " ( " + \
            ",".join(['%s' % (k) for k in ["%s"] * len(desc)]) + " ) "
        c.execute(s, list(desc.values()))
        c.execute("SELECT LAST_INSERT_ID()")
        row = c.fetchone()
        self.Commit()
        return row
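A stand-alone sketch of what the dict-driven builder above produces, trimmed to four columns for brevity (column order is the dict's insertion order, which Python 3.7+ guarantees):

# Demo of the generated parameterized statement (no database needed):
desc = {'created': '2024-01-01 00:00:00', 'arcstate': 'tosubmit',
        'jobdesc': 42, 'proxyid': 1}
s = "insert into arcjobs (" + ",".join(desc.keys()) + ") values (" + \
    ",".join(["%s"] * len(desc)) + ")"
print(s)                    # insert into arcjobs (created,arcstate,jobdesc,proxyid) values (%s,%s,%s,%s)
print(list(desc.values()))  # the parameters passed alongside the statement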
Example 4
    def insertArcJobDescription(self,
                                jobdesc,
                                proxyid='',
                                maxattempts=0,
                                clusterlist='',
                                appjobid='',
                                downloadfiles='',
                                fairshare=''):
        '''
        Add a new job description for the ARC engine to process. If specified,
        the job will be sent to a cluster in the given list.
        '''
        # extract priority from job desc (also checks if desc is valid)
        jobdescs = arc.JobDescriptionList()
        if not arc.JobDescription_Parse(str(jobdesc), jobdescs):
            self.log.error("%s: Failed to prepare job description" % appjobid)
            return None
        priority = jobdescs[0].Application.Priority
        if priority == -1:  # use nicer default priority
            priority = 50

        # todo: find some useful default for proxyid
        c = self.db.getCursor()

        s = "insert into jobdescriptions (jobdescription) values (%s)"
        c.execute(s, [jobdesc])
        c.execute("SELECT LAST_INSERT_ID()")
        jobdescid = c.fetchone()['LAST_INSERT_ID()']

        desc = {}
        desc['created'] = self.getTimeStamp()
        desc['arcstate'] = "tosubmit"
        desc['tarcstate'] = desc['created']
        desc['tstate'] = desc['created']
        desc['cluster'] = ''
        desc['clusterlist'] = clusterlist
        desc['jobdesc'] = jobdescid
        desc['attemptsleft'] = maxattempts
        desc['proxyid'] = proxyid
        desc['appjobid'] = appjobid
        desc['downloadfiles'] = downloadfiles
        desc['priority'] = priority
        desc['fairshare'] = fairshare
        s="insert into arcjobs" + " ( " + ",".join(['%s' % (k) for k in desc.keys()]) + " ) " + " values " + \
            " ( " + ",".join(['%s' % (k) for k in ["%s"] * len(desc.keys()) ]) + " ) "
        c.execute(s, list(desc.values()))
        c.execute("SELECT LAST_INSERT_ID()")
        row = c.fetchone()
        self.Commit()
        return row
Example 5
def checkJobDesc(jobdesc):
    """
    Check if job description is valid.

    This part is taken from aCTDBArc.py and should be kept updated.

    Args:
        jobdesc: A string with an xRSL job description.

    Raises:
        InvalidJobDescriptionError: Job description is invalid.
    """
    jobdescs = arc.JobDescriptionList()
    if not arc.JobDescription_Parse(str(jobdesc), jobdescs):
        logger.error('Job description is not valid')
        raise errors.InvalidJobDescriptionError()
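A hypothetical caller combining this check with the insertJob function of Example 6 (clientdb, proxyid and siteName are placeholders); this matches the comment in Example 6 that the caller validates the xRSL before inserting:

try:
    checkJobDesc(xrsl)
except errors.InvalidJobDescriptionError:
    print('rejected: job description failed to parse')
else:
    jobid = clientdb.insertJob(xrsl, proxyid, siteName)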
Example 6
    def insertJob(self, jobdesc, proxyid, siteName, lazy=False):
        """
        Insert job into clientjobs table.

        This function does not insert the job description into the database; it
        has to be inserted separately. However, the job description is still
        needed here to determine the name of the job. This function is meant for
        clients that need to perform additional work on job descriptions.

        Args:
            jobdesc: A string with xRSL job description.
            proxyid: ID from proxies table of a proxy that job will
                be submitted with.
            siteName: A string with name of a site in configuration
                that job will be submitted to.
            lazy: A boolean that determines whether the transaction should be
                committed after the operation.

        Returns:
            ID of inserted job.
        """
        # get job name from xRSL
        jobdescs = arc.JobDescriptionList()
        # The parse result is not checked because the caller (actsub.py) has
        # already validated the xRSL.
        arc.JobDescription_Parse(str(jobdesc), jobdescs)
        jobname = jobdescs[0].Identification.JobName

        # insert job
        query = """
            INSERT INTO clientjobs (created, jobname, jobdesc, siteName, proxyid)
            VALUES (%s, %s, %s, %s, %s)
        """
        c = self.db.getCursor()
        try:
            c.execute(query,
                      [self.getTimeStamp(), jobname, None, siteName, proxyid])
            c.execute('SELECT LAST_INSERT_ID()')
            jobid = c.fetchone()['LAST_INSERT_ID()']
        except:
            self.log.exception('Error while inserting new job')
            raise
        else:
            if not lazy:
                self.Commit()
            return jobid
Example 7
def main(lrms, grami, conf="/etc/arc.conf"):
    lrms = get_lrms_module(lrms)
    gridid = grami.split('.')[-2]
    is_parsed = False
    try:
        jds = arc.JobDescriptionList()
        with open(grami, 'r+') as jobdesc:
            content = jobdesc.read()
            is_parsed = JobDescriptionParserGRAMi.Parse(content, jds)
            # check the parse result; AssertionError is caught below
            assert is_parsed and len(jds) > 0
            jd = jds[0]
            localid = lrms.Submit(conf, jd)
            assert isinstance(localid, str)
            jobdesc.write('joboption_jobid=%s\n' % localid)
        return 0
    except (ArcError, AssertionError):
        pass
    except IOError:
        error('%s: Failed to access GRAMi file' % gridid, 'pySubmit')
    except Exception:
        error('Unexpected exception:\n%s' % traceback.format_exc(), 'pySubmit')
    return 1
Example 8
    def get_job_descriptions(self, jsdl):
        """
        Return an instance of ``arc.JobDescriptionList`` containing the job described by the
        given JSDL

        :param jsdl: String containing the job description in JSDL format
        """
        job_descriptions = arc.JobDescriptionList()
        temp_filename = None
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_filename = temp_file.name
            # the file is opened in binary mode, so encode str input (Python 3)
            temp_file.write(jsdl.encode() if isinstance(jsdl, str) else jsdl)

        try:
            if not arc.JobDescription_ParseFromFile(temp_filename,
                                                    job_descriptions):
                raise InvalidJobDescription(
                    "Could not parse job description XML")
        finally:
            # Delete the temp file - finally clause is run even if exception is raised
            os.unlink(temp_filename)

        return job_descriptions
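The temp-file round trip is only needed by JobDescription_ParseFromFile; when the description is already a string in memory it can be parsed directly, as the xRSL examples in this collection do. A sketch under that assumption:

def get_job_descriptions_from_string(jsdl):
    # Parse the in-memory JSDL directly; the ARC parser autodetects the
    # description language, so no temporary file is required.
    job_descriptions = arc.JobDescriptionList()
    if not arc.JobDescription_Parse(str(jsdl), job_descriptions):
        raise InvalidJobDescription("Could not parse job description XML")
    return job_descriptions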
Example 9
    def _arc_submit(self, xrsl, arcces, userconfig, log):
        '''Check the available CEs and submit'''

        queuelist = []

        for arcce in arcces:
            (ce_endpoint, ce_queue) = arcce
            aris = arc.URL(str(ce_endpoint))
            ce_host = aris.Host()
            if aris.Protocol() == 'https':
                aris.ChangePath('/arex')
                infoendpoints = [
                    arc.Endpoint(aris.str(), arc.Endpoint.COMPUTINGINFO,
                                 'org.ogf.glue.emies.resourceinfo')
                ]
            else:
                aris = 'ldap://' + aris.Host() + '/mds-vo-name=local,o=grid'
                infoendpoints = [
                    arc.Endpoint(aris, arc.Endpoint.COMPUTINGINFO,
                                 'org.nordugrid.ldapng')
                ]

            # retriever contains a list of CE endpoints
            retriever = arc.ComputingServiceRetriever(userconfig,
                                                      infoendpoints)
            retriever.wait()
            # targets is the list of queues
            # parse target.ComputingService.ID for the CE hostname
            # target.ComputingShare.Name is the queue name
            targets = retriever.GetExecutionTargets()

            # Filter only sites for this process
            for target in targets:
                if not target.ComputingService.ID:
                    log.info(
                        "Target {0} does not have ComputingService ID defined, skipping"
                        .format(target.ComputingService.Name))
                    continue
                # If EMI-ES infoendpoint, force EMI-ES submission
                if infoendpoints[0].InterfaceName == 'org.ogf.glue.emies.resourceinfo' \
                  and target.ComputingEndpoint.InterfaceName != 'org.ogf.glue.emies.activitycreation':
                    log.debug(
                        "Rejecting target interface {0} because not EMI-ES".
                        format(target.ComputingEndpoint.InterfaceName))
                    continue
                # Check for matching host and queue
                targethost = re.sub(
                    ':arex$', '',
                    re.sub('urn:ogf:ComputingService:', '',
                           target.ComputingService.ID))
                targetqueue = target.ComputingShare.Name
                if targethost != ce_host:
                    log.debug(
                        'Rejecting target host {0} as it does not match {1}'.
                        format(targethost, ce_host))
                    continue
                if targetqueue != ce_queue:
                    log.debug(
                        'Rejecting target queue {0} as it does not match {1}'.
                        format(targetqueue, ce_queue))
                    continue

                queuelist.append(target)
                log.debug("Adding target {0}:{1}".format(
                    targethost, targetqueue))

        # check if any queues are available, if not leave and try again next time
        if not queuelist:
            raise Exception("No free queues available")

        log.debug("preparing submission")
        jobdescs = arc.JobDescriptionList()
        if not arc.JobDescription_Parse(str(xrsl), jobdescs):
            raise Exception("Failed to prepare job description")

        # Run the submission in a separate thread
        thr = SubmitThr(queuelist, jobdescs, userconfig)
        return self._run_submit(thr)
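SubmitThr and _run_submit are not shown in this example. A minimal sketch of what such a thread wrapper could look like, mirroring the brokering and submission steps of Example 2 (the attribute names are assumptions, not the original class):

import threading

class SubmitThr(threading.Thread):
    """Assumed shape: run brokering and submission off the main thread."""

    def __init__(self, queuelist, jobdescs, userconfig):
        threading.Thread.__init__(self)
        self.queuelist = queuelist
        self.jobdescs = jobdescs
        self.userconfig = userconfig
        self.job = None  # filled in by run() on success

    def run(self):
        # Broker over the pre-filtered queues, then submit to the best match.
        broker = arc.Broker(self.userconfig, self.jobdescs[0], "Random")
        sorter = arc.ExecutionTargetSorter(broker)
        for target in self.queuelist:
            sorter.addEntity(target)
        if not sorter.getMatchingTargets():
            return
        sorter.reset()  # rewind the iterator before reading the current target
        job = arc.Job()
        submitter = arc.Submitter(self.userconfig)
        if submitter.Submit(sorter.getCurrentTarget(), self.jobdescs[0],
                            job) == arc.SubmissionStatus.NONE:
            self.job = job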
Example 10
  def submitJob( self, executableFile, proxy, numberOfJobs = 1 ):
    """ Method to submit job
    """

    # Assume that the ARC queues are always of the format nordugrid-<batchSystem>-<queue>
    # And none of our supported batch systems have a "-" in their name
    self.arcQueue = self.queue.split("-",2)[2]
    result = self._prepareProxy()
    if not result['OK']:
      gLogger.error( 'ARCComputingElement: failed to set up proxy', result['Message'] )
      return result
    self.usercfg.ProxyPath(os.environ['X509_USER_PROXY'])

    gLogger.verbose( "Executable file path: %s" % executableFile )
    if not os.access( executableFile, 5 ):
      os.chmod( executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH )

    batchIDList = []
    stampDict = {}

    endpoint = arc.Endpoint( self.ceHost + ":2811/jobs", arc.Endpoint.JOBSUBMIT,
                            "org.nordugrid.gridftpjob")

    # Submit jobs iteratively for now. Tentatively easier than mucking around with the JobSupervisor class
    for __i in range(numberOfJobs):
      # The basic job description
      jobdescs = arc.JobDescriptionList()
      # Get the job into the ARC way
      xrslString, diracStamp = self.__writeXRSL( executableFile )
      gLogger.debug("XRSL string submitted : %s" %xrslString)
      gLogger.debug("DIRAC stamp for job : %s" %diracStamp)
      if not arc.JobDescription_Parse(xrslString, jobdescs):
        gLogger.error("Invalid job description")
        break
      # Submit the job
      jobs = arc.JobList() # filled by the submit process
      submitter = arc.Submitter(self.usercfg)
      result = submitter.Submit(endpoint, jobdescs, jobs)
      # Save the submission info on success; otherwise report why it failed
      if ( result == arc.SubmissionStatus.NONE ):
        # Job successfully submitted
        pilotJobReference = jobs[0].JobID
        batchIDList.append( pilotJobReference )
        stampDict[pilotJobReference] = diracStamp
        gLogger.debug("Successfully submitted job %s to CE %s" % (pilotJobReference, self.ceHost))
      else:
        message = "Failed to submit job because "
        if (result.isSet(arc.SubmissionStatus.NOT_IMPLEMENTED) ):
          gLogger.warn( "%s feature not implemented on CE? (weird I know - complain to site admins" % message )
        if ( result.isSet(arc.SubmissionStatus.NO_SERVICES) ):
          gLogger.warn( "%s no services are running on CE? (open GGUS ticket to site admins" % message )
        if ( result.isSet(arc.SubmissionStatus.ENDPOINT_NOT_QUERIED) ):
          gLogger.warn( "%s endpoint was not even queried. (network ..?)" % message )
        if ( result.isSet(arc.SubmissionStatus.BROKER_PLUGIN_NOT_LOADED) ):
          gLogger.warn( "%s BROKER_PLUGIN_NOT_LOADED : ARC library installation problem?" % message )
        if ( result.isSet(arc.SubmissionStatus.DESCRIPTION_NOT_SUBMITTED) ):
          gLogger.warn( "%s Job not submitted - incorrect job description? (missing field in XRSL string?)" % message )
        if ( result.isSet(arc.SubmissionStatus.SUBMITTER_PLUGIN_NOT_LOADED) ):
          gLogger.warn( "%s SUBMITTER_PLUGIN_NOT_LOADED : ARC library installation problem?" % message )
        if ( result.isSet(arc.SubmissionStatus.AUTHENTICATION_ERROR) ):
          gLogger.warn( "%s authentication error - screwed up / expired proxy? Renew / upload pilot proxy on machine?" % message )
        if ( result.isSet(arc.SubmissionStatus.ERROR_FROM_ENDPOINT) ):
          gLogger.warn( "%s some error from the CE - possibly CE problems?" % message )
        gLogger.warn( "%s ... maybe above messages will give a hint." % message )
        break # Boo hoo *sniff*

    if batchIDList:
      result = S_OK( batchIDList )
      result['PilotStampDict'] = stampDict
    else:
      result = S_ERROR('No pilot references obtained from the ARC job submission')
    return result
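The if-chain above can be collapsed into a table-driven loop over the same SubmissionStatus flags (every flag and hint below is taken from this example; result, message and gLogger are the surrounding names). Later versions factor this into an _analyzeSubmissionError helper, as Examples 14 and 16 show:

# Map each possible failure flag to its hint and log whichever ones are set.
SUBMISSION_HINTS = [
    (arc.SubmissionStatus.NOT_IMPLEMENTED,
     "feature not implemented on CE? (weird I know - complain to site admins)"),
    (arc.SubmissionStatus.NO_SERVICES,
     "no services are running on CE? (open GGUS ticket to site admins)"),
    (arc.SubmissionStatus.ENDPOINT_NOT_QUERIED,
     "endpoint was not even queried. (network ..?)"),
    (arc.SubmissionStatus.BROKER_PLUGIN_NOT_LOADED,
     "BROKER_PLUGIN_NOT_LOADED : ARC library installation problem?"),
    (arc.SubmissionStatus.DESCRIPTION_NOT_SUBMITTED,
     "Job not submitted - incorrect job description? (missing field in XRSL string?)"),
    (arc.SubmissionStatus.SUBMITTER_PLUGIN_NOT_LOADED,
     "SUBMITTER_PLUGIN_NOT_LOADED : ARC library installation problem?"),
    (arc.SubmissionStatus.AUTHENTICATION_ERROR,
     "authentication error - screwed up / expired proxy? Renew / upload pilot proxy on machine?"),
    (arc.SubmissionStatus.ERROR_FROM_ENDPOINT,
     "some error from the CE - possibly CE problems?"),
]
message = "Failed to submit job because "
for flag, hint in SUBMISSION_HINTS:
    if result.isSet(flag):
        gLogger.warn("%s%s" % (message, hint))
gLogger.warn("%s ... maybe above messages will give a hint." % message)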
Example 11
# Set up logging to stdout with level VERBOSE (a lot of output will be shown)
logstdout = arc.LogStream(sys.stdout)
logstdout.setFormat(arc.ShortFormat)
arc.Logger_getRootLogger().addDestination(logstdout)
arc.Logger_getRootLogger().setThreshold(arc.VERBOSE)
logger = arc.Logger(arc.Logger_getRootLogger(), "jobsubmit")

# UserConfig contains information on credentials and default services to use.
# This form of the constructor is necessary to initialise the local job list.
usercfg = arc.UserConfig("", "")

# Simple job description which outputs hostname to stdout
jobdescstring = "&(executable=/bin/hostname)(stdout=stdout)"

# Parse job description
jobdescs = arc.JobDescriptionList()
if not arc.JobDescription_Parse(jobdescstring, jobdescs):
    logger.msg(arc.ERROR, "Invalid job description")
    sys.exit(1)

# Use 'arc.JobDescription_ParseFromFile("helloworld.xrsl", jobdescs)'
# to parse job description from file.

# Use top-level NorduGrid information index to find resources
index = arc.Endpoint(
    "ldap://index1.nordugrid.org:2135/Mds-Vo-name=NorduGrid,o=grid",
    arc.Endpoint.REGISTRY, "org.nordugrid.ldapegiis")
services = arc.EndpointList(1, index)

# Do the submission
jobs = arc.JobList()
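The snippet breaks off before the actual submission call. In the standard NorduGrid job-submission tutorial this script continues roughly as follows (a sketch, assuming the BrokeredSubmit API of the ARC client library):

submitter = arc.Submitter(usercfg)
# BrokeredSubmit queries the registry, ranks the matching targets and
# submits; successfully submitted jobs are appended to the jobs list.
if submitter.BrokeredSubmit(services, jobdescs, jobs) != arc.SubmissionStatus.NONE:
    logger.msg(arc.ERROR, "Failed to submit job")
    sys.exit(1)
logger.msg(arc.INFO, "Job submitted with job id %s" % jobs[0].JobID)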
Example 12
    def submit(self):
        """
        Main function to submit jobs.
        """

        global queuelist

        # check for stopsubmission flag
        if self.conf.get(['downtime', 'stopsubmission']) == "true":
            self.log.info('Submission suspended due to downtime')
            return 0

        # Get cluster host and queue: cluster/queue
        clusterhost = clusterqueue = None
        if self.cluster:
            cluster = self.cluster
            if cluster.find('://') == -1:
                cluster = 'gsiftp://' + cluster
            clusterurl = arc.URL(cluster)
            clusterhost = clusterurl.Host()
            clusterqueue = clusterurl.Path()[1:]  # strip off leading slash

        # Apply fair-share
        if self.cluster:
            fairshares = self.db.getArcJobsInfo(
                "arcstate='tosubmit' and clusterlist like '%" + self.cluster +
                "%'", ['fairshare'])
        else:
            fairshares = self.db.getArcJobsInfo(
                "arcstate='tosubmit' and clusterlist=''", ['fairshare'])

        if not fairshares:
            self.log.info('Nothing to submit')
            return 0

        fairshares = list(set([p['fairshare'] for p in fairshares]))
        # For EMI-ES proxy bug - see below
        shuffle(fairshares)
        count = 0

        for fairshare in fairshares:

            try:
                # catch any exceptions here to avoid leaving lock
                if self.cluster:
                    # Lock row for update in case multiple clusters are specified
                    #jobs=self.db.getArcJobsInfo("arcstate='tosubmit' and ( clusterlist like '%{0}' or clusterlist like '%{0},%' ) and fairshare='{1}' order by priority desc limit 10".format(self.cluster, fairshare),
                    jobs = self.db.getArcJobsInfo(
                        "arcstate='tosubmit' and ( clusterlist like '%{0}' or clusterlist like '%{0},%' ) and fairshare='{1}' limit 10"
                        .format(self.cluster, fairshare),
                        columns=[
                            "id", "jobdesc", "appjobid", "priority", "proxyid"
                        ],
                        lock=True)
                    if jobs:
                        self.log.debug("started lock for writing %d jobs" %
                                       len(jobs))
                else:
                    jobs = self.db.getArcJobsInfo(
                        "arcstate='tosubmit' and clusterlist='' and fairshare='{0}' limit 10"
                        .format(fairshare),
                        columns=["id", "jobdesc", "appjobid", "priority"])
                # mark submitting in db
                jobs_taken = []
                for j in jobs:
                    jd = {
                        'cluster': self.cluster,
                        'arcstate': 'submitting',
                        'tarcstate': self.db.getTimeStamp()
                    }
                    self.db.updateArcJobLazy(j['id'], jd)
                    jobs_taken.append(j)
                jobs = jobs_taken

            finally:
                if self.cluster:
                    try:
                        self.db.Commit(lock=True)
                        self.log.debug("ended lock")
                    except:
                        self.log.warning("Failed to release DB lock")
                else:
                    self.db.Commit()

            if len(jobs) == 0:
                #self.log.debug("No jobs to submit")
                continue
            self.log.info("Submitting %d jobs for fairshare %s" %
                          (len(jobs), fairshare))

            # max waiting priority
            try:
                maxpriowaiting = max(jobs,
                                     key=lambda x: x['priority'])['priority']
            except:
                maxpriowaiting = 0
            self.log.info("Maximum priority of waiting jobs: %d" %
                          maxpriowaiting)

            # Query infosys - either local or index
            if self.cluster:
                if self.cluster.find('://') != -1:
                    aris = arc.URL(self.cluster)
                else:
                    aris = arc.URL('gsiftp://%s' % self.cluster)
                if aris.Protocol() == 'https':
                    aris.ChangePath('/arex')
                    infoendpoints = [
                        arc.Endpoint(aris.str(), arc.Endpoint.COMPUTINGINFO,
                                     'org.ogf.glue.emies.resourceinfo')
                    ]
                elif aris.Protocol() == 'local':
                    infoendpoints = [
                        arc.Endpoint(aris.str(), arc.Endpoint.COMPUTINGINFO,
                                     'org.nordugrid.local')
                    ]
                else:
                    aris = 'ldap://' + aris.Host() + '/mds-vo-name=local,o=grid'
                    infoendpoints = [
                        arc.Endpoint(aris, arc.Endpoint.COMPUTINGINFO,
                                     'org.nordugrid.ldapng')
                    ]
            else:
                giises = self.conf.getList(['atlasgiis', 'item'])
                infoendpoints = []
                for g in giises:
                    # Specify explicitly EGIIS
                    infoendpoints.append(
                        arc.Endpoint(str(g), arc.Endpoint.REGISTRY,
                                     "org.nordugrid.ldapegiis"))

            # Set UserConfig credential for each proxy. Assumes that any proxy
            # in the fairshare can query the CE infosys
            self.uc.CredentialString(self.db.getProxy(jobs[0]['proxyid']))
            # retriever contains a list of CE endpoints
            retriever = arc.ComputingServiceRetriever(self.uc, infoendpoints)
            retriever.wait()
            # targets is the list of queues
            # parse target.ComputingService.ID for the CE hostname
            # target.ComputingShare.Name is the queue name
            targets = retriever.GetExecutionTargets()

            # Filter only sites for this process
            queuelist = []
            for target in targets:
                if not target.ComputingService.ID:
                    self.log.info(
                        "Target %s does not have ComputingService ID defined, skipping"
                        % target.ComputingService.Name)
                    continue
                # If EMI-ES infoendpoint, force EMI-ES submission
                if infoendpoints[0].InterfaceName == 'org.ogf.glue.emies.resourceinfo' \
                        and target.ComputingEndpoint.InterfaceName != 'org.ogf.glue.emies.activitycreation':
                    self.log.debug(
                        "Rejecting target interface %s because not EMI-ES" %
                        target.ComputingEndpoint.InterfaceName)
                    continue
                # Check for matching host and queue
                targethost = re.sub(
                    ':arex$', '',
                    re.sub('urn:ogf:ComputingService:', '',
                           target.ComputingService.ID))
                targetqueue = target.ComputingShare.Name
                if clusterhost and targethost != clusterhost:
                    self.log.debug(
                        'Rejecting target host %s as it does not match %s' %
                        (targethost, clusterhost))
                    continue
                if clusterqueue and targetqueue != clusterqueue:
                    self.log.debug(
                        'Rejecting target queue %s as it does not match %s' %
                        (targetqueue, clusterqueue))
                    continue
                if targetqueue in self.conf.getList(['queuesreject', 'item']):
                    self.log.debug(
                        'Rejecting target queue %s in queuesreject list' %
                        targetqueue)
                    continue
                elif targethost in self.conf.getList(
                    ['clustersreject', 'item']):
                    self.log.debug(
                        'Rejecting target host %s in clustersreject list' %
                        targethost)
                    continue
                else:
                    # tmp hack
                    target.ComputingShare.LocalWaitingJobs = 0
                    target.ComputingShare.PreLRMSWaitingJobs = 0
                    target.ExecutionEnvironment.CPUClockSpeed = 2000
                    qjobs = self.db.getArcJobsInfo(
                        "cluster='" + str(self.cluster) +
                        "' and  arcstate='submitted' and fairshare='%s'" %
                        fairshare, ['id', 'priority'])
                    rjobs = self.db.getArcJobsInfo(
                        "cluster='" + str(self.cluster) +
                        "' and  arcstate='running' and fairshare='%s'" %
                        fairshare, ['id'])

                    # max queued priority
                    try:
                        maxprioqueued = max(
                            qjobs, key=lambda x: x['priority'])['priority']
                    except:
                        maxprioqueued = 0
                    self.log.info("Max priority queued: %d" % maxprioqueued)

                    # Set number of submitted jobs to running * 0.15 + 100/num of shares
                    # Note: assumes only a few shares are used
                    jlimit = len(rjobs) * 0.15 + 100 / len(fairshares)
                    if str(self.cluster).find('arc-boinc-0') != -1:
                        jlimit = len(rjobs) * 0.15 + 400
                    if str(self.cluster).find('XXXpikolit') != -1:
                        jlimit = len(rjobs) * 0.15 + 100
                    if str(self.cluster).find('arc05.lcg') != -1:
                        jlimit = len(rjobs) * 0.15 + 400
                    target.ComputingShare.PreLRMSWaitingJobs = len(qjobs)
                    if len(qjobs) < jlimit or ((maxpriowaiting > maxprioqueued)
                                               and (maxpriowaiting > 10)):
                        if maxpriowaiting > maxprioqueued:
                            self.log.info(
                                "Overriding limit, maxpriowaiting: %d > maxprioqueued: %d"
                                % (maxpriowaiting, maxprioqueued))
                        queuelist.append(target)
                        self.log.debug("Adding target %s:%s" %
                                       (targethost, targetqueue))
                    else:
                        self.log.info(
                            "%s/%s already at limit of submitted jobs for fairshare %s"
                            % (targethost, targetqueue, fairshare))

            # check if any queues are available, if not leave and try again next time
            if not queuelist:
                self.log.info("No free queues available")
                self.db.Commit()
                # EMI-ES proxy problem - see bug 3685
                if self.cluster and self.cluster.startswith('https://'):
                    raise ExceptInterrupt(15)
                continue

            self.log.info("start submitting")

            # Just run one thread for each job in sequence. Strange things happen
            # when trying to create a new UserConfig object for each thread.
            for j in jobs:
                self.log.debug("%s: preparing submission" % j['appjobid'])
                jobdescstr = str(
                    self.db.getArcJobDescription(str(j['jobdesc'])))
                jobdescs = arc.JobDescriptionList()
                if not jobdescstr or not arc.JobDescription_Parse(
                        jobdescstr, jobdescs):
                    self.log.error("%s: Failed to prepare job description" %
                                   j['appjobid'])
                    continue
                # TODO: might not work if proxies are different within a share
                # since same uc object is shared among threads
                self.uc.CredentialString(self.db.getProxy(j['proxyid']))
                t = SubmitThr(Submit, j['id'], j['appjobid'], jobdescs,
                              self.uc, self.log)
                self.RunThreadsSplit([t], 1)
                count = count + 1

            self.log.info("threads finished")
            # commit transaction to release row locks
            self.db.Commit()

            # EMI-ES proxy problem - see bug 3685
            if self.cluster and self.cluster.startswith('https://'):
                raise ExceptInterrupt(15)

        self.log.info("end submitting")

        return count
Example 13
    def submit(self):
        """
        Main function to submit jobs.
        """

        global queuelist

        # check for stopsubmission flag
        if self.conf.get(['downtime', 'stopsubmission']) == "true":
            self.log.info('Submission suspended due to downtime')
            return

        # check for any site-specific limits or status
        clusterstatus = self.conf.getCond(["sites", "site"],
                                          f"endpoint={self.cluster}",
                                          ["status"]) or 'online'
        if clusterstatus == 'offline':
            self.log.info('Site status is offline')
            return

        clustermaxjobs = int(
            self.conf.getCond(["sites", "site"], f"endpoint={self.cluster}",
                              ["maxjobs"]) or 999999)
        nsubmitted = self.db.getNArcJobs(f"cluster='{self.cluster}'")
        if nsubmitted >= clustermaxjobs:
            self.log.info(
                f'{nsubmitted} submitted jobs is greater than or equal to max jobs {clustermaxjobs}'
            )
            return

        # Get cluster host and queue: cluster/queue
        clusterhost = clusterqueue = None
        if self.cluster:
            cluster = self.cluster
            if cluster.find('://') == -1:
                cluster = 'gsiftp://' + cluster
            clusterurl = arc.URL(cluster)
            clusterhost = clusterurl.Host()
            clusterqueue = clusterurl.Path()[1:]  # strip off leading slash

        # Apply fair-share
        if self.cluster:
            fairshares = self.db.getArcJobsInfo(
                "arcstate='tosubmit' and clusterlist like '%" + self.cluster +
                "%'", ['fairshare', 'proxyid'])
        else:
            fairshares = self.db.getArcJobsInfo(
                "arcstate='tosubmit' and clusterlist=''",
                ['fairshare', 'proxyid'])

        if not fairshares:
            self.log.info('Nothing to submit')
            return

        # split by proxy for GU queues
        fairshares = list(
            set([(p['fairshare'], p['proxyid']) for p in fairshares]))
        # For proxy bug - see below
        shuffle(fairshares)

        for fairshare, proxyid in fairshares:

            # apply maxjobs limit (the check above ensures this is positive)
            # Note: relies on the exit after the first loop iteration
            limit = min(clustermaxjobs - nsubmitted, 10)
            try:
                # catch any exceptions here to avoid leaving lock
                if self.cluster:
                    # Lock row for update in case multiple clusters are specified
                    #jobs=self.db.getArcJobsInfo("arcstate='tosubmit' and ( clusterlist like '%{0}' or clusterlist like '%{0},%' ) and fairshare='{1}' order by priority desc limit 10".format(self.cluster, fairshare),
                    jobs = self.db.getArcJobsInfo(
                        "arcstate='tosubmit' and ( clusterlist like '%{0}' or clusterlist like '%{0},%' ) and fairshare='{1}' and proxyid='{2}' limit {3}"
                        .format(self.cluster, fairshare, proxyid, limit),
                        columns=[
                            "id", "jobdesc", "appjobid", "priority", "proxyid",
                            "clusterlist"
                        ],
                        lock=True)
                    if jobs:
                        self.log.debug("started lock for writing %d jobs" %
                                       len(jobs))
                else:
                    jobs = self.db.getArcJobsInfo(
                        "arcstate='tosubmit' and clusterlist='' and fairshare='{0} and proxyid={1}' limit {2}"
                        .format(fairshare, proxyid, limit),
                        columns=[
                            "id", "jobdesc", "appjobid", "priority", "proxyid",
                            "clusterlist"
                        ])
                # mark submitting in db
                jobs_taken = []
                for j in jobs:
                    jd = {
                        'cluster': self.cluster,
                        'arcstate': 'submitting',
                        'tarcstate': self.db.getTimeStamp()
                    }
                    self.db.updateArcJobLazy(j['id'], jd)
                    jobs_taken.append(j)
                jobs = jobs_taken

            finally:
                if self.cluster:
                    try:
                        self.db.Commit(lock=True)
                        self.log.debug("ended lock")
                    except:
                        self.log.warning("Failed to release DB lock")
                else:
                    self.db.Commit()

            if len(jobs) == 0:
                #self.log.debug("No jobs to submit")
                continue
            self.log.info(
                "Submitting %d jobs for fairshare %s and proxyid %d" %
                (len(jobs), fairshare, proxyid))

            # max waiting priority
            try:
                maxpriowaiting = max(jobs,
                                     key=lambda x: x['priority'])['priority']
            except:
                maxpriowaiting = 0
            self.log.info("Maximum priority of waiting jobs: %d" %
                          maxpriowaiting)

            # Query infosys - either local or index
            if self.cluster:
                if self.cluster.find('://') != -1:
                    aris = arc.URL(self.cluster)
                else:
                    aris = arc.URL('gsiftp://%s' % self.cluster)
                if aris.Protocol() == 'https':
                    aris.ChangePath('/arex')
                    infoendpoints = [
                        arc.Endpoint(aris.str(), arc.Endpoint.COMPUTINGINFO,
                                     'org.ogf.glue.emies.resourceinfo')
                    ]
                elif aris.Protocol() == 'local':
                    infoendpoints = [
                        arc.Endpoint(aris.str(), arc.Endpoint.COMPUTINGINFO,
                                     'org.nordugrid.local')
                    ]
                else:
                    aris = 'ldap://' + aris.Host() + '/mds-vo-name=local,o=grid'
                    infoendpoints = [
                        arc.Endpoint(aris, arc.Endpoint.COMPUTINGINFO,
                                     'org.nordugrid.ldapng')
                    ]
            else:
                giises = self.conf.getList(['atlasgiis', 'item'])
                infoendpoints = []
                for g in giises:
                    # Specify explicitly EGIIS
                    infoendpoints.append(
                        arc.Endpoint(str(g), arc.Endpoint.REGISTRY,
                                     "org.nordugrid.ldapegiis"))

            # Set UserConfig credential for querying infosys
            proxystring = str(self.db.getProxy(proxyid))
            self.uc.CredentialString(proxystring)
            global usercred
            usercred = self.uc
            # retriever contains a list of CE endpoints
            retriever = arc.ComputingServiceRetriever(self.uc, infoendpoints)
            retriever.wait()
            # targets is the list of queues
            # parse target.ComputingService.ID for the CE hostname
            # target.ComputingShare.Name is the queue name
            targets = retriever.GetExecutionTargets()

            # Filter only sites for this process
            queuelist = []
            for target in targets:
                if not target.ComputingService.ID:
                    self.log.info(
                        "Target %s does not have ComputingService ID defined, skipping"
                        % target.ComputingService.Name)
                    continue
                # If EMI-ES infoendpoint, force EMI-ES submission
                if infoendpoints[0].InterfaceName == 'org.ogf.glue.emies.resourceinfo' \
                        and target.ComputingEndpoint.InterfaceName != 'org.ogf.glue.emies.activitycreation':
                    self.log.debug(
                        "Rejecting target interface %s because not EMI-ES" %
                        target.ComputingEndpoint.InterfaceName)
                    continue
                # Check for matching host and queue
                targethost = re.sub(
                    ':arex$', '',
                    re.sub('urn:ogf:ComputingService:', '',
                           target.ComputingService.ID))
                targetqueue = target.ComputingShare.Name
                if clusterhost and targethost != clusterhost:
                    self.log.debug(
                        'Rejecting target host %s as it does not match %s' %
                        (targethost, clusterhost))
                    continue
                if clusterqueue and targetqueue != clusterqueue:
                    self.log.debug(
                        'Rejecting target queue %s as it does not match %s' %
                        (targetqueue, clusterqueue))
                    continue
                if targetqueue in self.conf.getList(['queuesreject', 'item']):
                    self.log.debug(
                        'Rejecting target queue %s in queuesreject list' %
                        targetqueue)
                    continue
                elif targethost in self.conf.getList(
                    ['clustersreject', 'item']):
                    self.log.debug(
                        'Rejecting target host %s in clustersreject list' %
                        targethost)
                    continue
                else:
                    # tmp hack
                    target.ComputingShare.LocalWaitingJobs = 0
                    target.ComputingShare.PreLRMSWaitingJobs = 0
                    target.ExecutionEnvironment.CPUClockSpeed = 2000
                    qjobs = self.db.getArcJobsInfo(
                        "cluster='" + str(self.cluster) +
                        "' and  arcstate='submitted' and fairshare='%s'" %
                        fairshare, ['id', 'priority'])
                    rjobs = self.db.getArcJobsInfo(
                        "cluster='" + str(self.cluster) +
                        "' and  arcstate='running' and fairshare='%s'" %
                        fairshare, ['id'])

                    # max queued priority
                    try:
                        maxprioqueued = max(
                            qjobs, key=lambda x: x['priority'])['priority']
                    except:
                        maxprioqueued = 0
                    self.log.info("Max priority queued: %d" % maxprioqueued)

                    # Limit number of submitted jobs using configuration or default (0.15 + 100/num of shares)
                    # Note: assumes only a few shares are used
                    qfraction = float(self.conf.get([
                        'jobs', 'queuefraction'
                    ])) if self.conf.get(['jobs', 'queuefraction']) else 0.15
                    qoffset = int(self.conf.get([
                        'jobs', 'queueoffset'
                    ])) if self.conf.get(['jobs', 'queueoffset']) else 100
                    jlimit = len(rjobs) * qfraction + qoffset / len(fairshares)
                    self.log.debug("running %d, queued %d, queue limit %d" %
                                   (len(rjobs), len(qjobs), jlimit))
                    if str(self.cluster).find('arc-boinc-0') != -1:
                        jlimit = len(rjobs) * 0.15 + 400
                    if str(self.cluster).find('XXXpikolit') != -1:
                        jlimit = len(rjobs) * 0.15 + 100
                    if str(self.cluster).find('arc05.lcg') != -1:
                        jlimit = len(rjobs) * 0.15 + 400
                    target.ComputingShare.PreLRMSWaitingJobs = len(qjobs)
                    if len(qjobs) < jlimit or ((maxpriowaiting > maxprioqueued)
                                               and (maxpriowaiting > 10)):
                        if maxpriowaiting > maxprioqueued:
                            self.log.info(
                                "Overriding limit, maxpriowaiting: %d > maxprioqueued: %d"
                                % (maxpriowaiting, maxprioqueued))
                        queuelist.append(target)
                        self.log.debug("Adding target %s:%s" %
                                       (targethost, targetqueue))
                    else:
                        self.log.info(
                            "%s/%s already at limit of submitted jobs for fairshare %s"
                            % (targethost, targetqueue, fairshare))

            # check if any queues are available, if not leave and try again next time
            if not queuelist:
                self.log.info("No free queues available")
                self.db.Commit()
                continue

            self.log.info("start submitting")

            # Just run one thread for each job in sequence. Strange things happen
            # when trying to create a new UserConfig object for each thread.
            tasks = []
            for j in jobs:
                self.log.debug("%s: preparing submission" % j['appjobid'])
                jobdescstr = str(
                    self.db.getArcJobDescription(str(j['jobdesc'])))
                jobdescs = arc.JobDescriptionList()
                if not jobdescstr or not arc.JobDescription_Parse(
                        jobdescstr, jobdescs):
                    self.log.error("%s: Failed to prepare job description" %
                                   j['appjobid'])
                    continue
                tasks.append((j['id'], j['appjobid'], jobdescstr, proxystring,
                              int(self.conf.get(['atlasgiis', 'timeout']))))

            npools = 1
            if any(s in self.cluster
                   for s in self.conf.getList(['parallelsubmit', 'item'])):
                npools = int(self.conf.get(['parallelsubmit', 'npools']))
            self.log.debug("Starting submitters: %s" % npools)

            pool = multiprocessing.Pool(npools)
            #results = []
            #for task in tasks:
            #    result = pool.apply_async(Submit,(task))
            #    results.append(result)
            # Submit in workers
            results = [pool.apply_async(Submit, t) for t in tasks]  # each t is already an argument tuple

            # timeout per submission
            timeout = 60
            stopflag = False
            for result, task in zip(results, tasks):
                try:
                    jdb = result.get(timeout)
                    jconv = JobConv()
                    job = jconv.db2job(jdb)
                except multiprocessing.TimeoutError:
                    self.log.error(
                        "%s: submission timeout: exit and try again" % task[1])
                    # abort submission if Submit process is stuck
                    #pool.terminate()
                    KillPool(pool)
                    pool.join()
                    stopflag = True
                    # reduce timeout to finish quickly
                    timeout = 0.1
                    continue
                if job is None:
                    self.log.error("%s: no job defined for %d" %
                                   (task[1], task[0]))
                    continue
                jd = {}
                jd['arcstate'] = 'submitted'
                # initial offset of 2 minutes to force an early first status check
                jd['tarcstate'] = self.db.getTimeStamp(
                    time.time() -
                    int(self.conf.get(['jobs', 'checkinterval'])) + 120)
                jd['tstate'] = self.db.getTimeStamp()
                # extract hostname of cluster (depends on JobID being a URL)
                self.log.info("%s: job id %s" % (task[1], job.JobID))
                jd['cluster'] = self.cluster
                self.db.updateArcJobLazy(task[0], jd, job)
            if not stopflag:
                pool.terminate()
                pool.join()
            else:
                # stop submitting, gsiftp connection problem likely
                raise ExceptInterrupt(15)

            self.log.info("threads finished")
            # commit transaction to release row locks
            self.db.Commit()

            # still proxy bug - exit if there are multiple proxies
            if len(self.db.getProxiesInfo('TRUE', ['id'])) > 1:
                raise ExceptInterrupt(15)

        self.log.info("end submitting")

        return
Example 14
    def submitJob(self,
                  executableFile,
                  proxy,
                  numberOfJobs=1,
                  inputs=None,
                  outputs=None):
        """Method to submit job"""

        # Assume that the ARC queues are always of the format nordugrid-<batchSystem>-<queue>
        # And none of our supported batch systems have a "-" in their name
        self.arcQueue = self.queue.split("-", 2)[2]
        result = self._prepareProxy()
        if not result["OK"]:
            self.log.error("ARCComputingElement: failed to set up proxy",
                           result["Message"])
            return result
        self.usercfg.ProxyPath(os.environ["X509_USER_PROXY"])

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(
                executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH)

        executables = None
        if self.preamble:
            executables = [executableFile]
            executableFile = self._bundlePreamble(executableFile)

        batchIDList = []
        stampDict = {}

        if self.endpointType == "Gridftp":
            endpoint = arc.Endpoint(str(self.ceHost + ":2811/jobs"),
                                    arc.Endpoint.JOBSUBMIT,
                                    "org.nordugrid.gridftpjob")
        else:
            endpoint = arc.Endpoint(
                str("https://" + self.ceHost + ":8443/arex"),
                arc.Endpoint.JOBSUBMIT,
                "org.ogf.glue.emies.activitycreation",
            )

        # Submit jobs iteratively for now. Tentatively easier than mucking around with the JobSupervisor class
        for __i in range(numberOfJobs):
            # The basic job description
            jobdescs = arc.JobDescriptionList()
            # Get the job into the ARC way
            xrslString, diracStamp = self._writeXRSL(executableFile, inputs,
                                                     outputs, executables)
            self.log.debug("XRSL string submitted : %s" % xrslString)
            self.log.debug("DIRAC stamp for job : %s" % diracStamp)
            # The arc bindings don't accept unicode objects in Python 2 so xrslString must be explicitly cast
            result = arc.JobDescription_Parse(str(xrslString), jobdescs)
            if not result:
                self.log.error("Invalid job description",
                               "%r, message=%s" % (xrslString, result.str()))
                break
            # Submit the job
            jobs = arc.JobList()  # filled by the submit process
            submitter = arc.Submitter(self.usercfg)
            result = submitter.Submit(endpoint, jobdescs, jobs)
            # Save the submission info on success; otherwise report why it failed
            if result == arc.SubmissionStatus.NONE:
                # Job successfully submitted
                pilotJobReference = jobs[0].JobID
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp
                self.log.debug("Successfully submitted job %s to CE %s" %
                               (pilotJobReference, self.ceHost))
            else:
                self._analyzeSubmissionError(result)
                break  # Boo hoo *sniff*

        if self.preamble:
            os.unlink(executableFile)

        if batchIDList:
            result = S_OK(batchIDList)
            result["PilotStampDict"] = stampDict
        else:
            result = S_ERROR(
                "No pilot references obtained from the ARC job submission")
        return result
Example 15
    def submitJob(self,
                  executableFile,
                  proxy,
                  numberOfJobs=1,
                  inputs=None,
                  outputs=None):
        """Method to submit job"""

        # Assume that the ARC queues are always of the format nordugrid-<batchSystem>-<queue>
        # And none of our supported batch systems have a "-" in their name
        self.arcQueue = self.queue.split("-", 2)[2]
        result = self._prepareProxy()
        if not result["OK"]:
            self.log.error("ARCComputingElement: failed to set up proxy",
                           result["Message"])
            return result
        self.usercfg.ProxyPath(os.environ["X509_USER_PROXY"])

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(
                executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH)

        executables = None
        if self.preamble:
            executables = [executableFile]
            executableFile = self._bundlePreamble(executableFile)

        batchIDList = []
        stampDict = {}

        if self.endpointType == "Gridftp":
            endpoint = arc.Endpoint(str(self.ceHost + ":2811/jobs"),
                                    arc.Endpoint.JOBSUBMIT,
                                    "org.nordugrid.gridftpjob")
        else:
            endpoint = arc.Endpoint(
                str("https://" + self.ceHost + ":8443/arex"),
                arc.Endpoint.JOBSUBMIT,
                "org.ogf.glue.emies.activitycreation",
            )

        # Submit jobs iteratively for now. Tentatively easier than mucking around with the JobSupervisor class
        for __i in range(numberOfJobs):
            # The basic job description
            jobdescs = arc.JobDescriptionList()
            # Get the job into the ARC way
            xrslString, diracStamp = self.__writeXRSL(executableFile, inputs,
                                                      outputs, executables)
            self.log.debug("XRSL string submitted : %s" % xrslString)
            self.log.debug("DIRAC stamp for job : %s" % diracStamp)
            # The arc bindings don't accept unicode objects in Python 2 so xrslString must be explicitly cast
            result = arc.JobDescription_Parse(str(xrslString), jobdescs)
            if not result:
                self.log.error("Invalid job description",
                               "%r, message=%s" % (xrslString, result.str()))
                break
            # Submit the job
            jobs = arc.JobList()  # filled by the submit process
            submitter = arc.Submitter(self.usercfg)
            result = submitter.Submit(endpoint, jobdescs, jobs)
            # Save the submission info on success; otherwise report why it failed
            if result == arc.SubmissionStatus.NONE:
                # Job successfully submitted
                pilotJobReference = jobs[0].JobID
                batchIDList.append(pilotJobReference)
                stampDict[pilotJobReference] = diracStamp
                self.log.debug("Successfully submitted job %s to CE %s" %
                               (pilotJobReference, self.ceHost))
            else:
                message = "Failed to submit job because "
                if result.isSet(arc.SubmissionStatus.NOT_IMPLEMENTED):  # pylint: disable=no-member
                    self.log.warn(
                        "%s feature not implemented on CE? (weird I know - complain to site admins)"
                        % message)
                if result.isSet(arc.SubmissionStatus.NO_SERVICES):  # pylint: disable=no-member
                    self.log.warn(
                        "%s no services are running on CE? (open GGUS ticket to site admins)"
                        % message)
                if result.isSet(arc.SubmissionStatus.ENDPOINT_NOT_QUERIED):  # pylint: disable=no-member
                    self.log.warn(
                        "%s endpoint was not even queried. (network ..?)" %
                        message)
                if result.isSet(arc.SubmissionStatus.BROKER_PLUGIN_NOT_LOADED):  # pylint: disable=no-member
                    self.log.warn(
                        "%s BROKER_PLUGIN_NOT_LOADED : ARC library installation problem?"
                        % message)
                if result.isSet(
                        arc.SubmissionStatus.DESCRIPTION_NOT_SUBMITTED):  # pylint: disable=no-member
                    self.log.warn(
                        "%s Job not submitted - incorrect job description? (missing field in XRSL string?)"
                        % message)
                if result.isSet(
                        arc.SubmissionStatus.SUBMITTER_PLUGIN_NOT_LOADED):  # pylint: disable=no-member
                    self.log.warn(
                        "%s SUBMITTER_PLUGIN_NOT_LOADED : ARC library installation problem?"
                        % message)
                if result.isSet(arc.SubmissionStatus.AUTHENTICATION_ERROR):  # pylint: disable=no-member
                    self.log.warn(
                        "%s authentication error - screwed up / expired proxy? Renew / upload pilot proxy on machine?"
                        % message)
                if result.isSet(arc.SubmissionStatus.ERROR_FROM_ENDPOINT):  # pylint: disable=no-member
                    self.log.warn(
                        "%s some error from the CE - possibly CE problems?" %
                        message)
                self.log.warn("%s ... maybe above messages will give a hint." %
                              message)
                break  # Boo hoo *sniff*

        if self.preamble:
            os.unlink(executableFile)

        if batchIDList:
            result = S_OK(batchIDList)
            result["PilotStampDict"] = stampDict
        else:
            result = S_ERROR(
                "No pilot references obtained from the ARC job submission")
        return result
Example 16
    def submitJob(self, executableFile, proxy, numberOfJobs=1):
        """Method to submit job"""

        # Assume that the ARC queues are always of the format nordugrid-<batchSystem>-<queue>
        # And none of our supported batch systems have a "-" in their name
        self.arcQueue = self.queue.split("-", 2)[2]
        result = self._prepareProxy()
        if not result["OK"]:
            self.log.error("ARCComputingElement: failed to set up proxy", result["Message"])
            return result
        self.usercfg.ProxyPath(os.environ["X509_USER_PROXY"])

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(executableFile, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

        batchIDList = []
        stampDict = {}

        # Creating an endpoint
        endpoint = arc.Endpoint(self.ceHost, arc.Endpoint.COMPUTINGINFO, "org.nordugrid.ldapglue2")

        # Get the ExecutionTargets of the ComputingElement (Can be REST, EMI-ES or GRIDFTP)
        retriever = arc.ComputingServiceRetriever(self.usercfg, [endpoint])
        retriever.wait()
        targetsWithQueues = list(retriever.GetExecutionTargets())

        # Targets also include queues
        # To avoid losing time trying to submit to queues we cannot interact with, we only keep the interesting ones
        targets = []
        for target in targetsWithQueues:
            if target.ComputingShare.Name == self.arcQueue:
                self.log.debug(
                    "Adding target:",
                    "%s (%s)" % (target.ComputingEndpoint.URLString, target.ComputingEndpoint.InterfaceName),
                )
                targets.append(target)

        # At this point, we should have GRIDFTP and AREX (EMI-ES and REST) targets related to arcQueue
        # We intend to submit to AREX first; if that does not work, GRIDFTP is used
        submissionWorked = False
        for target in targets:
            # If the submission is already done, we stop
            if submissionWorked:
                break

            for __i in range(numberOfJobs):

                # The basic job description
                jobdescs = arc.JobDescriptionList()

                # Get the job into the ARC way
                xrslString, diracStamp = self._writeXRSL(executableFile)
                self.log.debug("XRSL string submitted : %s" % xrslString)
                self.log.debug("DIRAC stamp for job : %s" % diracStamp)

                # The arc bindings don't accept unicode objects in Python 2 so xrslString must be explicitly cast
                result = arc.JobDescription_Parse(str(xrslString), jobdescs)
                if not result:
                    self.log.error("Invalid job description", "%r, message=%s" % (xrslString, result.str()))
                    break

                # Submit the job
                job = arc.Job()
                result = target.Submit(self.usercfg, jobdescs[0], job)

                # Save the submission info on success; otherwise report why it failed
                if result == arc.SubmissionStatus.NONE:
                    # Job successfully submitted
                    pilotJobReference = job.JobID
                    batchIDList.append(pilotJobReference)
                    stampDict[pilotJobReference] = diracStamp
                    submissionWorked = True
                    self.log.debug("Successfully submitted job %s to CE %s" % (pilotJobReference, self.ceHost))
                else:
                    self._analyzeSubmissionError(result)
                    break  # Boo hoo *sniff*

        if batchIDList:
            result = S_OK(batchIDList)
            result["PilotStampDict"] = stampDict
        else:
            result = S_ERROR("No pilot references obtained from the ARC job submission")
        return result