Example 1
    def surls2metalink(self,replicas,metalinkFile):
        """ Convert list of replicas (of multiple files) to metalink
        Input argument, replicas, is dict with guid as key, and a list of surls
        Mappings from surl to https turl will come from ddm eventually
        to cover surls from remote SEs.
        For now just add the mapping for the local SE from copysetup.
        """
        site_name = self.site_name
        local_se_token = site_name + "_DATADISK"
        tolog("local SE token: %s" % (local_se_token))
        # self.surl2https_map has srm hostname as key, then a (from, to) tuple for the regexp replace
        dirAcc = getDirectAccessDic(readpar('copysetupin'))
        if not dirAcc:
            dirAcc = getDirectAccessDic(readpar('copysetup'))

        # extract srm host for key
        srmhost = None
        if dirAcc:
            srmhost = self.hostFromSurl(dirAcc['oldPrefix'])
        for guid in replicas.keys():
            reps = replicas[guid]
            tolog("Got replicas=%s for guid=%s" % (str(reps), guid))

        try:
            token_file = open('token_file', 'r')
        except IOError, e:
            tolog("!!WARNING!! Failed to open file: %s" % (e))
            raise Exception("!!FAILED!!1099!! Cannot open file with token!")
Example 2
    def _check_space(self, ub):
        """Checking space of a local directory"""

        # "source setup.sh"
        if self._setup:
            _setup_str = "source %s; " % self._setup
        else:
            _setup_str = ''

        fail = 0
        ret = ''
        if ub == "" or ub == "None" or ub == None:
            # seprodpath can have a complex structure in case of space tokens
            # although currently not supported in this site mover, prepare the code anyway
            # (use the first list item only)
            dst_loc_se = self.getDirList(readpar('seprodpath'))[0]
            if dst_loc_se == "":
                dst_loc_se = readpar('sepath')
            if dst_loc_se == "":
                tolog(
                    "WARNING: Can not perform alternative space check since sepath is not set"
                )
                return -1
            else:
                tolog("Attempting to use df for checking SE space: %s" %
                      (dst_loc_se))
                return self.check_space_df(dst_loc_se)
        else:
            try:
                f = urllib.urlopen(ub + '/storages/default')
            except Exception, e:
                tolog('!!WARNING!!2999!! Fetching default storage failed!')
                return -1
            else:
Example 3
    def getFileTransferInfo(self, transferType, buildJob):
        """ Get all relevant fields related to file transfer """

        copysetup = readpar('copysetupin')

        # create the direct access dictionary
        fileTransferInfo = getDirectAccessDic(copysetup)

        # if copysetupin did not contain direct access info, try the copysetup instead
        if not fileTransferInfo:
            copysetup = readpar('copysetup')
            fileTransferInfo = getDirectAccessDic(copysetup)

        # should the copytool be used?
        useCopyTool = False
        useFileStager = False
        useDirectAccess = False
        oldPrefix = ""
        newPrefix = ""
        dInfo = None
        if fileTransferInfo:
            dInfo = True
            # no direct access / remote I/O, use standard copytool (copy-to-scratch)
            if fileTransferInfo['useCopyTool']:
                useCopyTool = True
            # do not set the LFC host for file stager
            if fileTransferInfo['useFileStager']:
                useFileStager = True
            if fileTransferInfo['directIn']:
                useDirectAccess = True

            oldPrefix = fileTransferInfo['oldPrefix']
            newPrefix = fileTransferInfo['newPrefix']

        # override settings for transferType direct
        if transferType == 'direct':
            useCopyTool = False
            useFileStager = False
            useDirectAccess = True

        # should pilot create TURL based PFC? (not done here, but setup needs to be aware of it)
        # if dInfo and useDirectAccess and oldPrefix == "" and newPrefix == "":
        if (transferType == 'direct' or (useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == "") and not buildJob:
#        if (transferType == 'direct' or (not useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == ""):
            usePFCTurl = True
        else:
            usePFCTurl = False

        # force usePFCTurl for all jobs
        if not buildJob and useDirectAccess:
            tolog("Forced usePFCTurl (reset old/newPrefix)")
            usePFCTurl = True
            oldPrefix = ""
            newPrefix = ""

        if os.environ.get("TestXRootD", 'False') == 'True':
            import re
            copysetup = re.sub(r'\/xrootdsetup\.sh', '/xrootdsetup-dev.sh', copysetup)  # assign the result; re.sub does not modify in place

        return dInfo, useCopyTool, useDirectAccess, useFileStager, oldPrefix, newPrefix, copysetup, usePFCTurl
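The TURL-based PFC decision above folds transferType, the file-stager and direct-access flags, the old/new prefixes and the build-job flag into one condition. A minimal standalone sketch of that condition (decide_pfc_turl is a hypothetical helper name, not a pilot function) makes the cases explicit:

# Sketch of the TURL-based-PFC decision in getFileTransferInfo(); hypothetical helper name.
def decide_pfc_turl(transferType, useFileStager, useDirectAccess, oldPrefix, newPrefix, buildJob):
    remote_io = transferType == 'direct' or (useFileStager and useDirectAccess)
    no_prefixes = oldPrefix == "" and newPrefix == ""
    return remote_io and no_prefixes and not buildJob

assert decide_pfc_turl('direct', False, True, "", "", False)
assert decide_pfc_turl('copy', True, True, "", "", False)
assert not decide_pfc_turl('direct', False, True, "srm://old", "root://new", False)
assert not decide_pfc_turl('direct', False, True, "", "", True)  # build jobs never get a TURL-based PFC

Note that the example then force-enables usePFCTurl for any non-build direct-access job and clears the prefixes, so the condition above only sets the initial value.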
Example 4
def extractSingularityOptions():
    """ Extract any singularity options from catchall """

    # e.g. catchall = "somestuff singularity_options=\'-B /etc/grid-security/certificates,/var/spool/slurmd,/cvmfs,/ceph/grid,/data0,/sys/fs/cgroup\'"
    #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" #readpar("catchall")

    # ${workdir} should be there, otherwise the pilot cannot add the current workdir
    # if not there, add it

    # First try with reading new parameters from schedconfig
    container_options = readpar("container_options")
    if container_options == "":
        tolog(
            "container_options either does not exist in queuedata or is empty, trying with catchall instead"
        )
        catchall = readpar("catchall")
        #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'"

        pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?")
        found = re.findall(pattern, catchall)
        if len(found) > 0:
            container_options = found[0]

    if container_options != "":
        if container_options.endswith("'") or container_options.endswith('"'):
            container_options = container_options[:-1]
        # add the workdir if missing
        if not "${workdir}" in container_options and " --contain" in container_options:
            container_options = container_options.replace(
                " --contain", ",${workdir} --contain")
            tolog("Note: added missing ${workdir} to singularity_options")

    return container_options
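The catchall fallback above leans on a regular expression whose greedy group also swallows the closing quote, which is why the trailing-quote strip and the ${workdir} fix-up follow. A small self-contained sketch with a made-up catchall string shows the round trip:

import re

# Sketch of the catchall parsing done in extractSingularityOptions(), using a made-up catchall value.
catchall = "somestuff singularity_options='-B /cvmfs,/data0 --contain'"
pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?")
found = re.findall(pattern, catchall)
options = found[0] if found else ""
if options.endswith("'") or options.endswith('"'):
    options = options[:-1]  # the greedy (.+) keeps the closing quote, so strip it
if "${workdir}" not in options and " --contain" in options:
    options = options.replace(" --contain", ",${workdir} --contain")
assert options == "-B /cvmfs,/data0,${workdir} --contain"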
Example 5
def updateCopysetups(cmd3,
                     transferType=None,
                     useCT=None,
                     directIn=None,
                     useFileStager=None):
    """ Update the relevant copysetup fields for remote I/O or file stager """

    si = SiteInformation()

    _copysetupin = readpar('copysetupin')
    _copysetup = readpar('copysetup')

    if _copysetupin != "":
        si.updateCopysetup(cmd3,
                           'copysetupin',
                           _copysetupin,
                           transferType=transferType,
                           useCT=useCT,
                           directIn=directIn,
                           useFileStager=useFileStager)
    else:
        si.updateCopysetup(cmd3,
                           'copysetup',
                           _copysetup,
                           transferType=transferType,
                           useCT=useCT,
                           directIn=directIn,
                           useFileStager=useFileStager)
Example 6
def getFAXRedirectors(computingSite, sourceSite, jobId):
    """ Get the FAX redirectors primarily from the google server, fall back to schedconfig.faxredirector value """

    fax_redirectors_dictionary = {}

    # Is the sourceSite set?
    if sourceSite and sourceSite.lower() != 'null':
        # Get the FAX redirectors (if the method returns an empty dictionary, the keys and values will be set below)
        fax_redirectors_dictionary = _getFAXRedirectors(computingSite, sourceSite, jobId)

        # Verify the dictionary
        if fax_redirectors_dictionary.has_key('computingsite') and fax_redirectors_dictionary['computingsite'] != None:
            if fax_redirectors_dictionary['computingsite'] == "" or fax_redirectors_dictionary['computingsite'].lower() == "null":
                fax_redirectors_dictionary['computingsite'] = readpar('faxredirector')
                tolog("!!WARNING!!5555!! FAX computingsite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['computingsite'])
        else:
            fax_redirectors_dictionary['computingsite'] = readpar('faxredirector')
            tolog("!!WARNING!!5556!! FAX computingsite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['computingsite'])
        if fax_redirectors_dictionary.has_key('sourcesite') and fax_redirectors_dictionary['sourcesite'] != None:
            if fax_redirectors_dictionary['sourcesite'] == "" or fax_redirectors_dictionary['sourcesite'].lower() == "null":
                fax_redirectors_dictionary['sourcesite'] = readpar('faxredirector')
                tolog("!!WARNING!!5555!! FAX sourcesite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['sourcesite'])
        else:
            fax_redirectors_dictionary['sourcesite'] = readpar('faxredirector')
                tolog("!!WARNING!!5556!! FAX sourcesite is unknown, using default AGIS value (%s)" % fax_redirectors_dictionary['sourcesite'])

    else:
        tolog("sourceSite is not set, use faxredirector value from AGIS")

        _faxredirector = readpar('faxredirector')
        _faxredirector = updateRedirector(_faxredirector)
        fax_redirectors_dictionary['computingsite'] = _faxredirector
        fax_redirectors_dictionary['sourcesite'] = _faxredirector

    return fax_redirectors_dictionary
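The dictionary verification above repeats the same "missing, empty or 'null'" test for both keys. A hedged sketch of how that fallback could be factored out (_fallback_redirector is a hypothetical helper, not a pilot function, and the hostnames are made up):

# Hypothetical helper sketching the repeated key check in getFAXRedirectors().
def _fallback_redirector(dictionary, key, default):
    value = dictionary.get(key)
    if value is None or value == "" or str(value).lower() == "null":
        return default
    return value

d = {'computingsite': 'null', 'sourcesite': 'root://fax.example.org/'}
default = 'root://default-redirector.example.org/'
d['computingsite'] = _fallback_redirector(d, 'computingsite', default)
d['sourcesite'] = _fallback_redirector(d, 'sourcesite', default)
assert d['computingsite'] == default
assert d['sourcesite'] == 'root://fax.example.org/'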
Example 7
def extractSingularityOptions():
    """ Extract any singularity options from catchall """

    # e.g. catchall = "somestuff singularity_options=\'-B /etc/grid-security/certificates,/var/spool/slurmd,/cvmfs,/ceph/grid,/data0,/sys/fs/cgroup\'"
    #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" #readpar("catchall")

    # ${workdir} should be there, otherwise the pilot cannot add the current workdir
    # if not there, add it

    # First try with reading new parameters from schedconfig
    container_options = readpar("container_options")
    if container_options == "":
        tolog("container_options either does not exist in queuedata or is empty, trying with catchall instead")
        catchall = readpar("catchall")
        #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'"

        pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?")
        found = re.findall(pattern, catchall)
        if len(found) > 0:
            container_options = found[0]

    if container_options != "":
        if container_options.endswith("'") or container_options.endswith('"'):
            container_options = container_options[:-1]
        # add the workdir if missing
        if not "${workdir}" in container_options and " --contain" in container_options:
            container_options = container_options.replace(" --contain", ",${workdir} --contain")
            tolog("Note: added missing ${workdir} to singularity_options")

    return container_options
Example 8
    def getFileTransferInfo(self, transferType, buildJob):
        """ Get all relevant fields related to file transfer """

        copysetup = readpar('copysetupin')

        # create the direct access dictionary
        fileTransferInfo = getDirectAccessDic(copysetup)

        # if copysetupin did not contain direct access info, try the copysetup instead
        if not fileTransferInfo:
            copysetup = readpar('copysetup')
            fileTransferInfo = getDirectAccessDic(copysetup)

        # should the copytool be used?
        useCopyTool = False
        useFileStager = False
        useDirectAccess = False
        oldPrefix = ""
        newPrefix = ""
        dInfo = None
        if fileTransferInfo:
            dInfo = True
            # no direct access / remote I/O, use standard copytool (copy-to-scratch)
            if fileTransferInfo['useCopyTool']:
                useCopyTool = True
            # do not set the LFC host for file stager
            if fileTransferInfo['useFileStager']:
                useFileStager = True
            if fileTransferInfo['directIn']:
                useDirectAccess = True

            oldPrefix = fileTransferInfo['oldPrefix']
            newPrefix = fileTransferInfo['newPrefix']

        # override settings for transferType direct
        if transferType == 'direct':
            useCopyTool = False
            useFileStager = False
            useDirectAccess = True

        # should pilot create TURL based PFC? (not done here, but setup needs to be aware of it)
        # if dInfo and useDirectAccess and oldPrefix == "" and newPrefix == "":
        if (transferType == 'direct' or (useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == "") and not buildJob:
#        if (transferType == 'direct' or (not useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == ""):
            usePFCTurl = True
        else:
            usePFCTurl = False

        # force usePFCTurl for all jobs
        if not buildJob and useDirectAccess:
            tolog("Forced usePFCTurl (reset old/newPrefix)")
            usePFCTurl = True
            oldPrefix = ""
            newPrefix = ""

        if os.environ.get("TestXRootD", 'False') == 'True':
            import re
            copysetup = re.sub(r'\/xrootdsetup\.sh', '/xrootdsetup-dev.sh', copysetup)  # assign the result; re.sub does not modify in place

        return dInfo, useCopyTool, useDirectAccess, useFileStager, oldPrefix, newPrefix, copysetup, usePFCTurl
Example 9
    def surls2metalink(self, replicas, metalinkFile):
        """ Convert list of replicas (of multiple files) to metalink
        Input argument, replicas, is dict with guid as key, and a list of surls
        Mappings from surl to https turl will come from ddm eventually
        to cover surls from remote SEs.
        For now just add the mapping for the local SE from copysetup.
        """
        site_name = self.site_name
        local_se_token = site_name + "_DATADISK"
        tolog("local SE token: %s" % (local_se_token))
        # self.surl2https_map has srm hostname as key, then a (from, to) tuple for the regexp replace
        dirAcc = getDirectAccessDic(readpar('copysetupin'))
        if not dirAcc:
            dirAcc = getDirectAccessDic(readpar('copysetup'))

        # extract srm host for key
        srmhost = None
        if dirAcc:
            srmhost = self.hostFromSurl(dirAcc['oldPrefix'])
        for guid in replicas.keys():
            reps = replicas[guid]
            tolog("Got replicas=%s for guid=%s" % (str(reps), guid))

        try:
            token_file = open('token_file', 'r')
        except IOError, e:
            tolog("!!WARNING!! Failed to open file: %s" % (e))
            raise Exception("!!FAILED!!1099!! Cannot open file with token!")
Example 10
    def _check_space(self, ub):
        """Checking space of a local directory"""

        # "source setup.sh"
        if self._setup:
            _setup_str = "source %s; " % self._setup
        else:
            _setup_str = ''

        fail = 0
        ret = ''
        if ub == "" or ub == "None" or ub == None:
            # seprodpath can have a complex structure in case of space tokens
            # although currently not supported in this site mover, prepare the code anyway
            # (use the first list item only)
            dst_loc_se = self.getDirList(readpar('seprodpath'))[0]
            if dst_loc_se == "":
                dst_loc_se = readpar('sepath')
            if dst_loc_se == "":
                tolog("WARNING: Can not perform alternative space check since sepath is not set")
                return -1
            else:
                tolog("Attempting to use df for checking SE space: %s" % (dst_loc_se))
                return self.check_space_df(dst_loc_se)
        else:
            try:
                f = urllib.urlopen(ub + '/storages/default')
            except Exception, e:
                tolog('!!WARNING!!2999!! Fetching default storage failed!')
                return -1
            else:
Example 11
def updateCopysetups(cmd3, transferType=None, useCT=None, directIn=None, useFileStager=None):
    """ Update the relevant copysetup fields for remote I/O or file stager """

    si = SiteInformation()

    _copysetupin = readpar('copysetupin')
    _copysetup = readpar('copysetup')

    if _copysetupin != "":
        si.updateCopysetup(cmd3, 'copysetupin', _copysetupin, transferType=transferType, useCT=useCT, directIn=directIn, useFileStager=useFileStager)
    else:
        si.updateCopysetup(cmd3, 'copysetup', _copysetup, transferType=transferType, useCT=useCT, directIn=directIn, useFileStager=useFileStager)
Example 12
    def getFileTransferInfo(self, transferType, buildJob):
        """ Get all relevant fields related to file transfer """

        copysetup = readpar('copysetupin')

        # create the direct access dictionary
        fileTransferInfo = getDirectAccessDic(copysetup)

        # if copysetupin did not contain direct access info, try the copysetup instead
        if not fileTransferInfo:
            copysetup = readpar('copysetup')
            fileTransferInfo = getDirectAccessDic(copysetup)

        # should the copytool be used?
        useCopyTool = False
        useFileStager = False
        useDirectAccess = False
        lfcHost = readpar('lfchost')
        oldPrefix = ""
        newPrefix = ""
        dInfo = None
        if fileTransferInfo:
            dInfo = True
            # no direct access / remote I/O, use standard copytool (copy-to-scratch)
            if fileTransferInfo['useCopyTool']:
                useCopyTool = True
            # do not set the LFC host for file stager
            if fileTransferInfo['useFileStager']:
                useFileStager = True
            if fileTransferInfo['directIn']:
                useDirectAccess = True

            oldPrefix = fileTransferInfo['oldPrefix']
            newPrefix = fileTransferInfo['newPrefix']

        # override settings for transferType direct
        if transferType == 'direct':
            useCopyTool = False
            useFileStager = False
            useDirectAccess = True
            if oldPrefix == "" and newPrefix == "":
                lfcHost = ""

        # should pilot create TURL based PFC? (not done here, but setup needs to be aware of it)
        # if dInfo and useDirectAccess and oldPrefix == "" and newPrefix == "":
        if (transferType == 'direct' or (useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == "") and not buildJob:
#        if (transferType == 'direct' or (not useFileStager and useDirectAccess)) and (oldPrefix == "" and newPrefix == ""):
            usePFCTurl = True
        else:
            usePFCTurl = False

        return dInfo, useCopyTool, useDirectAccess, useFileStager, oldPrefix, newPrefix, copysetup, usePFCTurl, lfcHost
Example 13
    def surls2metalink(self,replicas,metalinkFile):
        """ Convert list of replicas (of multiple files) to metalink
        Input argument, replicas, is dict with guid as key, and a list of surls
        Mappings from surl to https turl will come from ddm eventually
        to cover surls from remote SEs.
        For now just add the mapping for the local SE from copysetup.
        """
        # self.surl2https_map has srm hostname as key, then a (from, to) tuple for the regexp replace
        dirAcc = getDirectAccessDic(readpar('copysetupin'))
        if not dirAcc:
            dirAcc = getDirectAccessDic(readpar('copysetup'))

        # extract srm host for key (initialize first so the check below cannot hit an undefined name)
        srmhost = None
        if dirAcc:
            srmhost = self.hostFromSurl(dirAcc['oldPrefix'])
        if srmhost:
            self.surl2https_map[srmhost] = (dirAcc['oldPrefix'], dirAcc['newPrefix'])

        # Start building metalink
        metalink = '<?xml version="1.0" encoding="utf-8"?>\n'
        metalink += '<metalink version="3.0" generator="Pilot" xmlns="http://www.metalinker.org/">\n'
        metalink += '<files>\n'
        for guid in replicas.keys():
            reps = replicas[guid]
            # surl can have __DQ2blah at the end - strip it
            name = reps[0].sfn.split('/')[-1]
            extindex = name.rfind('__DQ2-')
            if extindex > 0:
                name = name[:extindex]
            metalink += '<file name="%s">\n' % name
            metalink += '<size>%s</size>' % reps[0].filesize
            metalink += '<verification><hash type="adler32">%s</hash></verification>\n' % reps[0].csumvalue
            metalink += '<resources>\n'
            # if the surl matches a list of https sites, then add a url
            for rep in reps:
                srmhost = self.hostFromSurl(rep.sfn)
                if srmhost in self.surl2https_map.keys():
                    pair = self.surl2https_map[srmhost]
                    metalink += '<url type="https" >%s</url>\n' % re.sub(pair[0], pair[1], rep.sfn)
                else:
                    tolog("Not found: %s" % rep.sfn)
            metalink += '</resources></file>\n'

        metalink += '</files></metalink>\n'
        print metalink
        mlfile = open(metalinkFile, 'w')
        mlfile.write(metalink)
        mlfile.close()
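The SURL-to-HTTPS rewrite that produces the <url type="https"> entries is a single substitution keyed on the SRM host. A minimal sketch with made-up prefixes (the pilot passes oldPrefix to re.sub unescaped; it is escaped here so the '?' in the SFN prefix is treated literally):

import re

# Sketch of the surl -> https turl rewrite used when building the metalink, with made-up prefixes.
oldPrefix = 'srm://srm.example.org/srm/managerv2?SFN='
newPrefix = 'https://webdav.example.org'
surl2https_map = {'srm.example.org': (oldPrefix, newPrefix)}

sfn = 'srm://srm.example.org/srm/managerv2?SFN=/atlas/datadisk/file.root__DQ2-12345'
srmhost = sfn.split('/')[2].split(':')[0]  # crude host extraction, standing in for hostFromSurl()
if srmhost in surl2https_map:
    old, new = surl2https_map[srmhost]
    turl = re.sub(re.escape(old), new, sfn)
    assert turl == 'https://webdav.example.org/atlas/datadisk/file.root__DQ2-12345'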
Example 14
    def getProdSourceLabel(self):
        """ determine the job type """

        prodSourceLabel = None
 
        # not None value; can be user (user analysis job), ddm (panda mover job, sitename should contain DDM)
        # test will return a testEvgen/testReco job, ptest will return a job sent with prodSourceLabel ptest
        if self.__env['uflag']:
            if self.__env['uflag'] == 'self' or self.__env['uflag'] == 'ptest':
                if self.__env['uflag'] == 'ptest':
                    prodSourceLabel = self.__env['uflag']
                elif self.__env['uflag'] == 'self':
                    prodSourceLabel = 'user'
            else:
                prodSourceLabel = self.__env['uflag']

        # for PandaMover jobs the label must be ddm
        if "DDM" in self.__env['thisSite'].sitename or (self.__env['uflag'] == 'ddm' and self.__env['thisSite'].sitename == 'BNL_ATLAS_test'):
            prodSourceLabel = 'ddm'
        elif "Install" in self.__env['thisSite'].sitename:  # old, now replaced with prodSourceLabel=install
            prodSourceLabel = 'software'
        if pUtil.readpar('status').lower() == 'test' and self.__env['uflag'] != 'ptest' and self.__env['uflag'] != 'ddm':
            prodSourceLabel = 'test'

        # override for release candidate pilots
        if self.__env['pilot_version_tag'] == "RC":
            prodSourceLabel = "rc_test"
        if self.__env['pilot_version_tag'] == "DDM":
            prodSourceLabel = "ddm"

        return prodSourceLabel
Example 15
    def getProdSourceLabel(self):
        """ determine the job type """

        prodSourceLabel = None
 
        # not None value; can be user (user analysis job), ddm (panda mover job, sitename should contain DDM)
        # test will return a testEvgen/testReco job, ptest will return a job sent with prodSourceLabel ptest
        if self.__env['uflag']:
            if self.__env['uflag'] == 'self' or self.__env['uflag'] == 'ptest':
                if self.__env['uflag'] == 'ptest':
                    prodSourceLabel = self.__env['uflag']
                elif self.__env['uflag'] == 'self':
                    prodSourceLabel = 'user'
            else:
                prodSourceLabel = self.__env['uflag']

        # for PandaMover jobs the label must be ddm
        if "DDM" in self.__env['thisSite'].sitename or (self.__env['uflag'] == 'ddm' and self.__env['thisSite'].sitename == 'BNL_ATLAS_test'):
            prodSourceLabel = 'ddm'
        elif "Install" in self.__env['thisSite'].sitename:  # old, now replaced with prodSourceLabel=install
            prodSourceLabel = 'software'
        if pUtil.readpar('status').lower() == 'test' and self.__env['uflag'] != 'ptest' and self.__env['uflag'] != 'ddm':
            prodSourceLabel = 'test'

        # override for release candidate pilots
        if self.__env['pilot_version_tag'] == "RC":
            prodSourceLabel = "rc_test"
        if self.__env['pilot_version_tag'] == "DDM":
            prodSourceLabel = "ddm"

        return prodSourceLabel
Example 16
    def addMD5sum(self, lfn, md5sum):
        """ add md5sum to lfn """
        if os.environ.has_key('LD_LIBRARY_PATH'):
            tolog("LD_LIBRARY_PATH prior to lfc import: %s" % os.environ['LD_LIBRARY_PATH'])
        else:
            tolog("!!WARNING!!2999!! LD_LIBRARY_PATH not set prior to lfc import")
        import lfc
        os.environ['LFC_HOST'] = readpar('lfchost')
        stat = lfc.lfc_filestatg()
        exitcode = lfc.lfc_statg(lfn, "", stat)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("!!WARNING!!2999!! lfc.lfc_statg: %d %s" % (err_num, lfn))
            return exitcode
        exitcode = lfc.lfc_setfsizeg(stat.guid, stat.filesize, 'MD', md5sum)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("[Non-fatal] ERROR: lfc.lfc_setfsizeg: %d %s %s" % (err_num, lfn, md5sum))
            return exitcode
        tolog("Successfully set md5sum for %s" % (lfn))
        return exitcode
Example 17
    def getGlobalFilePaths(self, dsname):
        """ Get the global file paths using to_native_lfn """

        tolog("Guessing the global path using to_native_lfn()..")

        # this method will in fact only ever return a single path, but keep 'paths' as a list for consistency with getGlobalFilePathsDQ2()
        paths = []

        # get the global redirector
        redirector = readpar("faxredirector")  # 'root://glrd.usatlas.org/'

        # correct the redirector in case the protocol and/or trailing slash are missing
        redirector = self.updateRedirector(redirector)

        # get the pre-path
        native_path = self.to_native_lfn(dsname, "DUMMYLFN")
        native_path = native_path.replace("DUMMYLFN", "")  # the real lfn will be added by the caller

        # remove the /grid substring
        native_path = native_path.replace("/grid", "")

        # construct the global path
        paths.append(redirector + native_path)

        tolog("Will use global path: %s" % (paths[0]))

        return paths
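updateRedirector() itself is not shown in this example; the comment only says it adds a missing protocol and trailing slash. A hedged sketch of that normalization, assuming those are the only two fixes:

# Hedged sketch of the redirector normalization described above; the real updateRedirector() is not shown here.
def normalize_redirector(redirector):
    if redirector and not redirector.startswith('root://'):
        redirector = 'root://' + redirector
    if redirector and not redirector.endswith('/'):
        redirector += '/'
    return redirector

assert normalize_redirector('glrd.usatlas.org') == 'root://glrd.usatlas.org/'
assert normalize_redirector('root://glrd.usatlas.org/') == 'root://glrd.usatlas.org/'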
Example 18
def getObjectstoresListXXX(queuename):
    """ Get the objectstores list from the proper queuedata for the relevant queue """
    # queuename is needed as long as objectstores field is not available in normal queuedata (temporary)

    objectstores = None

    # First try to get the objectstores field from the normal queuedata
    try:
        from pUtil import readpar

        _objectstores = readpar("objectstores")
    except:
        # tolog("Field \'objectstores\' not yet available in queuedata")
        _objectstores = None

    # Get the field from AGIS
    if not _objectstores:
        s = True
        # Download the new queuedata in case it has not been downloaded already
        if not os.path.exists(getNewQueuedataFilename()):
            s = getNewQueuedata(queuename)
        if s:
            _objectstores = getField("objectstores")

    if _objectstores:
        objectstores = _objectstores

    return objectstores
Example 19
def getObjectstoresListXXX(queuename):
    """ Get the objectstores list from the proper queuedata for the relevant queue """
    # queuename is needed as long as objectstores field is not available in normal queuedata (temporary)

    objectstores = None

    # First try to get the objectstores field from the normal queuedata
    try:
        from pUtil import readpar
        _objectstores = readpar('objectstores')
    except:
        #tolog("Field \'objectstores\' not yet available in queuedata")
        _objectstores = None

    # Get the field from AGIS
    if not _objectstores:
        s = True
        # Download the new queuedata in case it has not been downloaded already
        if not os.path.exists(getNewQueuedataFilename()):
            s = getNewQueuedata(queuename)
        if s:
            _objectstores = getField('objectstores')

    if _objectstores:
        objectstores = _objectstores

    return objectstores
Example 20
    def getSubprocessName(self, eventService):
        """ Select which subprocess is to be run by the Monitor """

        # The default subprocess is RunJob (name='Normal', which performs payload setup, stage-in, payload execution and stage-out).
        # An alternative subprocess is the runEvent module which downloads events from an Event Server, executes a payload
        # and stages out output files asynchronously as they are ready.
        # Note: send the entire job object to this method since there might be other subprocesses created at a later time which
        # will be identified by this method using some other job data member

        # Default subprocess name
        name = "RunJob"

        # Select alternative subprocess names for HPCs
        isHPC, _name = extractHPCInfo(readpar('catchall'))
        if isHPC:
            name = "RunJob" + _name # e.g. "RunJobTitan" is the proper subprocess name for the Titan plug-in

        # for es merge jobs
        if _name == "Hpc":
            name = "RunJob"

        # Are we going to run an event service job?
        if eventService:
            tolog("Encountered an event service job")
            if isHPC:
                name = "RunJob%sEvent" % (_name)
            else:
                name = "RunJobEvent"

        tolog("Selected subprocess: %s" % (name))

        return name
Example 21
    def addMD5sum(self, lfn, md5sum):
        """ add md5sum to lfn """
        if os.environ.has_key('LD_LIBRARY_PATH'):
            tolog("LD_LIBRARY_PATH prior to lfc import: %s" % os.environ['LD_LIBRARY_PATH'])
        else:
            tolog("!!WARNING!!2999!! LD_LIBRARY_PATH not set prior to lfc import")
        import lfc
        os.environ['LFC_HOST'] = readpar('lfchost')
        #    b="."
        #    buffer = b.zfill(200)
        #    ret = lfc.lfc_seterrbuf(buffer, len(buffer))
        stat = lfc.lfc_filestatg()
        exitcode = lfc.lfc_statg(lfn, "", stat)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("!!WARNING!!2999!! lfc.lfc_statg: %d %s" % (err_num, lfn))
            return exitcode
        exitcode = lfc.lfc_setfsizeg(stat.guid, stat.filesize, 'MD', md5sum)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("[Non-fatal] ERROR: lfc.lfc_setfsizeg: %d %s %s" % (err_num, lfn, md5sum))
            return exitcode
        tolog("Successfully set md5sum for %s" % (lfn))
        return exitcode
Example 22
def extractSingularityOptions():
    """ Extract any singularity options from catchall """

    # e.g. catchall = "somestuff singularity_options=\'-B /etc/grid-security/certificates,/var/spool/slurmd,/cvmfs,/ceph/grid,/data0,/sys/fs/cgroup\'"
    #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'" #readpar("catchall")

    # ${workdir} should be there, otherwise the pilot cannot add the current workdir
    # if not there, add it

    catchall = readpar("catchall")
    #catchall = "singularity_options=\'-B /etc/grid-security/certificates,/cvmfs,${workdir} --contain\'"
    tolog("catchall: %s" % catchall)
    pattern = re.compile(r"singularity\_options\=\'?\"?(.+)\'?\"?")
    found = re.findall(pattern, catchall)
    if len(found) > 0:
        singularity_options = found[0]
        if singularity_options.endswith("'") or singularity_options.endswith(
                '"'):
            singularity_options = singularity_options[:-1]

        # add the workdir if missing
        if not "${workdir}" in singularity_options and " --contain" in singularity_options:
            singularity_options = singularity_options.replace(
                " --contain", ",${workdir} --contain")
            tolog("Note: added missing ${workdir} to singularity_options")
    else:
        singularity_options = ""

    return singularity_options
Example 23
    def getSubprocessName(self, eventService):
        """ Select which subprocess is to be run by the Monitor """

        # The default subprocess is RunJob (name='Normal', which performs payload setup, stage-in, payload execution and stage-out).
        # An alternative subprocess is the runEvent module which downloads events from an Event Server, executes a payload
        # and stages out output files asynchronously as they are ready.
        # Note: send the entire job object to this method since there might be other subprocesses created at a later time which
        # will be identified by this method using some other job data member

        # Default subprocess name
        name = "RunJob"

        # Select alternative subprocess names for HPCs
        isHPC, _name = extractHPCInfo(readpar('catchall'))
        if isHPC:
            name = "RunJob" + _name  # e.g. "RunJobTitan" is the proper subprocess name for the Titan plug-in

        # for es merge jobs
        if _name and _name.startswith("Hpc"):
            name = "RunJob"

        # Are we going to run an event service job?
        if eventService:
            tolog("Encountered an event service job")
            if isHPC:
                name = "RunJob%sEvent" % (_name)
            else:
                name = "RunJobEvent"

        tolog("Selected subprocess: %s" % (name))

        return name
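The subprocess name is built from the HPC suffix found in catchall plus the event-service flag. A standalone sketch mirroring the branches above (select_subprocess is a hypothetical name; extractHPCInfo() is assumed to return (isHPC, suffix) as used in the example):

# Standalone sketch of the selection logic in getSubprocessName(); hypothetical helper name.
def select_subprocess(isHPC, suffix, eventService):
    name = "RunJob"
    if isHPC:
        name = "RunJob" + suffix
    if suffix and suffix.startswith("Hpc"):  # es merge jobs fall back to the plain runner
        name = "RunJob"
    if eventService:
        if isHPC:
            name = "RunJob%sEvent" % suffix
        else:
            name = "RunJobEvent"
    return name

assert select_subprocess(False, "", False) == "RunJob"
assert select_subprocess(True, "Titan", False) == "RunJobTitan"
assert select_subprocess(True, "Titan", True) == "RunJobTitanEvent"
assert select_subprocess(False, "", True) == "RunJobEvent"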
Example 24
    def fixStageInPath(self, path):
        """Fix the path"""

        if path[:3] == "srm" and '?SFN=' in path:
            self.log("Found SFN part in file path: %s" % (path))
        elif path[:3] == "srm":
            try:
                hostname = path.split('/',3)[2]
            except Exception as e:
                self.log("!!WARNING!!2999!! Could not extract srm protocol for replacement, keeping path variable as it is: %s (%s)" % (path, str(e)))
            else:
                # srm = 'srm://head01.aglt2.org'
                srm = 'srm://' + hostname

                # does seopt contain any matching srm's?
                sematch = self.getSEMatchFromSEOpt(srm)
                if sematch != "":
                    path = path.replace(srm, sematch)
                    self.log("Replaced %s with %s (from seopt) in path: %s" % (srm, sematch, path))
                else:
                     se = readpar('se').split(",")[0]
                     _dummytoken, se = self.extractSE(se)
                     tolog("Using SE: %s" % (se))

                     path = path.replace(srm, se)
                     self.log("Replaced %s with %s (from se) in path: %s" % (srm, se, path))

                # add port number from se to getfile if necessary
                path = self.addPortToPath(se, path)
        return path
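The SRM host replacement above hinges on pulling the hostname out of the path with split('/', 3) and swapping the bare srm:// prefix for a matching endpoint. A short sketch using the host from the comment above and a made-up port:

# Sketch of the hostname extraction and prefix swap done in fixStageInPath().
path = "srm://head01.aglt2.org/pnfs/aglt2.org/atlasdatadisk/file.root"
hostname = path.split('/', 3)[2]  # 'head01.aglt2.org'
srm = 'srm://' + hostname

# if seopt (or the 'se' field) provides a matching endpoint, e.g. one carrying a port, the prefix is swapped:
sematch = 'srm://head01.aglt2.org:8443'
path = path.replace(srm, sematch)
assert path == "srm://head01.aglt2.org:8443/pnfs/aglt2.org/atlasdatadisk/file.root"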
Example 25
    def fixStageInPath(self, path):
        """Fix the path"""

        if path[:3] == "srm" and '?SFN=' in path:
            self.log("Found SFN part in file path: %s" % (path))
        elif path[:3] == "srm":
            try:
                hostname = path.split('/', 3)[2]
            except Exception as e:
                self.log("!!WARNING!!2999!! Could not extract srm protocol for replacement, keeping path variable as it is: %s (%s)" % (path, str(e)))
            else:
                # srm = 'srm://head01.aglt2.org'
                srm = 'srm://' + hostname

                # does seopt contain any matching srm's?
                sematch = self.getSEMatchFromSEOpt(srm)
                if sematch != "":
                    path = path.replace(srm, sematch)
                    self.log("Replaced %s with %s (from seopt) in path: %s" %
                             (srm, sematch, path))
                else:
                    se = readpar('se').split(",")[0]
                    _dummytoken, se = self.extractSE(se)
                    tolog("Using SE: %s" % (se))

                    path = path.replace(srm, se)
                    self.log("Replaced %s with %s (from se) in path: %s" %
                             (srm, se, path))

                # add port number from se to getfile if necessary
                path = self.addPortToPath(se, path)
        return path
Example 26
    def getPreDestination(self, sitemover, analJob, token, prodSourceLabel, alt=False):
        """ get the pre destination """

        destination = ""
        if not analJob:
            # process the destination path with getDirList since it can have a complex structure
            # as well as be a list of destination paths matching a corresponding space token
            if prodSourceLabel == 'ddm' and readpar('seprodpath') == '':
                sepath = readpar('sepath', alt=alt)
            else:
                sepath = readpar('seprodpath', alt=alt)
            destinationList = sitemover.getDirList(sepath)

            # decide which destination path to use depending on the space token for the current file
            if token:
                # find the proper path
                destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt)
                if destination == "":
                    tolog("!!WARNING!!2990!! seprodpath not properly defined: seprodpath = %s, destinationList = %s, using sepath instead" %\
                          (sepath, str(destinationList)))
                    sepath = readpar('sepath', alt=alt)
                    destinationList = sitemover.getDirList(sepath)
                    destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt)
                    if destination == "":
                        tolog("!!WARNING!!2990!! sepath not properly defined: sepath = %s, destinationList = %s" %\
                              (sepath, str(destinationList)))
            else:
                # space tokens are not used
                destination = destinationList[0]
        else:
            sepath = readpar('sepath', alt=alt)
            destinationList = sitemover.getDirList(sepath)

            # decide which destination path to use depending on the space token for the current file
            if token:
                # find the proper path
                destination = sitemover.getMatchingDestinationPath(token, destinationList, alt=alt)
                if destination == "":
                    tolog("!!WARNING!!2990!! sepath not properly defined: sepath = %s, destinationList = %s" %\
                          (sepath, str(destinationList)))
            else:
                # space tokens are not used
                destination = destinationList[0]

        return destination
Example 27
    def core_get_data(self, envsetup, token, source_surl, local_fullname,
                      experiment):
        """ special get function developed for storm sites """

        error = PilotErrors()

        # Transform the surl into a full surl
        full_se_endpoint = self.extractSE(readpar('se').split(",")[0])[1]
        prefix = os.path.commonprefix([source_surl, full_se_endpoint])
        if prefix:
            # Can use the bdii-free form
            source_surl = full_se_endpoint + source_surl[len(prefix):]
            _cmd_str = '%s lcg-gt --nobdii --setype srmv2 "%s" file' % (
                envsetup, source_surl)
        else:
            # Fallback solution, use old lcg-gt form
            # get the TURL using the SURL
            tolog("!!WARNING!!1234!! Source surl does not match %s, cannot use the bdii-independent lcg-gt" % full_se_endpoint)
            _cmd_str = '%s lcg-gt "%s" file' % (envsetup, source_surl)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        s, o = commands.getstatusoutput(_cmd_str)
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Command finished after %f s" % (t))
        if s == 0:
            # get the experiment object
            thisExperiment = getExperiment(experiment)

            # add the full stage-out command to the job setup script
            to_script = _cmd_str
            to_script = to_script.lstrip(' ')  # remove any initial spaces
            if to_script.startswith('/'):
                to_script = 'source ' + to_script
            thisExperiment.updateJobSetupScript(
                os.path.dirname(local_fullname), to_script=to_script)

            source_turl, req_token = o.split('\n')
            source_turl = source_turl.replace('file://', '')
            tolog("Creating link from %s to %s" %
                  (source_turl, local_fullname))
            try:
                os.symlink(source_turl, local_fullname)
                _cmd_str = '%s lcg-sd %s %s 0' % (envsetup, source_surl,
                                                  req_token)
                tolog("Executing command: %s" % (_cmd_str))
                s, o = commands.getstatusoutput(_cmd_str)
                # Do we need to check the exit status of lcg-sd? What do we do if it fails?
                tolog("get_data succeeded")
            except Exception, e:
                pilotErrorDiag = "Exception caught: %s" % str(e)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                tolog("get_data failed")
                return error.ERR_STAGEINFAILED, pilotErrorDiag
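The "bdii-free" branch above keys on os.path.commonprefix() between the source SURL and the full SE endpoint. A sketch with made-up endpoint strings shows how the SURL gets rebased onto the endpoint:

import os

# Sketch of the commonprefix-based SURL rewrite in core_get_data(), with made-up endpoints.
full_se_endpoint = 'srm://storm.example.org:8444/srm/managerv2?SFN='
source_surl = 'srm://storm.example.org:8444/atlas/datadisk/file.root'

prefix = os.path.commonprefix([source_surl, full_se_endpoint])
if prefix:
    rewritten = full_se_endpoint + source_surl[len(prefix):]
    assert rewritten == 'srm://storm.example.org:8444/srm/managerv2?SFN=atlas/datadisk/file.root'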
Example 28
    def isTier3(self):
        """ Is the given site a Tier-3? """
        # Note: defined by DB

        if readpar('ddm') == "local":
            status = True
        else:
            status = False

        return status
Example 29
    def isTier3(self):
        """ Is the given site a Tier-3? """
        # Note: defined by DB

        if readpar('ddm') == "local":
            status = True
        else:
            status = False

        return status
Example 30
    def setRecoveryDirs(self):
        """ Set the recovery directories """

        dirs = []
        if self.__site:
            _dir = self.__site.wntmpdir
        else:
            _dir = ""
        if _dir == "":
            if os.environ.has_key('TMPDIR'):
                _dir = os.environ['TMPDIR']
            elif os.environ.has_key('OSG_WN_TMP'):
                _dir = os.environ['OSG_WN_TMP']
            elif os.path.exists("/tmp"):
                _dir = "/tmp"
            elif os.path.exists("/scratch"):
                _dir = "/scratch"
            else:
                self.__pilotErrorDiag = "Could not locate any scratch dirs"
                tolog(self.__errorString % self.__pilotErrorDiag)
                _dir = ""
        if _dir != "":
            dirs.append(_dir)

        extradir = readpar('wntmpdir')
        if extradir != "" and extradir != "None" and extradir not in dirs:
            dirs.append(extradir)

        # check queuedata for external recovery directory
        # an empty externalRecoveryDir means that recovery should only search local WN disk for lost jobs
        # make sure the recovery directory actually exists (will not be added to dir list if empty)
        externalRecoveryDir = self.verifyRecoveryDir(readpar('recoverdir'))
        if externalRecoveryDir != "":
            dirs.append(externalRecoveryDir)

        if dirs != []:
            tolog("Job recovery will probe: %s" % str(dirs))
            self.__recoveryDirs = dirs
        else:
            self.__pilotErrorDiag = "Did not identify any base recovery directories"
            tolog(self.__errorString % self.__pilotErrorDiag)
Example 31
    def setRecoveryDirs(self):
        """ Set the recovery directories """

        dirs = []
        if self.__site:
            _dir = self.__site.wntmpdir
        else:
            _dir = ""
        if _dir == "":
            if os.environ.has_key('TMPDIR'):
                _dir = os.environ['TMPDIR']
            elif os.environ.has_key('OSG_WN_TMP'):
                _dir = os.environ['OSG_WN_TMP']
            elif os.path.exists("/tmp"):
                _dir = "/tmp"
            elif os.path.exists("/scratch"):
                _dir = "/scratch"
            else:
                self.__pilotErrorDiag = "Could not locate any scratch dirs"
                tolog(self.__errorString % self.__pilotErrorDiag)
                _dir = ""
        if _dir != "":
            dirs.append(_dir)

        extradir = readpar('wntmpdir')
        if extradir != "" and extradir != "None" and extradir not in dirs:
            dirs.append(extradir)

        # check queuedata for external recovery directory
        # an empty externalRecoveryDir means that recovery should only search local WN disk for lost jobs
        # make sure the recovery directory actually exists (will not be added to dir list if empty)
        externalRecoveryDir = self.verifyRecoveryDir(readpar('recoverdir'))
        if externalRecoveryDir != "":
            dirs.append(externalRecoveryDir)

        if dirs != []:
            tolog("Job recovery will probe: %s" % str(dirs))
            self.__recoveryDirs = dirs
        else:
            self.__pilotErrorDiag = "Did not identify any base recovery directories"
            tolog(self.__errorString % self.__pilotErrorDiag)
Example 32
    def forceAlternativeStageOut(self, flag=False):
        """ Force stage-out to use alternative SE """
        # See allowAlternativeStageOut()
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.

        tolog("ATLAS")
        if "force_alt_stageout" in readpar('catchall') and not flag:
            status = True
        else:
            status = False

        return status
Example 33
    def forceAlternativeStageOut(self, flag=False):
        """ Force stage-out to use alternative SE """
        # See allowAlternativeStageOut()
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.

        tolog("ATLAS")
        if "force_alt_stageout" in readpar('catchall') and not flag:
            status = True
        else:
            status = False

        return status
Example 34
def setEnvVars(sitename):
    """ Set ATLAS_CONDDB if necessary """

    if not os.environ.has_key('ATLAS_CONDDB'):
        atlas_conddb = readpar('gatekeeper')
        if atlas_conddb != "to.be.set":
            os.environ["ATLAS_CONDDB"] = atlas_conddb
            tolog("Note: ATLAS_CONDDB was not set by the pilot wrapper script")
            tolog("The pilot has set ATLAS_CONDDB to: %s" % (atlas_conddb))
        else:
            tolog("Warning: ATLAS_CONDDB was not set by the pilot wrapper and schedconfig.gatekeeper value is to.be.set (pilot will take no action)")

    # set specially requested env vars
    os.environ["PANDA_SITE_NAME"] = sitename
    tolog("Set PANDA_SITE_NAME = %s" % (sitename))
    copytool = readpar("copytoolin")
    if copytool == "":
        copytool = readpar("copytool")
    if "^" in copytool:
        copytool = copytool.split("^")[0]
    os.environ["COPY_TOOL"] = copytool
    tolog("Set COPY_TOOL = %s" % (copytool))
Example 35
def setEnvVars(sitename):
    """ Set ATLAS_CONDDB if necessary """

    if not os.environ.has_key('ATLAS_CONDDB'):
        atlas_conddb = readpar('gatekeeper')
        if atlas_conddb != "to.be.set":
            os.environ["ATLAS_CONDDB"] = atlas_conddb
            tolog("Note: ATLAS_CONDDB was not set by the pilot wrapper script")
            tolog("The pilot has set ATLAS_CONDDB to: %s" % (atlas_conddb))
        else:
            tolog("Warning: ATLAS_CONDDB was not set by the pilot wrapper and schedconfig.gatekeeper value is to.be.set (pilot will take no action)")

    # set specially requested env vars
    os.environ["PANDA_SITE_NAME"] = sitename
    tolog("Set PANDA_SITE_NAME = %s" % (sitename))
    copytool = readpar("copytoolin")
    if copytool == "":
        copytool = readpar("copytool")
    if "^" in copytool:
        copytool = copytool.split("^")[0]
    os.environ["COPY_TOOL"] = copytool
    tolog("Set COPY_TOOL = %s" % (copytool))
Example 36
    def core_get_data(self, envsetup, token, source_surl, local_fullname, experiment):
        """ special get function developed for storm sites """

        error = PilotErrors()

        # Transform the surl into a full surl
        full_se_endpoint = self.extractSE(readpar('se').split(",")[0])[1]
        prefix = os.path.commonprefix([source_surl, full_se_endpoint])
        if prefix:
            # Can use the bdii-free form
            source_surl = full_se_endpoint + source_surl[len(prefix):]
            _cmd_str = '%s lcg-gt --nobdii --setype srmv2 "%s" file' % (envsetup, source_surl)
        else:
            # Fallback solution, use old lcg-gt form 
            # get the TURL using the SURL
            tolog("!!WARNING!!1234!! Source surl does not match %s, cannot use the bdii-independent lcg-gt" % full_se_endpoint)
            _cmd_str = '%s lcg-gt "%s" file' % (envsetup, source_surl)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        s, o = commands.getstatusoutput(_cmd_str)
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Command finished after %f s" % (t))
        if s == 0:
            # get the experiment object
            thisExperiment = getExperiment(experiment)

            # add the full stage-out command to the job setup script
            to_script = _cmd_str
            to_script = to_script.lstrip(' ') # remove any initial spaces
            if to_script.startswith('/'):
                to_script = 'source ' + to_script
            thisExperiment.updateJobSetupScript(os.path.dirname(local_fullname), to_script=to_script)

            source_turl, req_token = o.split('\n')
            source_turl = source_turl.replace('file://','')
            tolog("Creating link from %s to %s" % (source_turl, local_fullname))
            try:
                os.symlink(source_turl, local_fullname)
                _cmd_str = '%s lcg-sd %s %s 0' % (envsetup, source_surl, req_token)
                tolog("Executing command: %s" % (_cmd_str))
                s,o = commands.getstatusoutput(_cmd_str)
                # Do we need to check the exit status of lcg-sd? What do we do if it fails?
                tolog("get_data succeeded")
            except Exception, e:
                pilotErrorDiag = "Exception caught: %s" % str(e)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                tolog("get_data failed")
                return error.ERR_STAGEINFAILED, pilotErrorDiag
Example 37
def executePayload(runCommandList, job):
    """ execute the payload """

    # do not hide the proxy for PandaMover since it needs it or for sites that has sc.proxy = donothide
    #if 'DDM' not in jobSite.sitename and readpar('proxy') != 'donothide':
    #    # create the proxy guard object (must be created here before the sig2exc())
    #    proxyguard = ProxyGuard()
    #
    #    # hide the proxy
    #    hP_ret = proxyguard.hideProxy()
    #    if not hP_ret:
    #        tolog("Warning: Proxy exposed to payload")

    # run the payload process, which could take days to finish
    t0 = os.times()
    res_tuple = (0, 'Undefined')

    # loop over all run commands (only >1 for multi-trfs)
    current_job_number = 0
    getstatusoutput_was_interrupted = False
    number_of_jobs = len(runCommandList)
    for cmd in runCommandList:
        current_job_number += 1
        try:
            # add the full job command to the job_setup.sh file
            to_script = cmd.replace(";", ";\n")
            addToJobSetupScript(to_script, job.workdir)

            tolog("Executing job command %d/%d: %s" % (current_job_number, number_of_jobs, cmd))
            if readpar('glexec').lower() in ['true', 'uid']: 
                # execute trf under glexec
                res_tuple = executePayloadGLExec(cmd, job)
            else:
                # execute trf normally
                res_tuple = commands.getstatusoutput(cmd)
        except Exception, e:
            tolog("!!FAILED!!3000!! Failed to run command %s" % str(e))
            getstatusoutput_was_interrupted = True
            if failureCode:
                job.result[2] = failureCode
                tolog("!!FAILED!!3000!! Failure code: %d" % (failureCode))
                break
        else:
            if res_tuple[0] == 0:
                tolog("Job command %d/%d finished" % (current_job_number, number_of_jobs))
            else:
                tolog("Job command %d/%d failed: res = %s" % (current_job_number, number_of_jobs, str(res_tuple)))
                break
Example 38
    def allowAlternativeStageOut(self, flag=False):
        """ Is alternative stage-out allowed? """
        # E.g. if stage-out to primary SE (at Tier-2) fails repeatedly, is it allowed to attempt stage-out to secondary SE (at Tier-1)?
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.

        if "allow_alt_stageout" in readpar('catchall') and not flag:
            status = True
        else:
            status = False

#        if enableT1stageout.lower() == "true" or enableT1stageout.lower() == "retry":
#            status = True
#        else:
#            status = False

        return status
Example 39
    def allowAlternativeStageOut(self, flag=False):
        """ Is alternative stage-out allowed? """
        # E.g. if stage-out to primary SE (at Tier-2) fails repeatedly, is it allowed to attempt stage-out to secondary SE (at Tier-1)?
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.

        if "allow_alt_stageout" in readpar('catchall') and not flag:
            status = True
        else:
            status = False

#        if enableT1stageout.lower() == "true" or enableT1stageout.lower() == "retry":
#            status = True
#        else:
#            status = False

        return status
Example 40
    def setNumberOfCores(self) :
        """ Report the number of cores in the WN """
        # 1. Grab corecount from queuedata
        # 2. If corecount is number and corecount > 1, set ATHENA_PROC_NUMBER env variable to this value
        # 3. If corecount is 0, null, or doesn't exist, then don't set the env. variable
        # 4. If corecount is '-1', then get number of cores from /proc/cpuinfo, and set the env. variable accordingly.

        cores = []
        nCores = None

        # grab the schedconfig value
        try:
            nCores = int(readpar('corecount'))
        except ValueError: # covers the case 'NULL'
            tolog("corecount not an integer in queuedata")
        except Exception, e:
            tolog("corecount not set in queuedata: %s" % str(e))
Example 41
    def setNumberOfCores(self):
        """ Report the number of cores in the WN """
        # 1. Grab corecount from queuedata
        # 2. If corecount is number and corecount > 1, set ATHENA_PROC_NUMBER env variable to this value
        # 3. If corecount is 0, null, or doesn't exist, then don't set the env. variable
        # 4. If corecount is '-1', then get number of cores from /proc/cpuinfo, and set the env. variable accordingly.

        cores = []
        nCores = None

        # grab the schedconfig value
        try:
            nCores = int(readpar('corecount'))
        except ValueError:  # covers the case 'NULL'
            tolog("corecount not an integer in queuedata")
        except Exception, e:
            tolog("corecount not set in queuedata: %s" % str(e))
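The snippet stops right after reading corecount, so the four numbered rules are not shown being applied. A hedged sketch of what the remaining steps could look like (multiprocessing.cpu_count() stands in for the /proc/cpuinfo count mentioned in rule 4):

import os
import multiprocessing

# Hedged sketch of the four corecount rules listed above; the rest of the real method is not shown here.
def apply_corecount(corecount_str):
    try:
        n = int(corecount_str)
    except (TypeError, ValueError):  # covers '', 'NULL' and None
        return
    if n == -1:
        n = multiprocessing.cpu_count()  # stand-in for counting cores in /proc/cpuinfo
    if n > 1:
        os.environ['ATHENA_PROC_NUMBER'] = str(n)
    # n == 0 or n == 1: leave the environment variable unset

apply_corecount('8')     # sets ATHENA_PROC_NUMBER=8
apply_corecount('NULL')  # no change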
Example 42
    def forceAlternativeStageOut(self, **pdict):
        """ Force stage-out to use alternative SE """
        # See allowAlternativeStageOut()
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.
        status = False

        flag = pdict.get('flag', False)
        altStageOut = pdict.get('altStageOut', False)
        objectstore = pdict.get('objectstore', False)

        if not objectstore:
            if altStageOut == "force":
                status = True
            elif "force_alt_stageout" in readpar('catchall') and not flag:
                status = True
            else:
                status = False

        return status
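The same decision expressed as a standalone sketch, with plain arguments replacing the pdict lookups and readpar('catchall') (names are illustrative):

def force_alt_stageout(alt_stageout, catchall, is_analysis_job, objectstore=False):
    # Never force alternative stage-out for objectstore transfers; force it when
    # explicitly requested or when the queue catchall asks for it, but not for user jobs.
    if objectstore:
        return False
    if alt_stageout == "force":
        return True
    return "force_alt_stageout" in catchall and not is_analysis_job

assert force_alt_stageout("force", "", False) is True
assert force_alt_stageout("", "force_alt_stageout", False, objectstore=True) is False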
Esempio n. 43
0
    def forceAlternativeStageOut(self, **pdict):
        """ Force stage-out to use alternative SE """
        # See allowAlternativeStageOut()
        # For ATLAS, flag=isAnalysisJob(). Alt stage-out is currently disabled for user jobs, so do not allow alt stage-out to be forced.
        status = False

        flag = pdict.get('flag', False)
        altStageOut = pdict.get('altStageOut', False)
        objectstore = pdict.get('objectstore', False)

        if not objectstore:
            if altStageOut == "force":
                status = True
            elif "force_alt_stageout" in readpar('catchall') and not flag:
                status = True
            else:
                status = False

        return status
Esempio n. 44
0
def getContainerName(user="******"):
    # E.g. container_type = 'singularity:pilot;docker:wrapper'
    # getContainerName(user='******') -> return 'singularity'

    container_name = ""
    container_type = readpar('container_type')

    if container_type != "" and user in container_type:
        try:
            container_names = container_type.split(';')
            for name in container_names:
                t = name.split(':')
                if user == t[1]:
                    container_name = t[0]
        except Exception as e:
            tolog("Failed to parse the container name: %s, %s" % (container_type, e))
    else:
        tolog("Container type not specified in queuedata")

    return container_name
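A standalone sketch of the same parsing for a queuedata string such as 'singularity:pilot;docker:wrapper'; the user value is masked in the listing above, so the 'pilot' default here is only an assumption:

def parse_container_name(container_type, user='pilot'):
    # Each ';'-separated entry is '<container_name>:<user>'; return the name
    # registered for the requested user, or an empty string.
    for entry in container_type.split(';'):
        parts = entry.split(':')
        if len(parts) == 2 and parts[1] == user:
            return parts[0]
    return ""

assert parse_container_name('singularity:pilot;docker:wrapper', user='pilot') == 'singularity'
assert parse_container_name('singularity:pilot;docker:wrapper', user='wrapper') == 'docker'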
Esempio n. 45
0
def getContainerName(user="******"):
    # E.g. container_type = 'singularity:pilot;docker:wrapper'
    # getContainerName(user='******') -> return 'singularity'

    container_name = ""
    container_type = readpar('container_type')

    if container_type != "" and user in container_type:
        try:
            container_names = container_type.split(';')
            for name in container_names:
                t = name.split(':')
                if user == t[1]:
                    container_name = t[0]
        except Exception as e:
            tolog("Failed to parse the container name: %s, %s" %
                  (container_type, e))
    else:
        tolog("Container type not specified in queuedata")

    return container_name
Esempio n. 46
0
def _useDirectAccess(LAN=True, WAN=False):
    """ Should direct i/o be used over LAN or WAN? """

    useDA = False

    if LAN:
        par = 'direct_access_lan'
    elif WAN:
        par = 'direct_access_wan'
    else:
        tolog("!!WARNING!!3443!! Bad LAN/WAN combination: LAN=%s, WAN=%s" % (str(LAN), str(WAN)))
        par = ''

    if par != '':
        da = readpar(par)
        if da:
            da = da.lower()
            if da == "true":
                useDA = True

    return useDA
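The same logic as a standalone sketch, with queuedata passed in as a plain dict instead of readpar() (illustrative only):

def use_direct_access(queuedata, LAN=True, WAN=False):
    # Pick the LAN or WAN flag and treat only the string "true" (any case) as enabled.
    if LAN:
        par = 'direct_access_lan'
    elif WAN:
        par = 'direct_access_wan'
    else:
        return False
    return str(queuedata.get(par, '')).lower() == "true"

assert use_direct_access({'direct_access_lan': 'True'}) is True
assert use_direct_access({'direct_access_wan': 'False'}, LAN=False, WAN=True) is False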
Esempio n. 47
0
def _useDirectAccess(LAN=True, WAN=False):
    """ Should direct i/o be used over LAN or WAN? """

    useDA = False

    if LAN:
        par = 'direct_access_lan'
    elif WAN:
        par = 'direct_access_wan'
    else:
        tolog("!!WARNING!!3443!! Bad LAN/WAN combination: LAN=%s, WAN=%s" % (str(LAN), str(WAN)))
        par = ''

    if par != '':
        da = readpar(par)
        if da:
            da = da.lower()
            if da == "true":
                useDA = True

    return useDA
Esempio n. 48
0
    def getSpecialSetupCommand(self):
        """ Set special_setup_cmd if necessary """

        # Note: this special setup command is hardly used and could probably be removed
        # in case any special setup should be added to the setup string before the trf is executed, the command defined in this method
        # could be added to the run command by using method addSPSetupToCmd().
        # the special command is also forwarded to the get and put functions (currently not used)

        special_setup_cmd = ""

        # add envsetup to the special command setup on tier-3 sites
        # (unknown if this is still needed)

        si = getSiteInformation(self.__experiment)
        if si.isTier3():
            _envsetup = readpar('envsetup')
            if _envsetup != "":
                special_setup_cmd += _envsetup
                if not special_setup_cmd.endswith(';'):
                    special_setup_cmd += ";"

        return special_setup_cmd
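A small sketch of the envsetup handling above, outside the site information machinery (the setup path in the usage line is invented):

def build_special_setup(envsetup):
    # Append the site's envsetup string and make sure it ends with ';'
    # so further commands can be chained after it.
    special_setup_cmd = ""
    if envsetup:
        special_setup_cmd += envsetup
        if not special_setup_cmd.endswith(';'):
            special_setup_cmd += ";"
    return special_setup_cmd

assert build_special_setup("source /some/site/setup.sh") == "source /some/site/setup.sh;"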
Esempio n. 49
0
    def getSpecialSetupCommand(self):
        """ Set special_setup_cmd if necessary """

        # Note: this special setup command is hardly used and could probably be removed
        # in case any special setup should be added to the setup string before the trf is executed, the command defined in this method
        # could be added to the run command by using method addSPSetupToCmd().
        # the special command is also forwarded to the get and put functions (currently not used)

        special_setup_cmd = ""

        # add envsetup to the special command setup on tier-3 sites
        # (unknown if this is still needed)

        si = getSiteInformation(self.__experiment)
        if si.isTier3():
            _envsetup = readpar('envsetup')
            if _envsetup != "":
                special_setup_cmd += _envsetup
                if not special_setup_cmd.endswith(';'):
                    special_setup_cmd += ";"

        return special_setup_cmd
Esempio n. 50
0
    def addMD5sum(self, lfn, md5sum):
        """ add md5sum to lfn """
        if os.environ.has_key("LD_LIBRARY_PATH"):
            tolog("LD_LIBRARY_PATH prior to lfc import: %s" % os.environ["LD_LIBRARY_PATH"])
        else:
            tolog("!!WARNING!!2999!! LD_LIBRARY_PATH not set prior to lfc import")
        import lfc

        os.environ["LFC_HOST"] = readpar("lfchost")
        stat = lfc.lfc_filestatg()
        exitcode = lfc.lfc_statg(lfn, "", stat)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("!!WARNING!!2999!! lfc.lfc_statg: %d %s" % (err_num, lfn))
            return exitcode
        exitcode = lfc.lfc_setfsizeg(stat.guid, stat.filesize, "MD", md5sum)
        if exitcode != 0:
            #    print "error:",buffer
            err_num = lfc.cvar.serrno
            tolog("[Non-fatal] ERROR: lfc.lfc_setfsizeg: %d %s %s" % (err_num, lfn, md5sum))
            return exitcode
        tolog("Successfully set md5sum for %s" % (lfn))
        return exitcode
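The md5sum registered above has to be computed from the local file first; a standard-library sketch (not the pilot's own checksum helpers) that produces both md5 and adler32 in streaming mode:

import hashlib
import zlib

def file_checksums(path, blocksize=1024 * 1024):
    # Read the file in chunks and return (md5 hex digest, zero-padded adler32).
    md5 = hashlib.md5()
    adler = 1
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(blocksize), b''):
            md5.update(chunk)
            adler = zlib.adler32(chunk, adler)
    return md5.hexdigest(), "%08x" % (adler & 0xffffffff)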
Esempio n. 51
0
    def updateQueuedataFromJobParameters(self, jobParameters):
        """ Extract queuedata overwrite command from job parameters and update queuedata """

        tolog("called updateQueuedataFromJobParameters with: %s" % (jobParameters))

        # extract and remove queuedata overwrite command from job parameters
        if "--overwriteQueuedata" in jobParameters:
            tolog("Encountered an --overwriteQueuedata command in the job parameters")

            # (jobParameters might be updated [queuedata overwrite command should be removed if present], so they need to be returned)
            jobParameters, queuedataUpdateDictionary = self.extractQueuedataOverwrite(jobParameters)

            # update queuedata
            if queuedataUpdateDictionary != {}:
                tolog("Queuedata will be updated from job parameters")
                for field in queuedataUpdateDictionary.keys():
                    ec = self.replaceQueuedataField(field, queuedataUpdateDictionary[field])
                    tolog("Updated %s in queuedata: %s (read back from file)" % (field, self.readpar(field)))

        # disable FAX if set in schedconfig
        if "--disableFAX" in jobParameters:
            tolog("Encountered a --disableFAX command in the job parameters")

            # remove string from jobParameters
            jobParameters = jobParameters.replace(" --disableFAX", "")

            # update queuedata if necessary
            if readpar("allowfax").lower() == "true":
                field = "allowfax"
                ec = self.replaceQueuedataField(field, "False")
                tolog("Updated %s in queuedata: %s (read back from file)" % (field, self.readpar(field)))

            else:
                tolog("No need to update queuedata for --disableFAX (allowfax is not set to True)")

        return jobParameters
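A sketch of the --overwriteQueuedata extraction, assuming for illustration an option of the form --overwriteQueuedata=key1=value1,key2=value2; the real format is handled by extractQueuedataOverwrite(), which is not shown here, and the field names in the usage lines are illustrative:

import re

def extract_overwrite(job_parameters):
    # Pull out the overwrite option, build a {field: value} dictionary and
    # return the job parameters with the option removed.
    updates = {}
    match = re.search(r'\s*--overwriteQueuedata=(\S+)', job_parameters)
    if match:
        for pair in match.group(1).split(','):
            if '=' in pair:
                key, value = pair.split('=', 1)
                updates[key] = value
        job_parameters = job_parameters.replace(match.group(0), '')
    return job_parameters.strip(), updates

params, updates = extract_overwrite("runGen.py --overwriteQueuedata=copytool=rucio,timefloor=60 --inDS some.dataset")
assert updates == {'copytool': 'rucio', 'timefloor': '60'}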
Esempio n. 52
0
    def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
        """
        Update the job status with the jobdispatcher web server.
        State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
        log = log extracts
        xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in this function)
        jr = job recovery mode
        """
    
        tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

        # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
        # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
        if not self.__jobrec:
            if job.result[0] == 'holding' and site.sitename != "CERNVM":
                job.result[0] = 'failed'
                tolog("This site does not support job recovery: HOLDING state reset to FAILED")

        # note: any changed job state above will be lost for fake server updates, does it matter?

        # get the node structure expected by the server
        node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

        # skip the server update (e.g. on NG)
        if not self.__updateServer:
            tolog("(fake server update)")
            return 0, node

        tolog("xmlstr = %s" % (xmlstr))

        # get the xml
        node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

        # stdout tail in case job.debug == 'true'
        if job.debug.lower() == "true" and stdout_tail != "":
            # protection for potentially large tails
            stdout_tail = stdout_tail[-2048:]
            node['stdout'] = stdout_tail
            tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

            # also send the full stdout to a text indexer if required
            if stdout_path != "":
                if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                    tolog("Will send payload stdout to text indexer")

                    # get the user name, which we will use to create a proper filename
                    from SiteMover import SiteMover
                    s = SiteMover()
                    username = s.extractUsername(job.prodUserID)

                    # get setup path for xrdcp
                    try:
                        si = getSiteInformation(job.experiment)
                        setup_path = si.getLocalROOTSetup()

                        filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                        dateDirs = self.getDateDirs()
                        remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                        url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                        cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                        tolog("Executing command: %s" % (cmd))
                        rc, rs = getstatusoutput(cmd)
                        tolog("rc=%d, rs=%s" % (rc, rs))
                    except Exception, e:
                        tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
            else:
                tolog("stdout_path not set")
Esempio n. 53
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        proxycheck = pdict.get('proxycheck', False)
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report
        try:
            report = pdict['report']
        except:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        #if proxycheck:
        #    s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
        #    if s != 0:
        #        self.prepareReport('NO_PROXY', report)
        #        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        #else:
        #    tolog("Proxy verification turned off")
        tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)
#here begins the new magic... from Vincenzo Lavorini
        sitemover = SiteMover.SiteMover()
        v_path = sitemover.getPathFromScope(scope, filename)
        rucio_c = Client()
        if "ATLAS" in token:
            token_ok = token[+5:]
        else:
            token_ok = token
        local_se_token = self.site_name + "_" + token_ok
        v_hostname = [
            j['hostname'] for j in rucio_c.get_protocols(local_se_token)
        ]
        v_port = [j['port'] for j in rucio_c.get_protocols(local_se_token)]
        v_prefix = [j['prefix'] for j in rucio_c.get_protocols(local_se_token)]
        v_address = "https://%s:%s%s" % (v_hostname[0], v_port[0], v_prefix[0])
        tolog("prova1 address is %s" % (v_address))
        if "rucio/" in v_address and "/rucio" in v_path:
            v_address = v_address[:-7]
            tolog("prova2 address is %s" % (v_address))
        elif "rucio" in v_address and "rucio" in v_path:
            v_address = v_address[:-6]
            tolog("prova3 address is %s" % (v_address))
        full_http_surl = v_address + v_path
        tolog("prova3 full_http__surl is %s" % (full_http_surl))

        full_surl = surl
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz

#putfile=surl
#tolog("putfile: %s" % (putfile))
#tolog("full_surl: %s" % (full_surl))

# get https surl
#full_http_surl = full_surl.replace("srm://", "https://")

# get the DQ2 site name from ToA ---why? Is it needed?
#try:
#    _dq2SiteName = self.getDQ2SiteName(surl=putfile)
#except Exception, e:
#    tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
#else:
#    report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
#    tolog("DQ2 site name: %s" % (_dq2SiteName))

        if testLevel == "1":
            source = "thisisjustatest"

        # determine which timeout option to use
        #commented by Lavorini timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)
        timeout_option = "--connect-timeout 300"

        sslCert = self.sslCert
        sslKey = self.sslKey
        sslCertDir = self.sslCertDir

        # check htcopy if it is existed or env is set properly
        #_cmd_str = 'which htcopy'
        #try:
        #    s, o = commands.getstatusoutput(_cmd_str)
        #except Exception, e:
        #    tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" % (str(e), s, o))
        #    o = str(e)

        #if s != 0:
        #    tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
        #    o = o.replace('\n', ' ')
        #    tolog("!!WARNING!!2990!! check PUT command failed. Status=%s Output=%s" % (str(s), str(o)))
        #return 999999

        # cleanup the SURL if necessary (remove port and srm substring)
        #if token:
        # used lcg-cp options:
        # --srcsetype: specify SRM version
        #   --verbose: verbosity on
        #        --vo: specifies the Virtual Organization the user belongs to
        #          -s: space token description
        #          -b: BDII disabling
        #          -t: time-out
        # (lcg-cr) -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
        #          -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
        #          -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
        #              the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
        #              generated in the same format as with the Replica Manager
        # _cmd_str = '%s lcg-cr --verbose --vo atlas -T srmv2 -s %s -b -t %d -l %s -g %s -d %s file:%s' %\
        #           (envsetup, token, self.timeout, lfclfn, guid, surl, fppfn)
        # usage: lcg-cp [-h,--help] [-i,--insecure] [-c,--config config_file]
        #               [-n nbstreams] [-s,--sst src_spacetokendesc] [-S,--dst dest_spacetokendesc]
        #               [-D,--defaultsetype se|srmv1|srmv2] [-T,--srcsetype se|srmv1|srmv2] [-U,--dstsetype se|srmv1|srmv2]
        #               [-b,--nobdii] [-t timeout] [-v,--verbose]  [-V,--vo vo] [--version] src_file  dest_file

        # surl = putfile[putfile.index('srm://'):]
        #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s?spacetoken=%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl, token)
        #_cmd_str = '%s lcg-cp --verbose --vo atlas -b %s -U srmv2 -S %s file://%s %s' % (envsetup, timeout_option, token, source, full_surl)
        #else:
        # surl is the same as putfile
        #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl)
        #_cmd_str = '%s lcg-cp --vo atlas --verbose -b %s -U srmv2 file://%s %s' % (envsetup, timeout_option, source, full_surl)
        # use the user certificate/proxy for --cert and --cacert and the matching key for --key
        _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (
            sslCert, sslKey, sslCert, sslCertDir,
            full_http_surl, source)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
        _cmd_out, _cmd_stderr = _cmd.communicate()
        report['relativeStart'] = time()
        report['transferStart'] = time()
        report['validateStart'] = time()
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Curl command output = %s" % (_cmd_out))
        tolog("Command finished after %f s" % (t))
        if "bytes uploaded" not in _cmd_out:
            tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))
        '''
            # check if file was partially transferred, if so, remove it
            _ec = self.removeFile(envsetup, self.timeout, dst_gpfn)
            if _ec == -2:
                pilotErrorDiag += "(failed to remove file) " # i.e. do not retry stage-out

            if "Could not establish context" in o:
                pilotErrorDiag += "Could not establish context: Proxy / VO extension of proxy has probably expired"
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('CONTEXT_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
            elif "No such file or directory" in o:
                pilotErrorDiag += "No such file or directory: %s" % (o)
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('NO_FILE_DIR', report)
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
            elif "globus_xio: System error" in o:
                pilotErrorDiag += "Globus system error: %s" % (o)
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('GLOBUS_FAIL', report)
                return self.put_data_retfail(error.ERR_PUTGLOBUSSYSERR, pilotErrorDiag)
            else:
                if len(o) == 0 and t >= self.timeout:
                    pilotErrorDiag += "Copy command self timed out after %d s" % (t)
                    tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                    self.prepareReport('CP_TIMEOUT', report)
                    return self.put_data_retfail(error.ERR_PUTTIMEOUT, pilotErrorDiag)
                else:
                    if len(o) == 0:
                        pilotErrorDiag += "Copy command returned error code %d but no output" % (ec)
                    else:
                        pilotErrorDiag += o
                    self.prepareReport('CP_ERROR', report)
                    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
	'''
        verified = False
        #getting the remote checksum from Rucio:
        token_file = open('token_file', 'r')
        token_rucio = token_file.readline()
        pos2print = token_rucio.find("CN")
        token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
        tolog("Token I am using: %s" % (token_rucio2print))
        httpredirector = readpar('httpredirector')

        trial_n = 1
        remote_checksum = "none"
        while (remote_checksum == "none" and trial_n < 8):
            trial_n += 1
            if not httpredirector:
                #cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip |awk \'{FS=\"hash type=\"}; {print $2}\' |awk \'{FS=\">\"}; {print $2}\' |awk \'{FS=\"<\"} {print $1}\'| grep -v \'^$\'"%(token_rucio,scope,filename)
                cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (
                    token_rucio, scope, filename)
                cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (
                    token_rucio2print, scope, filename)
            else:
                if "http" in httpredirector:
                    tolog("HTTP redirector I am using: %s" % (httpredirector))
                    cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (
                        token_rucio, httpredirector, scope, filename)
                    cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (
                        token_rucio2print, httpredirector, scope, filename)
                else:
                    tolog("HTTP redirector I am using: %s" % (httpredirector))
                    cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (
                        token_rucio, httpredirector, reps[0].scope,
                        reps[0].filename)
                    cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (
                        token_rucio2print, httpredirector, reps[0].scope,
                        reps[0].filename)

            tolog("Getting remote checksum: command to be executed: %s" %
                  (cmd2print))
            checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
            remote_checksum, stderr = checksum_cmd.communicate()
            tolog("Remote checksum as given by rucio %s" % (remote_checksum))
            if (remote_checksum == "none"):
                tolog("In checking checksum: command std error: %s" % (stderr))
                pilotErrorDiag = "Cannot get the checksum of file on SE"
                tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
                tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
                time.sleep(3)
                # try to get the remote checksum with lcg-get-checksum
                #remote_checksum = self.lcgGetChecksum(envsetup, self.timeout, full_surl)
                #if not remote_checksum:
                #    # try to grab the remote file info using lcg-ls command
                #    remote_checksum, remote_fsize = self.getRemoteFileInfo(envsetup, self.timeout, full_surl)
                #else:
                #    tolog("Setting remote file size to None (not needed)")
                #    remote_fsize = None

        # compare the checksums if the remote checksum was extracted
        tolog("Remote checksum: %s" % str(remote_checksum))
        tolog("Local checksum: %s" % (fchecksum))

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
                tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
        else:
            tolog(
                "Skipped primary checksum verification (remote checksum not known)"
            )

        # if lcg-ls could not be used
        if "/pnfs/" in surl and not remote_checksum:
            # for dCache systems we can test the checksum with the use method
            tolog(
                "Detected dCache system: will verify local checksum with the local SE checksum"
            )
            # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
            path = surl[surl.find('/pnfs/'):]
            # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....#
            tolog("File path: %s" % (path))

            _filename = os.path.basename(path)
            _dir = os.path.dirname(path)

            # get the remote checksum
            tolog("Local checksum: %s" % (fchecksum))
            try:
                remote_checksum = self.getdCacheChecksum(_dir, _filename)
            except Exception, e:
                pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(
                    e)
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            else:
                if remote_checksum == "NOSUCHFILE":
                    pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                    tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                    self.prepareReport('NOSUCHFILE', report)
                    return self.put_data_retfail(error.ERR_NOSUCHFILE,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                elif remote_checksum:
                    tolog("Remote checksum: %s" % (remote_checksum))
                else:
                    tolog("Could not get remote checksum")

            if remote_checksum:
                if remote_checksum != fchecksum:
                    pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                     (csumtype, _filename, remote_checksum, fchecksum)
                    if csumtype == "adler32":
                        self.prepareReport('AD_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                    else:
                        self.prepareReport('MD5_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                else:
                    tolog("Remote and local checksums verified")
                    verified = True
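The commented-out awk pipeline above extracts the hash value from the metalink4+xml document returned by Rucio; a standard-library sketch of the same extraction (the sample XML is invented for illustration):

import xml.etree.ElementTree as ET

def checksum_from_metalink(metalink_xml, hash_type='adler32'):
    # Return the first <hash type="..."> value found in a metalink4+xml document.
    root = ET.fromstring(metalink_xml)
    for h in root.iter('{urn:ietf:params:xml:ns:metalink}hash'):
        if h.get('type') == hash_type:
            return h.text
    return None

sample = (
    '<metalink xmlns="urn:ietf:params:xml:ns:metalink">'
    '<file name="EVNT.pool.root"><hash type="adler32">2d53a9c1</hash></file>'
    '</metalink>'
)
assert checksum_from_metalink(sample) == '2d53a9c1'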
Esempio n. 54
0
    def getJobExecutionCommandObsolete(self, job, jobSite, pilot_initdir):
        """ Define and test the command(s) that will be used to execute the payload """

        # Input tuple: (method is called from RunJob*)
        #   job: Job object
        #   jobSite: Site object
        #   pilot_initdir: launch directory of pilot.py
        #
        # Return tuple:
        #   pilot_error_code, pilot_error_diagnostics, job_execution_command, special_setup_command, JEM, cmtconfig
        # where
        #   pilot_error_code       : self.__error.<PILOT ERROR CODE as defined in PilotErrors class> (value should be 0 for successful setup)
        #   pilot_error_diagnostics: any output from problematic command or explanatory error diagnostics
        #   job_execution_command  : command to execute payload, e.g. cmd = "source <path>/setup.sh; <path>/python trf.py [options]"
        #   special_setup_command  : any special setup command that can be inserted into job_execution_command and is sent to stage-in/out methods
        #   JEM                    : Job Execution Monitor activation state (default value "NO", meaning JEM is not to be used. See JEMstub.py)
        #   cmtconfig              : cmtconfig symbol from the job def or schedconfig, e.g. "x86_64-slc5-gcc43-opt" [NOT USED IN THIS CLASS]

        pilotErrorDiag = ""
        cmd = ""
        special_setup_cmd = ""
        pysiteroot = ""
        siteroot = ""
        JEM = "NO"
        cmtconfig = ""

        # Is it an analysis job or not?
        analysisJob = isAnalysisJob(job.trf)

        # Set the INDS env variable (used by runAthena)
        if analysisJob:
            self.setINDS(job.realDatasetsIn)

        # Command used to download runAthena or runGen
        wgetCommand = 'wget'

        # special setup for NG
        status, pilotErrorDiag, cmd = self.setupNordugridTrf(
            job, analysisJob, wgetCommand, pilot_initdir)
        if status != 0:
            return status, pilotErrorDiag, "", special_setup_cmd, JEM, cmtconfig

        # add FRONTIER debugging and RUCIO env variables
        cmd = self.addEnvVars2Cmd(cmd, job.jobId, job.taskID,
                                  job.processingType, jobSite.sitename,
                                  analysisJob)

        if readpar('cloud') == "DE":
            # Should JEM be used?
            metaOut = {}
            try:
                import sys
                from JEMstub import updateRunCommand4JEM
                # If JEM should be used, the command will get updated by the JEMstub automatically.
                cmd = updateRunCommand4JEM(cmd,
                                           job,
                                           jobSite,
                                           tolog,
                                           metaOut=metaOut)
            except:
                # On failure, cmd stays the same
                tolog(
                    "Failed to update run command for JEM - will run unmonitored."
                )

            # Is JEM to be used?
            if metaOut.has_key("JEMactive"):
                JEM = metaOut["JEMactive"]

            tolog("Use JEM: %s (dictionary = %s)" % (JEM, str(metaOut)))

        elif '--enable-jem' in cmd:
            tolog(
                "!!WARNING!!1111!! JEM can currently only be used on certain sites in DE"
            )

        # Pipe stdout/err for payload to files
        cmd += " 1>%s 2>%s" % (job.stdout, job.stderr)
        tolog("\nCommand to run the job is: \n%s" % (cmd))

        tolog("ATLAS_PYTHON_PILOT = %s" % (os.environ['ATLAS_PYTHON_PILOT']))

        if special_setup_cmd != "":
            tolog("Special setup command: %s" % (special_setup_cmd))

        return 0, pilotErrorDiag, cmd, special_setup_cmd, JEM, cmtconfig
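A one-line helper sketch for the final command assembly step above, where payload stdout/stderr are redirected to the job's log files (file names in the usage line are placeholders):

def redirect_payload_output(cmd, stdout_name, stderr_name):
    # Append shell redirections so the payload's stdout and stderr end up in the job logs.
    return "%s 1>%s 2>%s" % (cmd, stdout_name, stderr_name)

assert redirect_payload_output("./runGen.py", "payload.stdout", "payload.stderr") == "./runGen.py 1>payload.stdout 2>payload.stderr"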
Esempio n. 55
0
        self.prepareReport('DONE', report)
        return (0, pilotErrorDiag, r_gpfn, fsize, fchecksum, 'P')

    def getPnfsid(self, pnfs, guid):
        """ get PNFSID from BNL LFC """

        try:
            import lfc
        except Exception, e:
            pilotErrorDiag = "getPnfsid() could not import lfc module: %s" % str(
                e)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            return None

        os.environ['LFC_HOST'] = readpar('lfchost')
        s, replicas = lfc.lfc_getreplicax('', guid, '')
        if s != 0:
            pilotErrorDiag = "Fail to get PNFSID for guid: %s" % guid
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            return None
        else:
            for replica in replicas:
                if pnfs in replica.sfn:
                    pnfsid = replica.setname
                    if pnfsid == "-1":
                        pilotErrorDiag = "getPnfsid() returned -1: File does not exist in dCache"
                        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                        return None
                    elif pnfsid == "":
                        pilotErrorDiag = "getPnfsid() returned nothing: PNFSID will not be used."
Esempio n. 56
0
            srmhost = self.hostFromSurl(dirAcc['oldPrefix'])
        for guid in replicas.keys():
            reps = replicas[guid]
            tolog("Got replicas=%s for guid=%s" % (str(reps), guid))

        try:
            token_file = open('token_file', 'r')
        except IOError, e:
            tolog("!!WARNING!! Failed to open file: %s" % (e))
            raise Exception("!!FAILED!!1099!! Cannot open file with token!")
        else:
            token_rucio = token_file.readline()
            pos2print = token_rucio.find("CN")
            token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
            tolog("Token I am using: %s" % (token_rucio2print))
        httpredirector = readpar('httpredirector')
        if not httpredirector:
            cmd = "curl -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip" % (
                token_rucio, reps[0].scope, reps[0].filename)
            cmd2print = "curl -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip" % (
                token_rucio2print, reps[0].scope, reps[0].filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                cmd = "curl -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip" % (
                    token_rucio, httpredirector, reps[0].scope,
                    reps[0].filename)
                cmd2print = "curl -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip" % (
                    token_rucio2print, httpredirector, reps[0].scope,
                    reps[0].filename)
            else:
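The Rucio token read from file above is masked before it is logged; a standalone sketch of that masking, also covering the case where the 'CN' marker is absent (the sample value is invented):

def mask_token(token, marker="CN"):
    # Keep only the part of the token before the marker and replace the rest.
    pos = token.find(marker)
    if pos == -1:
        return '(Hidden token)'
    return token[:pos] + '(Hidden token)'

assert mask_token("X-Rucio-Auth-Token: abc CN=Some User") == "X-Rucio-Auth-Token: abc (Hidden token)"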
Esempio n. 57
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        The local file (local access to the dCache file) is assumed to have a relative path
        that is the same as the relative path in the 'gpfn'
        loc_... are the variables used to access the file in the locally exported file system
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        useCT = pdict.get('usect', True)
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        analJob = pdict.get('analJob', False)
        timeout = pdict.get('timeout', 5 * 3600)
        prodDBlockToken = pdict.get('access', '')

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'BNLdCache', lfn,
                                           guid)

        # get a proper envsetup
        envsetup = self.getEnvsetup(get=True)

        if self._setup:
            _setup_str = "source %s; " % self._setup
        else:
            _setup_str = envsetup

        ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return ec, pilotErrorDiag

        # remove any host and SFN info from PFN path
        loc_pfn = self.extractPathFromPFN(gpfn)

        copyprefixin = readpar('copyprefixin')
        if copyprefixin != '':
            # Extract the copy prefix
            pfrom, pto = copyprefixin.split('^')
            loc_pfn = pfrom + loc_pfn
            tolog("Added copyprefixin to file: %s" % (loc_pfn))
        else:
            copyprefix = readpar('copyprefix')
            if copyprefix != '':
                # Extract the copy prefix
                pfrom, pto = copyprefix.split('^')
                loc_pfn = pfrom + loc_pfn
                tolog("Added copyprefix to file: %s" % (loc_pfn))

        report['relativeStart'] = time.time()

        pnfsid = self.getPnfsid(loc_pfn, guid)

        # for analysis jobs, skip input file if on tape or if lib file
        if analJob:
            if not self.isLibFile(loc_pfn):
                if pnfsid == None:
                    isStaged = self.isFileStaged(_setup_str, loc_pfn)
                else:
                    _com = "/cacheinfos/isFileInPool?pnfsid=%s" % (pnfsid)
                    isStaged = self.isFileStaged(
                        _setup_str,
                        loc_pfn,
                        url="ddmv02.usatlas.bnl.gov:8000",
                        com=_com)
                if not isStaged:
                    pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % (
                        loc_pfn)
                    self.prepareReport('FILE_ON_TAPE', report)
                    return error.ERR_FILEONTAPE, pilotErrorDiag
            else:
                tolog("Skipping file stage check for lib file")

        # should the root file be copied or read directly by athena?
        directIn, useFileStager = self.getTransferModes()
        if directIn:
            if useCT:
                directIn = False
                tolog(
                    "Direct access mode is switched off (file will be transferred with the copy tool)"
                )
                updateFileState(lfn,
                                workDir,
                                jobId,
                                mode="transfer_mode",
                                state="copy_to_scratch",
                                ftype="input")
            else:
                # determine if the file is a root file according to its name
                rootFile = self.isRootFileName(lfn)

                if prodDBlockToken == 'local' or not rootFile:
                    directIn = False
                    tolog(
                        "Direct access mode has been switched off for this file (will be transferred with the copy tool)"
                    )
                    updateFileState(lfn,
                                    workDir,
                                    jobId,
                                    mode="transfer_mode",
                                    state="copy_to_scratch",
                                    ftype="input")
                elif rootFile:
                    tolog(
                        "Found root file according to file name: %s (will not be transferred in direct reading mode)"
                        % (lfn))
                    report['relativeStart'] = None
                    report['transferStart'] = None
                    self.prepareReport('FOUND_ROOT', report)
                    if useFileStager:
                        updateFileState(lfn,
                                        workDir,
                                        jobId,
                                        mode="transfer_mode",
                                        state="file_stager",
                                        ftype="input")
                    else:
                        updateFileState(lfn,
                                        workDir,
                                        jobId,
                                        mode="transfer_mode",
                                        state="remote_io",
                                        ftype="input")
                    return error.ERR_DIRECTIOFILE, pilotErrorDiag
                else:
                    tolog("Normal file transfer")

        dest_path = os.path.join(path, lfn)
        if pnfsid == None:
            _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path)
        else:
            _cmd_str = '%sdccp pnfs://dcdcap.usatlas.bnl.gov:22125/%s %s' % (
                _setup_str, pnfsid, dest_path)

        tolog("Executing command: %s" % (_cmd_str))
        report['transferStart'] = time.time()
        try:
            s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
        except Exception, e:
            tolog("!!WARNING!!2999!! timed_command() threw an exception: %s" %
                  str(e))
            s = 1
            o = str(e)
            telapsed = timeout
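A standalone sketch of the copyprefix handling used above: queuedata stores the mapping as 'oldprefix^newprefix', and only the 'from' part is prepended to the local PFN (the path in the usage lines is invented):

def apply_copyprefix(pfn, copyprefix):
    # Prepend the 'from' part of the 'oldprefix^newprefix' mapping to the PFN.
    if not copyprefix:
        return pfn
    pfrom, _ = copyprefix.split('^')
    return pfrom + pfn

mapped = apply_copyprefix('/pnfs/usatlas.bnl.gov/data/file.root', 'dcap://dcdcap.usatlas.bnl.gov:22125^dummy')
assert mapped == 'dcap://dcdcap.usatlas.bnl.gov:22125/pnfs/usatlas.bnl.gov/data/file.root'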
Esempio n. 58
0
    # Get runJob object
    runJob = RunJobEdison()

    # Setup HPC specific parameters for Edison

    runJob.cpu_number_per_node = 24
    runJob.walltime = 120
    runJob.max_nodes = 10
    runJob.number_of_threads = 1
    runJob.min_walltime = 10
    runJob.waittime = 15
    runJob.nodes = 2
    runJob.partition_comp = 'edison'
    runJob.project_id = ""
    runJob.executed_queue = readpar('localqueue')

    # Define a new parent group
    os.setpgrp()

    # Protect the runJob code with exception handling
    hP_ret = False
    try:
        # always use this filename as the new jobDef module name
        import newJobDef

        jobSite = Site.Site()

        return_tuple = runJob.argumentParser()
        tolog("argumentParser returned: %s" % str(return_tuple))
        jobSite.setSiteInfo(return_tuple)
Esempio n. 59
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """ copy input file from SE to local dir """

        # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
        useCT = pdict.get('usect', True)
        prodDBlockToken = pdict.get('access', '')

        # get the DQ2 tracing report
        try:
            report = pdict['report']
        except:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'local'
            # mark the relative start
            report['relativeStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-','')

        if not path:
            tolog('path is empty, using current directory')            
            path = os.getcwd()

        # build setup string
        envsetup = self.getEnvsetup(get=True)

        # should the root file be copied or read directly by athena?
        directIn = False
        dInfo = getDirectAccessDic(readpar('copysetupin'))
        # if copysetupin did not contain direct access info, try the copysetup instead
        if not dInfo:
            dInfo = getDirectAccessDic(readpar('copysetup'))

        tolog("dInfo: %s" % str(dInfo))
        # check if we should use the copytool
        if dInfo:
            directIn = dInfo['directIn']

        if directIn:
            if useCT:
                directIn = False
                tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            else:
                # determine if the file is a root file according to its name
                rootFile = self.isRootFileName(lfn)

                if prodDBlockToken == 'local' or not rootFile:
                    directIn = False
                    tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                elif rootFile:
                    tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                    report['relativeStart'] = None
                    report['transferStart'] = None
                    self.prepareReport('FOUND_ROOT', report)
                    return 0, self.__pilotErrorDiag
                else:
                    tolog("Normal file transfer")
        else:
            tolog("not directIn")

        # build the get command
        _params = ""
        if fchecksum and fchecksum != 'None' and fchecksum != 0 and fchecksum != "0" and not self.isDummyChecksum(fchecksum):
            csumtype = self.getChecksumType(fchecksum)
            # special case for md5sum (command only understands 'md5' and 'adler32', and not 'ad' and 'md5sum')
            if csumtype == 'md5sum':
                csumtype = 'md5'

        execStr = self.__localget % (envsetup, _params, gpfn, os.path.join(path, lfn))
        tolog("Executing command: %s" % (execStr))
        
        report['transferStart'] = time()
        try:
            status, telapsed, cout, cerr = timed_command(execStr, self.__timeout)
        except Exception, e:
            self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str(e)
            tolog(self.__warningStr % self.__pilotErrorDiag)
            status = 1
            output = str(e)
            telapsed = self.__timeout