예제 #1
0
    def core_get_data(self, envsetup, token, source_surl, dest_path):
        """ stage-in core function, can be overridden (see stormSiteMover)

        Builds an lcg-cp command that copies source_surl to file://dest_path, adds a
        variant of that command to the job setup script and then executes it.

        envsetup: setup string that is prepended to every executed command
        token: space token; only decides which branch is taken, it is currently never
               passed on to lcg-cp (use_b is forced to False below)
        source_surl: source SURL handed to lcg-cp
        dest_path: full path of the local destination file

        NOTE(review): this listing ends right after the command has been executed, so the
        evaluation of the exit status and the return value are not visible here.
        """

        # error code container (not used in the visible part of this method)
        error = PilotErrors()

        # determine which timeout option to use
        # (newer lcg-cp versions get separate srm/connect/sendreceive time-outs, older ones only -t)
        if self.isNewLCGVersion("%s lcg-cp" % (envsetup)):
            timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
        else:
            timeout_option = "-t %d" % (self.timeout)

        # used lcg-cp options:
        # --vo: specifies the Virtual Organization the user belongs to
        #   -t: time-out
        if token:
            # do not use option -b on SL3 clusters running older versions of lcg_utils
            use_b = True
            # a non-zero exit status of 'lcg-cr --version' is taken as a sign of an old lcg_utils
            s, o = commands.getstatusoutput("%s lcg-cr --version" % (envsetup))
            if s != 0:
                # (BDII collects all information coming from site GIISes and stores them in a permanent database)
                tolog("(Probably too old lcg_utils - skipping BDII disabling)")
                use_b = False

            # for the time being
            # (this unconditional override makes the -s/-b branch below unreachable; the version
            # probe above is still executed but its result has no effect)
            use_b = False
            if use_b:
                _cmd_str = '%s lcg-cp --vo atlas --srcsetype srmv2 -s %s -b %s %s file://%s' %\
                           (envsetup, token, timeout_option, source_surl, dest_path)
            else:
                tolog("(Skipping space token for the time being)")
                _cmd_str = '%s lcg-cp --vo atlas %s %s file://%s' % (envsetup, timeout_option, source_surl, dest_path)
        else:
            _cmd_str = '%s lcg-cp --vo atlas %s %s file://%s' % (envsetup, timeout_option, source_surl, dest_path)

        # add the full stage-in command to the job setup script
        # (the destination directory is replaced by `pwd` in the script version of the command)
        to_script = _cmd_str.replace("file://%s" % os.path.dirname(dest_path), "file://`pwd`")
        to_script = to_script.lstrip(' ') # remove any initial spaces
        # a command that begins with an absolute path is prefixed with 'source'
        if to_script.startswith('/'):
            to_script = 'source ' + to_script
        addToJobSetupScript(to_script, os.path.dirname(dest_path))

        tolog("Executing command: %s" % (_cmd_str))
        # defaults, so that the exception handler below can log s and o even if the call itself fails
        s = -1
        o = '(not defined)'
        # start time stamp; presumably used for timing in the part of the method that is not shown
        t0 = os.times()
        try:
            s, o = commands.getstatusoutput(_cmd_str)
        except Exception, e:
            # s keeps its default (-1) here; the exception text replaces the command output
            tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" % (str(e), s, o))
            o = str(e)
예제 #2
0
def executePayload(runCommandList, job):
    """ execute the payload

    Runs the commands in runCommandList one after the other. Each command is first added
    to the job setup script in job.workdir and then executed, either through
    executePayloadGLExec() (when the 'glexec' parameter is 'true' or 'uid') or through
    commands.getstatusoutput(). The loop stops at the first command that ends with a
    non-zero exit status.

    runCommandList: list of command strings (more than one only for multi-trfs)
    job: job object; job.workdir is read and job.result[2] may be set to a failure code

    NOTE(review): this listing ends right after the loop, so the use of res_tuple, t0 and
    getstatusoutput_was_interrupted as well as the return value are not visible here.
    """

    # do not hide the proxy for PandaMover since it needs it or for sites that has sc.proxy = donothide
    #if 'DDM' not in jobSite.sitename and readpar('proxy') != 'donothide':
    #    # create the proxy guard object (must be created here before the sig2exc())
    #    proxyguard = ProxyGuard()
    #
    #    # hide the proxy
    #    hP_ret = proxyguard.hideProxy()
    #    if not hP_ret:
    #        tolog("Warning: Proxy exposed to payload")

    # run the payload process, which could take days to finish
    t0 = os.times()
    # (exit status, output) of the most recent command; placeholder until the first command has run
    res_tuple = (0, 'Undefined')

    # loop over all run commands (only >1 for multi-trfs)
    current_job_number = 0
    getstatusoutput_was_interrupted = False
    number_of_jobs = len(runCommandList)
    for cmd in runCommandList:
        current_job_number += 1
        try:
            # add the full job command to the job_setup.sh file
            # (one statement per line: a line break is inserted after every ';')
            to_script = cmd.replace(";", ";\n")
            addToJobSetupScript(to_script, job.workdir)

            tolog("Executing job command %d/%d: %s" % (current_job_number, number_of_jobs, cmd))
            if readpar('glexec').lower() in ['true', 'uid']: 
                # execute trf under glexec
                res_tuple = executePayloadGLExec(cmd, job)
            else:
                # execute trf normally
                res_tuple = commands.getstatusoutput(cmd)
        except Exception, e:
            tolog("!!FAILED!!3000!! Failed to run command %s" % str(e))
            getstatusoutput_was_interrupted = True
            # NOTE(review): failureCode is not assigned anywhere in the visible part of this
            # function - presumably a module level global; confirm
            # the loop is only left when a failure code is set, otherwise the next command is tried
            if failureCode:
                job.result[2] = failureCode
                tolog("!!FAILED!!3000!! Failure code: %d" % (failureCode))
                break
        else:
            # no exception: continue only if the command ended with exit status 0
            if res_tuple[0] == 0:
                tolog("Job command %d/%d finished" % (current_job_number, number_of_jobs))
            else:
                tolog("Job command %d/%d failed: res = %s" % (current_job_number, number_of_jobs, str(res_tuple)))
                break
예제 #3
0
    def core_get_data(self, envsetup, token, source_surl, local_fullname):
        """ special get function developed for storm sites

        Instead of copying the file, the TURL of source_surl is requested with lcg-gt and a
        symbolic link to it is created at local_fullname; lcg-sd is called afterwards with
        the request token returned by lcg-gt.

        envsetup: setup string that is prepended to every executed command
        token: not used in the visible part of this method
        source_surl: source SURL
        local_fullname: full path of the symbolic link to be created
        returns: (error.ERR_STAGEINFAILED, pilotErrorDiag) when creating the link or running
                 lcg-sd raises an exception

        NOTE(review): this listing ends inside the exception handler, so the success return
        and the handling of a failed lcg-gt call are not visible here.
        """

        # error code container
        error = PilotErrors()

        # get the TURL using the SURL
        _cmd_str = '%s lcg-gt %s file' % (envsetup, source_surl)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        s, o = commands.getstatusoutput(_cmd_str)
        t1 = os.times()
        # element 4 of os.times() is the elapsed real time
        t = t1[4] - t0[4]
        tolog("Command finished after %f s" % (t))
        if s == 0:
            # add the full lcg-gt command to the job setup script
            to_script = _cmd_str
            to_script = to_script.lstrip(' ') # remove any initial spaces
            # a command that begins with an absolute path is prefixed with 'source'
            if to_script.startswith('/'):
                to_script = 'source ' + to_script
            addToJobSetupScript(to_script, os.path.dirname(local_fullname))

            # the output is expected to consist of exactly two lines (TURL, request token);
            # any other number of lines raises a ValueError here, outside of the try block below
            source_turl, req_token = o.split('\n')
            # remove the protocol prefix to get a plain local path
            source_turl = source_turl.replace('file://','')
            tolog("Creating link from %s to %s" % (source_turl, local_fullname))
            try:
                os.symlink(source_turl, local_fullname)
                _cmd_str = '%s lcg-sd %s %s 0' % (envsetup, source_surl, req_token)
                tolog("Executing command: %s" % (_cmd_str))
                s,o = commands.getstatusoutput(_cmd_str)
                # Do we need to check the exit status of lcg-sd? What do we do if it fails?
                # (as written, success is logged whatever the exit status of lcg-sd is)
                tolog("get_data succeeded")
            except Exception, e:
                pilotErrorDiag = "Exception caught: %s" % str(e)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                tolog("get_data failed")
                return error.ERR_STAGEINFAILED, pilotErrorDiag
예제 #4
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        Stage in one file with xrdcp (or with the copy tool configured in 'copytoolin').

        The source TURL is built from the path part of gpfn and the "to" part of the first
        matching copyprefixin/copyprefix entry. The file is copied to os.path.join(path, lfn)
        and the size and checksum of the local copy are verified afterwards. Whenever the
        transfer or the verification fails, the local copy is removed before returning.

        gpfn: full source URL (e.g. method://[host[:port]/full-dir-path/filename - a SRM URL is OK)
        lfn: logical file name, also used as the name of the local copy
        path: destination absolute path (in a local file system)
        fsize: expected file size; 0 means unknown (it is then taken from the copy output)
        fchecksum: expected checksum; 0, "", None and "None" all mean unknown
        guid: file guid, only forwarded to the tracing report
        pdict: optional keys 'usect', 'jobId', 'workDir', 'access'; mandatory key 'report'
        returns: (error code, pilotErrorDiag); the code is 0 on success and
                 error.ERR_DIRECTIOFILE when the file is to be read directly instead of copied
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        def removeLocalCopy(fname):
            """ remove the local file before any get retry is attempted """
            if not self.removeLocal(fname):
                tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        # Get input parameters from pdict
        useCT = pdict.get('usect', True)
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        prodDBlockToken = pdict.get('access', '')

        # all spellings of "no checksum given" are evaluated once, so that every later
        # decision (checksum type, md5 option, comparison) treats them in the same way
        unknown_checksum = fchecksum in (0, "", None, "None")

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid)

        # get a proper setup
        _setup_str = self.getSetup()

        ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
        if ec != 0:
            self.__sendReport('RFCP_FAIL', report)
            return ec, pilotErrorDiag

        # remove any host and SFN info from PFN path
        src_loc_pfn = self.extractPathFromPFN(gpfn)

        # determine which copy command to use; 'copytoolin' has the form "<copytool>" or
        # "<copytool>^<pre-stage command>"
        cpt = "xrdcp"
        copytoolin = readpar('copytoolin')
        tolog("xrdcpSiteMover ELN copytoolin  : %s" % (copytoolin))
        if copytoolin != '':
            if '^' in copytoolin:
                cpt, pstage = copytoolin.split('^')
                if pstage != "" and pstage != 'dummy':
                    # do the pre-staging
                    cmd = "%s %s " % (pstage, src_loc_pfn)
                    rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
                    if rc != 0:
                        self.__sendReport('PSTAGE_FAIL', report)
                        return rc, pilotErrorDiag
                    tolog("Successfully pre-staged file")
            else:
                # the value read above is reused instead of asking readpar() a second time
                cpt = copytoolin

        tolog("Site mover will use get command: %s" % (cpt))

        # copyprefixin = 'dcap://ccdcapatlas.in2p3.fr:22125^root://ccsrb15:1094'
        # gpfn = 'srm://ccsrm.in2p3.fr/pnfs/in2p3.fr/data/...'
        # src_loc_pfn = '/pnfs/in2p3.fr/data/atlas/...'
        # add 'root://ccsrb15:1094' to src_loc_pfn

        # copyprefixin has priority, copyprefix is the fallback
        copyprefix = readpar('copyprefixin')
        if copyprefix == "":
            copyprefix = readpar('copyprefix')
            tolog("Using copyprefix = %s" % (copyprefix))
        else:
            tolog("Using copyprefixin = %s" % (copyprefix))

        if copyprefix == "":
            pilotErrorDiag = "Empty copyprefix, cannot continue"
            tolog("!!WARNING!!1777!! %s" % (pilotErrorDiag))
            self.__sendReport('PSTAGE_FAIL', report)
            return error.ERR_STAGEINFAILED, pilotErrorDiag

        # handle copyprefix lists
        pfroms, ptos = getCopyprefixLists(copyprefix)

        if len(pfroms) != len(ptos):
            pilotErrorDiag = "Copyprefix lists not of equal length: %s, %s" % (str(pfroms), str(ptos))
            tolog("!!WARNING!!1777!! %s" % (pilotErrorDiag))
            self.__sendReport('PSTAGE_FAIL', report)
            return error.ERR_STAGEINFAILED, pilotErrorDiag

        # the first usable pair whose "from" or "to" part starts the gpfn provides the prefix
        # (zip() is sufficient since both lists were verified to have the same length)
        ignored = ("", None, "dummy")
        for pfrom, pto in zip(pfroms, ptos):
            if pfrom in ignored or pto in ignored:
                continue
            if gpfn.startswith(pfrom) or gpfn.startswith(pto):
                src_loc_pfn = pto + src_loc_pfn
                src_loc_pfn = src_loc_pfn.replace('///', '//')
                break

        tolog("PFN=%s" % (gpfn))
        tolog("TURL=%s" % (src_loc_pfn))

        # the local copy is named after the lfn, not after the basename of the source
        src_loc_filename = lfn

        if unknown_checksum:
            csumtype = "default"
        else:
            csumtype = self.getChecksumType(fchecksum)

        # protect against bad pfn's
        src_loc_pfn = src_loc_pfn.replace('//xrootd/', '/xrootd/')
        if src_loc_pfn.find('//pnfs') == -1:
            src_loc_pfn = src_loc_pfn.replace('/pnfs', '//pnfs')

        # should the root file be copied or read directly by athena?
        directIn = self.checkForDirectAccess(lfn, useCT, workDir, jobId, prodDBlockToken)
        if directIn:
            report['relativeStart'] = None
            report['transferStart'] = None
            self.__sendReport('FOUND_ROOT', report)
            return error.ERR_DIRECTIOFILE, pilotErrorDiag

        # in case fchecksum is not given to this function, attempt to use the md5 option to get it
        dest_file = os.path.join(path, src_loc_filename)
        useMd5Option = unknown_checksum
        if useMd5Option:
            # is the md5 option available? (it is looked for in the output of the bare command)
            cmd_test = "%s %s" % (_setup_str, cpt)
            tolog("Executing test command: %s" % (cmd_test))
            rc, rs = commands.getstatusoutput(cmd_test)
            if "-md5" in rs:
                tolog("This xrdcp version supports the md5 option")
            else:
                tolog("This xrdcp version does not support the md5 option (checksum test will be skipped)")
                useMd5Option = False

        if useMd5Option:
            cmd = "%s %s -md5 %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file)
        else:
            cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file)

        # add the full stage-in command to the job setup script
        to_script = cmd.replace(dest_file, "`pwd`/%s" % os.path.basename(dest_file))
        to_script = to_script.lstrip(' ') # remove any initial spaces
        if to_script.startswith('/'):
            to_script = 'source ' + to_script
        addToJobSetupScript(to_script, path)

        # transfer the file
        report['transferStart'] = time()
        rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
        report['validateStart'] = time()
        if rc != 0:
            self.__sendReport('COPY_FAIL', report)
            removeLocalCopy(dest_file)
            return rc, pilotErrorDiag

        tolog("Successfully transferred file")

        # get file size from the command output if not known already
        if fsize == 0:
            fsize = self.getFileSize(rs)

        # get checksum from the command output if not known already; without the md5 option
        # there is nothing to compare with and the checksum test below is skipped
        if unknown_checksum:
            if useMd5Option:
                fchecksum = self.getChecksum(rs)
            else:
                fchecksum = ""
        else:
            tolog("fchecksum = %s" % (fchecksum))

        # get destination (local) file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
        tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            removeLocalCopy(dest_file)
            return ec, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum != "" and fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(gpfn), fchecksum, dstfchecksum)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            removeLocalCopy(dest_file)

            if csumtype == "adler32":
                self.__sendReport('AD_MISMATCH', report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.__sendReport('MD5_MISMATCH', report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

        # compare remote and local file size (skip test if remote/source file size is not known)
        if dstfsize != fsize and fsize != 0 and fsize != "":
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" %\
                             (os.path.basename(gpfn), str(dstfsize), str(fsize))
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.__sendReport('FS_MISMATCH', report)
            removeLocalCopy(dest_file)
            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
        self.__sendReport('DONE', report)
        return 0, pilotErrorDiag