Beispiel #1
0
    def fixStageOutPath(self, path):
        """Convert a stage-out path into a TURL through the site copyprefix.

        The path is handed to SiteInformation.getCopyPrefixPath(stageIn=False).
        The conversion is considered successful only when the result starts
        with "root:".

        :param path: the PFN/path to convert.
        :return: tuple (status, output). status is 0 on success, otherwise
            PilotErrors.ERR_STAGEINFAILED. output is a dictionary with the
            keys "errorLog", "report" (holding "clientState") and "path"
            (the converted path, set even when the conversion failed).
        """
        site_info = SiteInformation()

        # the format string has two %s fields, so the copytool info is
        # expected to be a two-item tuple
        copytool_info = site_info.getCopyTool(stageIn=False)
        tolog("Site mover will use get command: %s, %s" % (copytool_info))

        # The copyprefix maps the prefix of the PFN onto the prefix of the TURL.
        # Example:
        #   copyprefix = srm://srm-eosatlas.cern.ch,srm://srm-atlas.cern.ch^root://eosatlas.cern.ch/,root://castoratlas-xrdssl/
        #   PFN  = srm://srm-eosatlas.cern.ch/eos/atlas/atlasdatadisk/rucio/mc12_8TeV/8d/c0/EVNT.01212395._000004.pool.root.1
        #   TURL = root://eosatlas.cern.ch//eos/atlas/atlasdatadisk/rucio/mc12_8TeV/8d/c0/EVNT.01212395._000004.pool.root.1
        turl = site_info.getCopyPrefixPath(path, stageIn=False)

        status = 0
        result = {"errorLog": None, "report": {"clientState": None}}

        if not turl.startswith("root:"):
            message = "Failed to use copyprefix to convert the current path to local path."
            tolog("!!WARNING!!1777!! %s" % (message))
            result["errorLog"] = message
            result["report"]["clientState"] = 'PSTAGE_FAIL'
            # NOTE(review): a stage-in error code is reported from a stage-out
            # helper - confirm whether a stage-out specific code was intended
            status = PilotErrors.ERR_STAGEINFAILED

        tolog("PFN=%s" % (path))
        tolog("TURL=%s" % (turl))
        result['path'] = turl

        return status, result
    def getStageInMode(self, lfn, prodDBlockToken):
        """Decide whether an input file is copied or accessed directly.

        The decision is delegated to SiteInformation.getDirectInAccessMode(),
        which receives the dispatch block token and a flag telling whether
        the LFN looks like a root file (self.isRootFileName).

        :param lfn: logical file name of the input file.
        :param prodDBlockToken: token passed on to getDirectInAccessMode().
        :return: tuple (status, output). status is
            PilotErrors.ERR_DIRECTIOFILE when direct access is selected and 0
            otherwise. output is a dictionary with the keys "errorLog",
            "report" and "transfer_mode".
        """
        report = {"clientState": None}
        output = {"errorLog": None, "report": report, "transfer_mode": None}

        root_file = self.isRootFileName(lfn)
        use_direct_access, mode = SiteInformation().getDirectInAccessMode(
            prodDBlockToken, root_file)

        # only overwrite the default when a transfer mode was actually given
        if mode:
            output["transfer_mode"] = mode

        if not use_direct_access:
            return 0, output

        # direct access: the file is flagged as found, no transfer timings apply
        report["clientState"] = 'FOUND_ROOT'
        report['relativeStart'] = None
        report['transferStart'] = None
        return PilotErrors.ERR_DIRECTIOFILE, output
Beispiel #3
0
def updateCopysetups(cmd3,
                     transferType=None,
                     useCT=None,
                     directIn=None,
                     useFileStager=None):
    """ Update the relevant copysetup field for remote I/O or file stager

    The 'copysetupin' queue parameter is updated when it is non-empty;
    otherwise the 'copysetup' parameter is updated instead. All keyword
    options are forwarded unchanged to SiteInformation.updateCopysetup().
    """

    site_info = SiteInformation()

    copysetupin_value = readpar('copysetupin')
    copysetup_value = readpar('copysetup')

    # select which field to update, then issue a single update call
    if copysetupin_value != "":
        field, value = 'copysetupin', copysetupin_value
    else:
        field, value = 'copysetup', copysetup_value

    site_info.updateCopysetup(cmd3,
                              field,
                              value,
                              transferType=transferType,
                              useCT=useCT,
                              directIn=directIn,
                              useFileStager=useFileStager)
Beispiel #4
0
    def fixStageInPath(self, path):
        """Rewrite a stage-in path so that it points at the SE to read from.

        Paths that do not start with "srm", or that already contain '?SFN=',
        are only passed through the copyprefix translation. For any other srm
        path the 'srm://<host>' prefix is replaced by the matching entry from
        seopt or, when there is no match, by the first entry of the 'se'
        queue parameter. The port of that SE is then added to the path with
        addPortToPath().

        :param path: file path (string).
        :return: the path as returned by
            SiteInformation.getCopyPrefixPath(path, stageIn=True).
        """

        if path[:3] == "srm" and '?SFN=' in path:
            self.log("Found SFN part in file path: %s" % (path))
        elif path[:3] == "srm":
            try:
                # 'srm://host/rest' -> ['srm:', '', 'host', 'rest']
                hostname = path.split('/', 3)[2]
            except Exception as e:
                # the arguments must be applied to the format string; they
                # used to be embedded in the literal and were never expanded
                self.log("!!WARNING!!2999!! Could not extract srm protocol for replacement, keeping path variable as it is: %s (%s)" %
                         (path, str(e)))
            else:
                # srm = 'srm://head01.aglt2.org'
                srm = 'srm://' + hostname

                # does seopt contain any matching srm's?
                sematch = self.getSEMatchFromSEOpt(srm)
                if sematch != "":
                    path = path.replace(srm, sematch)
                    self.log("Replaced %s with %s (from seopt) in path: %s" % (srm, sematch, path))
                    # 'se' used to be unbound on this branch, which made the
                    # port step below raise UnboundLocalError.
                    # NOTE(review): using the seopt match as the SE for the
                    # port lookup is an assumption - confirm
                    se = sematch
                else:
                    se = readpar('se').split(",")[0]
                    _dummytoken, se = self.extractSE(se)
                    tolog("Using SE: %s" % (se))

                    path = path.replace(srm, se)
                    self.log("Replaced %s with %s (from se) in path: %s" % (srm, se, path))

                # add port number from se to getfile if necessary
                path = self.addPortToPath(se, path)

        siteInformation = SiteInformation()
        path = siteInformation.getCopyPrefixPath(path, stageIn=True)

        return path
        surl = surl.replace("s3+rucio", "s3")
        if surl.startswith("root:"):
            sitemover = xrootdObjectstoreSiteMover(self.getSetup())
            return sitemover. put_data(source, destination, fsize, fchecksum, **pdict)
        if surl.startswith("s3:"):
            sitemover = S3ObjectstoreSiteMover(self.getSetup(), self._useTimerCommand)
            return sitemover. put_data(source, surl, fsize, fchecksum, **pdict)
        return -1, "No objectstore sitemover found for this scheme(%s)" % destination, destination, fsize, fchecksum, config_sm.ARCH_DEFAULT


if __name__ == '__main__':
    # Manual test scaffold: it only creates objects and sample values; every
    # call that would exercise the site mover is commented out below.

    # PilotHomeDir is set before SiteInformation is imported
    # NOTE(review): presumably the import relies on this variable - confirm
    os.environ['PilotHomeDir'] = os.getcwd()
    from SiteInformation import SiteInformation
    s1 = SiteInformation()
    #s1.getObjectstoresField("os_access_key", "eventservice", queuename='BNL_EC2W2_MCORE')

    f = objectstoreSiteMover()

    # sample transfer parameters; none of them is used by the active code here
    gpfn = "nonsens_gpfn"
    lfn = "AOD.310713._000004.pool.root.1"
    path = os.getcwd()
    fsize = "4261010441"
    fchecksum = "9145af38"
    dsname = "data11_7TeV.00177986.physics_Egamma.merge.AOD.r2276_p516_p523_tid310713_00"
    report = {}

    # disabled example calls (Python 2 print statements)
    #print f.getGlobalFilePaths(dsname)
    #print f.findGlobalFilePath(lfn, dsname)
    #print f.getLocalROOTSetup()
Beispiel #6
0
    def executePayload(self, thisExperiment, job):
        """Submit the ARGO job through the message queue and wait for its end.

        A message-queue connection is configured (host, port, SSL files), a
        queue named after the job status routing key is created, and the
        serialized self.argo_job is sent. The status queue is then polled
        until the job reaches the HISTORY state (success) or the FAILED
        state. Every received status is copied to job.hpcStatus and pushed
        to the pilot server. Any exception is logged and reported through
        self.failJob().

        :param thisExperiment: experiment object (not used by the code shown).
        :param job: job object; jobId and inFiles are read, hpcStatus is set.
        """

        t0 = os.times()
        res_tuple = None

        # loop over all run commands (only >1 for multi-trfs)
        getstatusoutput_was_interrupted = False
        job_status = None
        tolog("About to launch ARGO job")
        # Poll MQ for Job Status
        try:
            # Initiate MQ interface and send job
            self.argo_job.job_status_routing_key = '%s_job_status' % job.jobId
            si = SiteInformation()
            mi = MessageInterface()
            mi.host = 'atlasgridftp02.hep.anl.gov'
            mi.port = 5671

            # The certificate and key default to files located next to the
            # proxy certificate; X509_USER_CERT / X509_USER_KEY override them.
            proxy_cert_path = si.getSSLCertificate()
            cert_dir = os.path.dirname(proxy_cert_path)
            mi.ssl_cert = os.environ.get('X509_USER_CERT',
                                         cert_dir + "/rabbitmq-cert.pem")
            mi.ssl_key = os.environ.get('X509_USER_KEY',
                                        cert_dir + "/rabbitmq-key.pem")

            # use the CA bundle next to the proxy certificate when it exists,
            # otherwise fall back to the fixed grid location
            mi.ssl_ca_certs = '/grid/atlas/hpc/pilot_certs/cacerts.pem'
            ca_certs = cert_dir + "/rabbitmq-cacerts.pem"
            if os.path.isfile(ca_certs):
                mi.ssl_ca_certs = ca_certs

            mi.exchange_name = 'argo_users'

            # Create queue to get messages about ARGO Job status from MQ
            tolog('Opening connection with MQ')
            mi.open_blocking_connection()
            tolog('Create queue [%s]  to retrieve messages with job status' %
                  self.argo_job.job_status_routing_key)

            mi.create_queue(self.argo_job.job_status_routing_key,
                            self.argo_job.job_status_routing_key)

            # submit ARGO job to MQ
            routing_key = 'argo_job'
            if self.dev:
                routing_key = 'argo_job_dev'
            tolog('Sending msg with job to ARGO')
            mi.send_msg(self.argo_job.serialize(), routing_key)
            tolog(' done sending ')

            # Waiting till job done or failed
            # NOTE(review): the loop sleeps both at the top and at the bottom,
            # i.e. 10 s between two polls - confirm that this is intended
            ARGO_err_msg = ''
            while True:
                time.sleep(5)
                message = mi.receive_msg(self.argo_job.job_status_routing_key,
                                         True)
                # message[0..2] are logged as method, properties and body;
                # only a message with a body is processed
                if message[2]:
                    tolog(
                        "Got message from queue [%s]: method [%s], properties [%s], body [ %s ]"
                        % (self.argo_job.job_status_routing_key, message[0],
                           message[1], message[2]))
                    job_status = ArgoJobStatus.get_from_message(message[2])
                    job.hpcStatus = job_status.state
                    RunJobUtilities.updatePilotServer(
                        job, self.getPilotServer(), self.getPilotPort())

                    tolog("Extracted state: %s" % job_status.state)
                    if job_status.state == job_status.HISTORY:
                        res_tuple = (0, "Done")
                        break
                    elif job_status.is_failed():
                        # NOTE(review): this branch records the failure but
                        # keeps polling; if is_failed() is also true for the
                        # FAILED state the branch below is never reached
                        res_tuple = (1, "Failed")
                        ARGO_err_msg = ARGO_err_msg + ' ' + job_status.message
                    elif job_status.state == job_status.FAILED:
                        res_tuple = (1, "Failed")
                        ARGO_err_msg = ARGO_err_msg + ' ' + job_status.message
                        # NOTE(review): 'runJob' is not defined in this method
                        # (self.failJob is used below) - confirm it exists as
                        # a module-level name
                        runJob.failJob(1,
                                       0,
                                       job,
                                       ins=job.inFiles,
                                       pilotErrorDiag=ARGO_err_msg)
                        break
                time.sleep(5)

            mi.close()
            tolog(' closing connection to MQ')

            tolog("Job State: %s" % (job_status.state))

        except Exception as e:
            tolog("!!FAILED!!3000!! Failed to run command %s" % str(e))
            getstatusoutput_was_interrupted = True
            res_tuple = (1, "Failed")
            self.failJob(0,
                         self.__error.ERR_GENERALERROR,
                         job,
                         pilotErrorDiag=str(e))
Beispiel #7
0
class curlSiteMover(SiteMover.SiteMover):
    """ SiteMover for curl """

    copyCommand = "curl"
    checksum_command = "adler32"
    has_mkdir = False
    has_df = False
    has_getsize = False
    has_md5sum = True
    has_chmod = False
    timeout = 3600
    """ get proxy """

    si = SiteInformation()
    sslCert = si.getSSLCertificate()
    sslKey = sslCert
    sslCertDir = si.getSSLCertificatesDirectory()

    def __init__(self, setup_path, *args, **kwrds):
        """ Store the setup path; any further arguments are accepted but ignored """
        # NOTE(review): the base class __init__ is not called here - confirm this is intended
        self._setup = setup_path

    def get_timeout(self):
        """ Return the timeout value stored in the 'timeout' attribute """
        current_timeout = self.timeout
        return current_timeout

    def check_space(self, ub):
        """ Report a fixed value since space availability is not verifiable

        The argument is ignored and 999999 is always returned.
        """
        fixed_space_value = 999999
        return fixed_space_value

    def core_get_data(self, envsetup, token, source_surl, dest_path,
                      experiment):
        """ stage-in core function, can be overridden (see stormSiteMover) """

        error = PilotErrors()

        # determine which timeout option to use
        timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)

        sslCert = self.sslCert
        sslKey = self.sslKey
        sslCertDir = self.sslCertDir

        # used curl options:
        # --cert: <cert[:passwd]> Client certificate file and password (SSL)
        # --capath: <directory> CA directory (made using c_rehash) to verify
        # --location: Follow Location: hints (H)
        # --output: <file> Write output to <file> instead of stdout
        # --cilent: Makes Curl mute
        # --show-error: When used with -s it makes curl show error message if it fails
        # Removed for SL6: --ciphers <list of ciphers> (SSL)  Specifies  which  ciphers  to use in the connection.
        """ define curl command string """
        _cmd_str = 'lcg-gt %s https' % (source_surl)
        try:
            s, o = commands.getstatusoutput(_cmd_str)
            tolog("Executing command: %s" % (_cmd_str))
        except Exception, e:
            tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" %
                  (str(e), s, o))
            o = str(e)
        if s == 0:
            tolog("lcg-gt supported, get http path")
            source_surl = o.strip().split()
            source_surl = source_surl[0]
            _cmd_str = '%s curl --silent --show-error --cacert %s %s --capath %s --cert %s --key %s -L %s -o %s' % (
                envsetup, sslCert, timeout_option, sslCertDir, sslCert, sslKey,
                source_surl, dest_path)
#            _cmd_str = '%s curl --ciphers ALL:NULL --silent --show-error --cacert %s %s --capath %s --cert %s --key %s -L %s -o %s' % (envsetup, sslCert, timeout_option, sslCertDir, sslCert, sslKey, source_surl, dest_path)
        else:
            tolog(
                "lcg-gt not supported, get http path by replacing source_surl")
            _cmd_str = '%s curl --silent --show-error --cacert %s %s --capath %s --cert %s --key %s -L %s -o %s' % (
                envsetup, sslCert, timeout_option, sslCertDir, sslCert, sslKey,
                source_surl, dest_path)
            #            _cmd_str = '%s curl --ciphers ALL:NULL --silent --show-error --cacert %s %s --capath %s --cert %s --key %s -L %s -o %s' % (envsetup, sslCert, timeout_option, sslCertDir, sslCert, sslKey, source_surl, dest_path)
            _cmd_str = _cmd_str.replace("srm://", "https://")
        # add the full stage-out command to the job setup script
        #_cmd_str = _cmd_str.replace("file://", "-o ")

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        to_script = _cmd_str
        to_script = to_script.lstrip(' ')  # remove any initial spaces
        if to_script.startswith('/'):
            to_script = 'source ' + to_script
        thisExperiment.updateJobSetupScript(os.path.dirname(dest_path),
                                            to_script=to_script)

        tolog("Executing command: %s" % (_cmd_str))
        s = -1
        o = '(not defined)'
        t0 = os.times()
        try:
            s, o = commands.getstatusoutput(_cmd_str)
        except Exception, e:
            tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" %
                  (str(e), s, o))
            o = str(e)