Example #1
0
    def setupStageOutHPCEvent(self):
        """Prepare the objectstore stage-out state for HPC event-service output.

        Looks up the event-service objectstore path, fetches the stage-out
        copy setup, builds the initial tracing report and creates the
        objectstore site mover.  The results are stored on the instance
        (``self.__espath``, ``self.__report``, ``self.__siteMover``); nothing
        is returned.
        """
        # The job's output token (prodDBlockTokenForOutput) currently has no
        # influence on the path: the queuedata override that would use it is
        # disabled, so the path is resolved once for every job.
        # NOTE(review): reinstate an explicit override here if the token is
        # meant to select a different objectstore.
        self.__espath = getFilePathForObjectStore(filetype="eventservice")
        tolog("EventServer objectstore path: " + self.__espath)

        siteInfo = getSiteInformation(self.getExperiment())
        # get the copy tool
        setup = siteInfo.getCopySetup(stageIn=False)
        tolog("Copy Setup: %s" % (setup))

        # Only the dataset name is needed for the tracing report.
        dsname, _ = self.getDatasets()
        self.__report = getInitialTracingReport(
            userid=self.__job.prodUserID,
            sitename=self.__jobSite.sitename,
            dsname=dsname,
            eventType="objectstore",
            analysisJob=self.__analysisJob,
            jobId=self.__job.jobId,
            jobDefId=self.__job.jobDefinitionID,
            dn=self.__job.prodUserID)
        self.__siteMover = objectstoreSiteMover(setup)
Example #2
0
    def setup(self, experiment=None, surl=None):
        """Prepare the environment and load the event-service objectstore fields.

        Removes the http/https proxy variables when running on a bnl.gov
        host, then reads the "eventservice" bucket fields from the site
        information object and stores them on the instance.

        Returns a (code, message) tuple: (0, "") on success, or
        (PilotErrors.ERR_GETKEYPAIR, message) when the endpoint check fails.
        """
        # Proxy variables are only dropped on bnl.gov hosts.
        for proxy_var in ("http_proxy", "https_proxy"):
            if os.environ.get(proxy_var) and hostname and hostname.endswith(
                    "bnl.gov"):
                del os.environ[proxy_var]

        bucket = "eventservice"
        site_info = getSiteInformation(experiment)
        self.os_name = site_info.getObjectstoresField(
            "os_name", os_bucket_name=bucket)
        self.os_endpoint = site_info.getObjectstoresField(
            "os_endpoint", os_bucket_name=bucket)
        self.os_bucket_endpoint = site_info.getObjectstoresField(
            "os_bucket_endpoint", os_bucket_name=bucket)
        self.public_key = site_info.getObjectstoresField(
            "os_access_key", os_bucket_name=bucket)
        self.private_key = site_info.getObjectstoresField(
            "os_secret_key", os_bucket_name=bucket)

        # NOTE(review): os_ddmendpoint is not assigned in this method; it is
        # presumably set elsewhere on the instance - confirm that this is the
        # attribute that was meant to be checked.
        if not self.os_ddmendpoint or not self.os_bucket_endpoint:
            failure = "Failed to get S3 objectstore name"
            tolog(failure)
            return PilotErrors.ERR_GETKEYPAIR, failure

        return 0, ""
Example #3
0
 def getDefaultResources(self):
     """Build the default resource settings for this site.

     Reads the comma-separated "catchall" queue parameter, collects its
     ``key=value`` entries and lets them override the built-in defaults.

     Returns a dict of settings.  Values taken from catchall stay strings;
     only ``stageout_threads`` is converted to int and ``copy_input_files``
     is lower-cased.
     """
     site_info = getSiteInformation(self.getExperiment())
     overrides = {}
     for entry in site_info.readpar("catchall").split(","):
         if '=' not in entry:
             continue
         pieces = entry.split('=')
         # Only the text between the first and the second '=' is kept.
         overrides[pieces[0]] = pieces[1]

     fallbacks = (
         ('queue', 'regular'),
         ('mppwidth', 48),
         ('mppnppn', 1),
         ('walltime_m', 30),
         ('ATHENA_PROC_NUMBER', 23),
         ('max_nodes', 3),
         ('min_walltime_m', 20),
         ('max_walltime_m', 30),
         ('nodes', 2),
         ('min_nodes', 2),
         ('cpu_per_node', 24),
         ('partition', None),
         ('repo', None),
         ('max_events', 10000),
         ('initialtime_m', 15),
         ('time_per_event_m', 10),
         ('mode', 'normal'),
         ('backfill_queue', 'regular'),
     )
     res = {}
     for name, fallback in fallbacks:
         res[name] = overrides.get(name, fallback)

     # These two settings are normalised instead of being passed through.
     res['stageout_threads'] = int(overrides.get('stageout_threads', 4))
     copy_flag = overrides.get('copy_input_files', 'false')
     res['copy_input_files'] = copy_flag.lower()
     return res
Example #4
0
 def getDefaultResources(self):
     """Build the default resource settings for this site.

     Reads the comma-separated "catchall" queue parameter, collects its
     ``key=value`` entries and lets them override the built-in defaults.

     Returns a dict of settings.  Values taken from catchall stay strings;
     only ``stageout_threads`` is converted to int and ``copy_input_files``
     is lower-cased.
     """
     site_info = getSiteInformation(self.getExperiment())
     overrides = {}
     for entry in site_info.readpar("catchall").split(","):
         if '=' not in entry:
             continue
         pieces = entry.split('=')
         # Only the text between the first and the second '=' is kept.
         overrides[pieces[0]] = pieces[1]

     fallbacks = (
         ('queue', 'regular'),
         ('mppwidth', 48),
         ('mppnppn', 1),
         ('walltime_m', 30),
         ('ATHENA_PROC_NUMBER', 23),
         ('max_nodes', 3),
         ('min_walltime_m', 20),
         ('max_walltime_m', 30),
         ('nodes', 2),
         ('min_nodes', 2),
         ('cpu_per_node', 24),
         ('partition', None),
         ('repo', None),
         ('max_events', 10000),
         ('initialtime_m', 15),
         ('time_per_event_m', 10),
         ('mode', 'normal'),
         ('backfill_queue', 'regular'),
     )
     res = {}
     for name, fallback in fallbacks:
         res[name] = overrides.get(name, fallback)

     # These two settings are normalised instead of being passed through.
     res['stageout_threads'] = int(overrides.get('stageout_threads', 4))
     copy_flag = overrides.get('copy_input_files', 'false')
     res['copy_input_files'] = copy_flag.lower()
     return res
Example #5
0
    def setup(self, experiment=None, surl=None):
        """Prepare the environment and create the S3 objectstore client.

        Makes sure boto can be imported (retrying after appending the CVMFS
        copy to ``sys.path``), removes the http/https proxy variables, picks
        the key pair matching ``surl`` and stores an ``S3ObjctStore`` instance
        on ``self.s3Objectstore``.

        Returns a (code, message) tuple:
          * (0, "") on success;
          * (PilotErrors.ERR_UNKNOWN, message) when boto cannot be imported;
          * (PilotErrors.ERR_GETKEYPAIR, message) when no usable key pair is
            found for ``surl`` (this includes ``surl`` being None or empty).
        """
        # The imports only verify that boto is available in this environment.
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
        except ImportError:
            tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
            sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
            except ImportError:
                tolog("Failed to import boto again. exit")
                return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

        # Only non-empty proxy settings are removed.
        for proxy_var in ("http_proxy", "https_proxy"):
            if os.environ.get(proxy_var):
                del os.environ[proxy_var]

        si = getSiteInformation(experiment)
        keyPair = None
        # surl defaults to None; without a URL no key pair can be selected,
        # so fall through to the regular "no key pair" error below.
        if surl:
            if re.search(r"^s3://.*\.usatlas\.bnl\.gov:8443", surl) is not None:
                keyPair = si.getSecurityKey('BNL_ObjectStoreKey', 'BNL_ObjectStoreKey.pub')
            if surl.startswith("s3://s3.amazonaws.com:80"):
                keyPair = si.getSecurityKey('Amazon_ObjectStoreKey', 'Amazon_ObjectStoreKey.pub')
        if keyPair is None or keyPair["publicKey"] is None or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore %s " % (surl))
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

        self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"])
        return 0, ""
Example #6
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to the local SE.

        Based on the dCacheSiteMover put function.  The tracing report is
        labelled 'xrdcp'.

        Options read from ``pdict``: alt, lfn, guid, token, scope, dsname,
        analJob, testLevel, extradirs, experiment, proxycheck,
        prodSourceLabel and report (required; a missing 'report' key raises
        KeyError).

        When the destination paths cannot be resolved, the tracing report is
        updated with the tracer error and the result of
        ``self.put_data_retfail(ec, pilotErrorDiag)`` is returned.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # A 'ddm' source label overrides the analysis-job flag.
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid)

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            alt=alt,
            sitemover=self)  # quick workaround
        if ec != 0:
            reportState = {}
            reportState["clientState"] = tracer_error
            self.prepareReport(reportState, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # get the RSE from ToA
        # A failed lookup is only logged; it does not abort the stage-out.
        try:
            _RSE = self.getRSE(surl=surl)
        except Exception as e:
            tolog(
                "Warning: Failed to get RSE: %s (can not add this info to tracing report)"
                % str(e))
Example #7
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to an S3 objectstore.

        Options read from ``pdict``: lfn, guid, token, scope, experiment,
        outputDir, os_bucket_id, timeout (falls back to ``self.timeout`` when
        missing or falsy) and report (required; a missing 'report' key raises
        KeyError).

        ``destination`` is used as the SURL.  For destinations containing
        "log.tgz" the lfn inside the SURL is replaced by "scope:lfn"; for all
        other destinations the report's eventType is set to 'put_es'.

        Returns ``(0, pilotErrorDiag, surl, size, checksum, self.arch_type)``
        on success, or the result of
        ``self.put_data_retfail(status, output, surl)`` when stage-out fails.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        experiment = pdict.get('experiment', '')
        outputDir = pdict.get('outputDir', '')
        os_bucket_id = pdict.get('os_bucket_id', -1)
        timeout = pdict.get('timeout', None) or self.timeout

        # NOTE(review): the site information object is not used below; the
        # call is retained because its side effects are not visible from here.
        si = getSiteInformation(experiment)

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

        parsed = urlparse.urlparse(destination)
        hostname, _, port = parsed.netloc.partition(':')
        if port:
            report['remoteSite'] = '%s://%s:%s' % (parsed.scheme, hostname, int(port))
        else:
            # A destination without an explicit port must not abort the
            # transfer; report the site without the port part.
            report['remoteSite'] = '%s://%s' % (parsed.scheme, hostname)

        surl = destination
        self.log("surl=%s, timeout=%s" % (surl, timeout))
        if "log.tgz" in surl:
            surl = surl.replace(lfn, "%s:%s" % (scope, lfn))
        else:
            report['eventType'] = 'put_es'

        status, output, size, checksum = self.stageOut(source, surl, token, experiment, outputDir=outputDir, timeout=timeout, os_bucket_id=os_bucket_id, report=report)
        if status != 0:
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        self.prepareReport("DONE", report)
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
Example #8
0
    def setup(self, experiment=None, surl=None):
        """Prepare the environment and create the S3 objectstore client.

        Makes sure boto can be imported (retrying after appending the CVMFS
        copy to ``sys.path``), removes the http/https proxy variables, reads
        the "eventservice" key names from the site information object,
        resolves them to a key pair and stores an ``S3ObjctStore`` instance
        on ``self.s3Objectstore``.

        Returns a (code, message) tuple:
          * (0, "") on success;
          * (PilotErrors.ERR_UNKNOWN, message) when boto cannot be imported;
          * (PilotErrors.ERR_GETKEYPAIR, message) when the key names are
            missing or no complete key pair could be obtained.
        """
        # The imports only verify that boto is available in this environment.
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
        except ImportError:
            tolog(
                "Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path"
            )
            sys.path.append(
                '/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/'
            )
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
            except ImportError:
                tolog("Failed to import boto again. exit")
                return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

        # Only non-empty proxy settings are removed.
        for proxy_var in ("http_proxy", "https_proxy"):
            if os.environ.get(proxy_var):
                del os.environ[proxy_var]

        si = getSiteInformation(experiment)
        os_access_key = si.getObjectstoresField("os_access_key",
                                                "eventservice")
        os_secret_key = si.getObjectstoresField("os_secret_key",
                                                "eventservice")
        keyPair = None
        if os_access_key and os_secret_key:
            keyPair = si.getSecurityKey(os_secret_key, os_access_key)

        # Refuse to build a client from a missing or incomplete key pair
        # instead of failing later with an unrelated error.
        if keyPair is None or keyPair["publicKey"] is None or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

        os_is_secure = si.getObjectstoresField("os_is_secure", "eventservice")
        self.s3Objectstore = S3ObjctStore(keyPair["privateKey"],
                                          keyPair["publicKey"], os_is_secure)
        return 0, ""
Example #9
0
    def setupStageOutHPCEvent(self):
        """Prepare the objectstore stage-out state for HPC event-service output.

        Looks up the event-service objectstore path, fetches the stage-out
        copy setup, builds the initial tracing report and creates the
        objectstore site mover.  The results are stored on the instance
        (``self.__espath``, ``self.__report``, ``self.__siteMover``); nothing
        is returned.
        """
        # The job's output token (prodDBlockTokenForOutput) currently has no
        # influence on the path: the queuedata override that would use it is
        # disabled, so the path is resolved once for every job.
        # NOTE(review): reinstate an explicit override here if the token is
        # meant to select a different objectstore.
        self.__espath = getFilePathForObjectStore(filetype="eventservice")
        tolog("EventServer objectstore path: " + self.__espath)

        siteInfo = getSiteInformation(self.getExperiment())
        # get the copy tool
        setup = siteInfo.getCopySetup(stageIn=False)
        tolog("Copy Setup: %s" % (setup))

        # Only the dataset name is needed for the tracing report.
        dsname, _ = self.getDatasets()
        self.__report = getInitialTracingReport(
            userid=self.__job.prodUserID,
            sitename=self.__jobSite.sitename,
            dsname=dsname,
            eventType="objectstore",
            analysisJob=self.__analysisJob,
            jobId=self.__job.jobId,
            jobDefId=self.__job.jobDefinitionID,
            dn=self.__job.prodUserID)
        self.__siteMover = objectstoreSiteMover(setup)
Example #10
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk via the pre-signed-URL objectstore.

        Options read from ``pdict``: jobId, jobsetID, lfn, guid, experiment,
        prodSourceLabel (logged only), outputDir, timeout (falls back to
        ``self.timeout`` when missing or falsy), pandaProxySecretKey and
        report (required; a missing 'report' key raises KeyError).

        Returns ``(0, pilotErrorDiag, destination, size, checksum,
        self.arch_type)`` on success, or the result of
        ``self.put_data_retfail(status, output, destination)`` when
        stage-out fails.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        jobId = pdict.get('jobId', '')
        jobSetID = pdict.get('jobsetID', '')
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')
        outputDir = pdict.get('outputDir', '')
        pandaProxySecretKey = pdict.get('pandaProxySecretKey')
        timeout = pdict.get('timeout', None) or self.timeout

        # NOTE(review): the site information object is not used below; the
        # call is retained because its side effects are not visible from here.
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 's3objectstorepresignedurl', lfn, guid)

        surl = destination
        status, output, size, checksum = self.stageOut(source, jobId, lfn, jobSetID, pandaProxySecretKey, experiment, outputDir=outputDir, timeout=timeout)
        if status != 0:
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        # NOTE(review): unlike the failure path, success does not call
        # prepareReport here (that call is disabled); confirm this is intended.
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """Copy an input file from the SE to a local directory.

        ``gpfn`` is converted through the site's copy prefix; the converted
        path must start with "s3:" or the transfer is rejected with
        PilotErrors.ERR_STAGEINFAILED.  The local target is ``path``/``lfn``
        (``path`` defaults to './' when empty).

        Options read from ``pdict``: jobId, workDir, experiment and report
        (required; a missing 'report' key raises KeyError).

        On success the file state is updated to "transferred".  The tracing
        report is prepared with "DONE", the error name for the status, or
        "PSTAGE_FAIL" when no error name is known.

        Returns a (status, output) tuple; status is 0 on success.
        """
        error = PilotErrors()

        # Get input parameters from pdict
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        experiment = pdict.get('experiment', '')

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn,
                                           guid)

        if path == '':
            path = './'
        fullname = os.path.join(path, lfn)

        # get the site information object
        si = getSiteInformation(experiment)
        ret_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
        if not ret_path.startswith("s3:"):
            output = "Failed to use copyprefix to convert the current path to S3 path."
            tolog("!!WARNING!!1777!! %s" % (output))
            status = PilotErrors.ERR_STAGEINFAILED
        else:
            status, output = self.stageIn(ret_path, fullname, fsize, fchecksum,
                                          experiment)

        if status == 0:
            updateFileState(lfn,
                            workDir,
                            jobId,
                            mode="file_state",
                            state="transferred",
                            type="input")
            state = "DONE"
        else:
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"

        self.prepareReport(state, report)
        return status, output
Example #12
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to the local SE.

        Based on the dCacheSiteMover put function.  The tracing report is
        labelled 'xrootdObjectstore'.

        Options read from ``pdict``: alt, lfn, guid, token, scope, dsname,
        analJob, testLevel, extradirs, experiment, logPath, proxycheck,
        prodSourceLabel and report (required; a missing 'report' key raises
        KeyError).

        The SURL is ``logPath`` when that option is non-empty, otherwise
        ``destination`` joined with ``lfn``.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        logPath = pdict.get('logPath', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # A 'ddm' source label overrides the analysis-job flag.
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'],
                                           'xrootdObjectstore', lfn, guid)

        filename = os.path.basename(source)

        # An explicit log path takes precedence over destination/lfn.
        if logPath != "":
            surl = logPath
        else:
            surl = os.path.join(destination, lfn)

        # get the DQ2 site name from ToA
        # A failed lookup is only logged; it does not abort the stage-out.
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=surl)
        except Exception as e:
            tolog(
                "Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)"
                % str(e))
Example #13
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to an S3 objectstore.

        Options read from ``pdict``: lfn, guid, token, analJob, experiment,
        prodSourceLabel and report (required; a missing 'report' key raises
        KeyError).  ``destination`` is used as the SURL unchanged.

        Returns ``(0, pilotErrorDiag, destination, size, checksum,
        self.arch_type)`` on success, or the result of
        ``self.put_data_retfail(status, output, destination)`` when
        stage-out fails.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # NOTE(review): the site information object is not used below; the
        # call is retained because its side effects are not visible from here.
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # The analysis-job flag only affects this log message in this method.
        if prodSourceLabel == 'ddm' and pdict.get('analJob', False):
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 's3objectstore',
                                           lfn, guid)

        surl = destination
        status, output, size, checksum = self.stageOut(source, surl, token,
                                                       experiment)
        if status != 0:
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        self.prepareReport("DONE", report)
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
Example #14
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to the local SE.

        Based on the dCacheSiteMover put function.  The tracing report is
        labelled 'gfal-copy'.

        Options read from ``pdict``: alt, lfn, guid, token, scope, dsname,
        analJob, testLevel, extradirs, experiment, proxycheck,
        prodSourceLabel, outputDir and report (required; a missing 'report'
        key raises KeyError).

        When the destination paths cannot be resolved, the tracing report is
        updated with the tracer error and the result of
        ``self.put_data_retfail(ec, pilotErrorDiag)`` is returned.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""


        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')
        outputDir = pdict.get('outputDir', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # A 'ddm' source label overrides the analysis-job flag.
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)


        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround
        if ec != 0:
            reportState = {}
            reportState["clientState"] = tracer_error
            self.prepareReport(reportState, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # get the RSE from ToA
        # A failed lookup is only logged; it does not abort the stage-out.
        try:
            _RSE = self.getRSE(surl=surl)
        except Exception as e:
            tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
Example #15
0
    def setup(self, experiment):
        """Verify and cache the environment setup used by this site mover.

        Builds the setup string from the local ROOT setup (plus an
        X509_USER_PROXY export when that variable is set), verifies it with
        ``self.verifySetup`` and, when verification succeeds, stores it in
        ``self._setup`` and marks the mover as set up.  A failed verification
        is retried once when a default setup exists.

        Returns (0, None) when setup was already completed by an earlier
        call; otherwise the (status, output) pair of the last
        ``verifySetup`` call.
        """
        if self.__isSetuped:
            return 0, None
        self.__experiment = experiment
        thisExperiment = getExperiment(experiment)
        self.useTracingService = thisExperiment.useTracingService()
        si = getSiteInformation(experiment)
        self._defaultSetup = self.getLocalROOTSetup(si)

        def build_env(setup_str):
            # Terminate a non-empty setup with ';' and append the user proxy
            # export when a proxy is configured in the environment.
            env = setup_str.strip()
            if env != "" and not env.endswith(';'):
                env += ";"
            if 'X509_USER_PROXY' in os.environ:
                env += " export X509_USER_PROXY=%s;" % (
                    os.environ['X509_USER_PROXY'])
            return env

        envsetupTest = build_env(self._defaultSetup)
        self.log("to verify site setup: %s " % envsetupTest)
        status, output = self.verifySetup(envsetupTest, experiment)
        self.log("site setup verifying: status: %s, output: %s" %
                 (status, output["errorLog"]))

        if status != 0 and self._defaultSetup:
            # NOTE(review): the first attempt already uses the default setup,
            # so this retry verifies the same string a second time.
            self.log("Try to use default envsetup")
            envsetupTest = build_env(self._defaultSetup)
            self.log("verify default setup: %s " % envsetupTest)
            status, output = self.verifySetup(envsetupTest, experiment)
            self.log("default setup verifying: status: %s, output: %s" %
                     (status, output["errorLog"]))

        if status == 0:
            self._setup = envsetupTest
            self.__isSetuped = True

        return status, output
Example #16
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to an S3 objectstore.

        Options read from ``pdict``: lfn, guid, token, analJob, experiment,
        prodSourceLabel and report (required; a missing 'report' key raises
        KeyError).  ``destination`` is used as the SURL unchanged.

        Returns ``(0, pilotErrorDiag, destination, size, checksum,
        self.arch_type)`` on success, or the result of
        ``self.put_data_retfail(status, output, destination)`` when
        stage-out fails.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # NOTE(review): the site information object is not used below; the
        # call is retained because its side effects are not visible from here.
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # The analysis-job flag only affects this log message in this method.
        if prodSourceLabel == 'ddm' and pdict.get('analJob', False):
            tolog("Treating PanDA Mover job as a production job during stage-out")

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

        surl = destination
        status, output, size, checksum = self.stageOut(source, surl, token, experiment)
        if status != 0:
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        self.prepareReport("DONE", report)
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to the local SE.

        Based on the dCacheSiteMover put function.  The tracing report is
        labelled 'xrootdObjectstore'.

        Options read from ``pdict``: alt, lfn, guid, token, scope, dsname,
        analJob, testLevel, extradirs, experiment, logPath, proxycheck,
        prodSourceLabel and report (required; a missing 'report' key raises
        KeyError).

        The SURL is ``logPath`` when that option is non-empty, otherwise
        ``destination`` joined with ``lfn``.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""


        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        logPath = pdict.get('logPath', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        # A 'ddm' source label overrides the analysis-job flag.
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrootdObjectstore', lfn, guid)


        filename = os.path.basename(source)

        # An explicit log path takes precedence over destination/lfn.
        if logPath != "":
            surl = logPath
        else:
            surl = os.path.join(destination, lfn)

        # get the RSE from ToA
        # A failed lookup is only logged; it does not abort the stage-out.
        try:
            _RSE = self.getRSE(surl=surl)
        except Exception as e:
            tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
Example #18
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        Copy an input file from the SE to a local directory.

        The gpfn is first translated with the site copyprefix; the transfer is
        only attempted when the translated path is an S3 path ("s3:" prefix).
        The final state is written to the tracing report.

        Returns a tuple (status, output) where status is 0 on success.
        """

        error = PilotErrors()

        # optional parameters handed over by the caller
        os_bucket_id = pdict.get('os_bucket_id', -1)
        proxycheck = pdict.get('proxycheck', False)
        experiment = pdict.get('experiment', '')
        workDir = pdict.get('workDir', '')
        jobId = pdict.get('jobId', '')

        # direct reading control variable (False for direct reading mode; file should not be copied)
        useCT = pdict.get('usect', True)
        prodDBlockToken = pdict.get('access', '')

        # Rucio tracing report ('report' must be present in pdict)
        report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

        if path == '':
            path = './'
        fullname = os.path.join(path, lfn)

        # convert the path with the copyprefix and transfer only S3 paths
        si = getSiteInformation(experiment)
        s3_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
        if s3_path.startswith("s3:"):
            gpfn = s3_path
            status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment, os_bucket_id=os_bucket_id)
        else:
            errorLog = "Failed to use copyprefix to convert the current path to S3 path."
            tolog("!!WARNING!!1777!! %s" % (errorLog))
            status = PilotErrors.ERR_STAGEINFAILED
            output = errorLog

        # translate the status into the state reported to the tracing service
        if status != 0:
            state = PilotErrors().getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
        else:
            updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
            state = "DONE"

        self.prepareReport(state, report)
        return status, output
Example #19
0
    def setup(self, experiment):
        """
        Prepare and verify the environment setup string (done only once).

        The local ROOT setup is used as the setup string, extended with an
        export of X509_USER_PROXY when that variable is set. If verification
        fails and a default setup exists, the verification is tried once more.

        Returns (0, None) when already set up, otherwise the (status, output)
        pair of the last verifySetup() call.
        """
        if self.__isSetuped:
            return 0, None

        self.__experiment = experiment
        self.useTracingService = getExperiment(experiment).useTracingService()
        self._defaultSetup = self.getLocalROOTSetup(getSiteInformation(experiment))

        def add_proxy_export(setup_str):
            # terminate the setup with ';' and append the user proxy if available
            cmd = setup_str.strip()
            if cmd != "" and not cmd.endswith(';'):
                cmd += ";"
            if 'X509_USER_PROXY' in os.environ:
                cmd += " export X509_USER_PROXY=%s;" % (os.environ['X509_USER_PROXY'])
            return cmd

        # first attempt
        candidate = add_proxy_export(self._defaultSetup)
        self.log("to verify site setup: %s " % candidate)
        status, output = self.verifySetup(candidate, experiment)
        self.log("site setup verifying: status: %s, output: %s" % (status, output["errorLog"]))

        # second attempt with the default setup
        if status != 0 and self._defaultSetup:
            self.log("Try to use default envsetup")
            candidate = add_proxy_export(self._defaultSetup)
            self.log("verify default setup: %s " % candidate)
            status, output = self.verifySetup(candidate, experiment)
            self.log("default setup verifying: status: %s, output: %s" % (status, output["errorLog"]))

        if status == 0:
            self._setup = candidate
            self.__isSetuped = True

        return status, output
    def setup(self, experiment=None, surl=None):
        """
        Read the objectstore settings of the "eventservice" bucket.

        On hosts whose name ends with "bnl.gov" the http_proxy/https_proxy
        environment variables are removed first.

        Returns (0, "") on success, otherwise (PilotErrors.ERR_GETKEYPAIR, message).
        """

        for proxy_var in ("http_proxy", "https_proxy"):
            if os.environ.get(proxy_var) and hostname and hostname.endswith("bnl.gov"):
                del os.environ[proxy_var]

        bucket = "eventservice"
        si = getSiteInformation(experiment)
        self.os_name = si.getObjectstoresField("os_name", os_bucket_name=bucket)
        self.os_endpoint = si.getObjectstoresField("os_endpoint", os_bucket_name=bucket)
        self.os_bucket_endpoint = si.getObjectstoresField("os_bucket_endpoint", os_bucket_name=bucket)
        self.public_key = si.getObjectstoresField("os_access_key", os_bucket_name=bucket)
        self.private_key = si.getObjectstoresField("os_secret_key", os_bucket_name=bucket)

        # NOTE(review): os_ddmendpoint is not assigned in this method; presumably
        # it is set elsewhere in the class - confirm that this is the intended field
        if self.os_ddmendpoint and self.os_bucket_endpoint:
            return 0, ""

        errorLog = "Failed to get S3 objectstore name"
        tolog(errorLog)
        return PilotErrors.ERR_GETKEYPAIR, errorLog
Example #21
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Move the file from the current local directory to the local pilot init dir.

        Parameters:
        source -- full path of the file in the local directory
        destination -- destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name);
                       NOT USED (pinitdir is used instead)
        fsize -- file size of the source file (evaluated if 0)
        fchecksum -- MD5 checksum of the source file (evaluated if 0)
        pdict -- additional parameters that may make sense with specific movers

        The pilot init dir is assumed to be locally mounted with a local path
        equal to the remote path. If both fsize and fchecksum are given and
        non-zero, they are used without being re-evaluated.

        Returns: exitcode, pilotErrorDiag, gpfn, fsize, fchecksum
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # optional parameters handed over by the caller
        experiment = pdict.get('experiment', "ATLAS")
        pilot_initdir = pdict.get('pinitdir', '')
        testLevel = pdict.get('testLevel', '0')
        sitename = pdict.get('sitename', '')
        analJob = pdict.get('analJob', False)
        dsname = pdict.get('dsname', '')
        DN = pdict.get('DN', '')

        si = getSiteInformation(experiment)

        # tier 3 sites write to a dedicated output directory
        if si.isTier3():
            outputDir = self.getTier3Path(dsname, DN)
            tolog("Writing output on a Tier 3 site to: %s" % (outputDir))

            # make sure the directory exists; a failure is only logged
            try:
                self.mkdirWperm(outputDir)
            except Exception as e:
                tolog("!!WARNING!!2999!! Could not create dir: %s, %s" %
                      (outputDir, str(e)))
Example #22
0
    def setup(self, experiment=None, surl=None):
        """
        Read and validate the objectstore settings of the "eventservice" bucket.

        The fields are fetched one at a time (os_endpoint, os_bucket_endpoint,
        os_access_key, os_secret_key); the first missing field aborts the setup.
        A trailing "/" is removed from both endpoints and a leading "/" from
        the bucket endpoint.

        Returns (0, "") on success, otherwise (PilotErrors.ERR_GETKEYPAIR, message).
        """
        # unset proxy for BNL object store (direct access only). Todo: set/unset proxy before/after access to bnl.org
        for proxy_var in ("http_proxy", "https_proxy"):
            if os.environ.get(proxy_var) and hostname and hostname.endswith("bnl.gov"):
                del os.environ[proxy_var]

        si = getSiteInformation(experiment)

        def lookup(field):
            # all fields are taken from the eventservice bucket (os_name is not used here)
            return si.getObjectstoresField(field, os_bucket_name="eventservice")

        def failed(logStr):
            # log the problem and build the error tuple for the caller
            tolog(logStr)
            return PilotErrors.ERR_GETKEYPAIR, logStr

        self.os_endpoint = lookup("os_endpoint")
        if not self.os_endpoint:
            return failed("Failed to get os_endpoint value")
        if self.os_endpoint.endswith("/"):
            self.os_endpoint = self.os_endpoint[:-1]

        self.os_bucket_endpoint = lookup("os_bucket_endpoint")
        if not self.os_bucket_endpoint:
            return failed("Failed to get os_bucket_endpoint value")
        if self.os_bucket_endpoint.endswith("/"):
            self.os_bucket_endpoint = self.os_bucket_endpoint[:-1]
        if self.os_bucket_endpoint.startswith("/"):
            self.os_bucket_endpoint = self.os_bucket_endpoint[1:]

        self.public_key = lookup("os_access_key")
        if not self.public_key:
            return failed("Failed to get os_access_key (os public key) value")

        self.private_key = lookup("os_secret_key")
        if not self.private_key:
            return failed("Failed to get os_secret_key (os private key) value")

        return 0, ""
Example #23
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Move the file from the current local directory to the local pilot init dir.

        Arguments:
        source -- full path of the file in the local directory
        destination -- destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name);
                       NOT USED (pinitdir is used instead)
        fsize -- file size of the source file (evaluated if 0)
        fchecksum -- MD5 checksum of the source file (evaluated if 0)
        pdict -- additional parameters that may make sense with specific movers

        It is assumed that the pilot init dir is locally mounted and that its
        local path is the same as the remote path. Non-zero fsize and fchecksum
        values are taken as given without reevaluating them.

        Returns: exitcode, pilotErrorDiag, gpfn, fsize, fchecksum
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # read the optional input parameters
        DN = pdict.get('DN', '')
        dsname = pdict.get('dsname', '')
        analJob = pdict.get('analJob', False)
        sitename = pdict.get('sitename', '')
        testLevel = pdict.get('testLevel', '0')
        pilot_initdir = pdict.get('pinitdir', '')
        experiment = pdict.get('experiment', "ATLAS")

        # the site information object tells whether this is a tier 3
        si = getSiteInformation(experiment)
        on_tier3 = si.isTier3()

        if on_tier3:
            outputDir = self.getTier3Path(dsname, DN)
            tolog("Writing output on a Tier 3 site to: %s" % (outputDir))

            # create the dirs if they don't exist (best effort, failure is logged)
            try:
                self.mkdirWperm(outputDir)
            except Exception as e:
                tolog("!!WARNING!!2999!! Could not create dir: %s, %s" % (outputDir, str(e)))
Example #24
0
    def setup(self, experiment=None, surl=None, os_bucket_id=-1, label='r'):
        """
        Load boto (once) and create the S3 objectstore client.

        Parameters:
        experiment -- experiment name passed to getSiteInformation()
        surl -- not used by this method
        os_bucket_id -- objectstore bucket id; -1 selects the default
                        'eventservice' bucket
        label -- label handed to getObjectstoreEndpointID()

        Returns (0, "") on success, otherwise (error code, message).
        """
        if not self.__isBotoLoaded:
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
            except ImportError:
                # retry once with the cvmfs copy of boto on the module search path
                tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
                sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
                try:
                    import boto
                    import boto.s3.connection
                    from boto.s3.key import Key
                except ImportError:
                    tolog("Failed to import boto again. exit")
                    return PilotErrors.ERR_UNKNOWN, "Failed to import boto"
            self.__isBotoLoaded = True

        si = getSiteInformation(experiment)

        # os_bucket_id will only be set if the setup function is called, if setup via the init function - get the default bucket id
        if os_bucket_id != -1:
            ddmendpoint = si.getObjectstoreDDMEndpointFromBucketID(os_bucket_id)
        else:
            ddmendpoint = si.getObjectstoreDDMEndpoint(os_bucket_name='eventservice') # assume eventservice
        endpoint_id = si.getObjectstoreEndpointID(ddmendpoint=ddmendpoint, label=label, protocol='s3')
        os_access_key, os_secret_key, os_is_secure = si.getObjectstoreKeyInfo(endpoint_id, ddmendpoint=ddmendpoint)

        # both key names are needed to look up the key pair
        if not (os_access_key and os_secret_key):
            tolog("Failed to get the keyPair name for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair name for S3 objectstore"

        keyPair = si.getSecurityKey(os_secret_key, os_access_key)
        if "privateKey" not in keyPair or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

        self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"], os_is_secure, self._useTimerCommand)

        return 0, ""
Example #25
0
    def getSpecialSetupCommand(self):
        """
        Return the special setup command (an empty string if none applies).

        Only tier-3 sites get one: the 'envsetup' queue parameter, terminated
        with ';'.
        """

        # Note: this special setup command is hardly used and could probably be removed.
        # If any special setup should be added to the setup string before the trf is executed,
        # the command defined in this method could be added to the run command by using
        # method addSPSetupToCmd().
        # The special command is also forwarded to the get and put functions (currently not used).

        # envsetup is added on tier-3 sites only (unknown if this is still needed)
        if not getSiteInformation(self.__experiment).isTier3():
            return ""

        _envsetup = readpar('envsetup')
        if _envsetup == "":
            return ""

        special_setup_cmd = "" + _envsetup
        if special_setup_cmd.endswith(';'):
            return special_setup_cmd
        return special_setup_cmd + ";"
Example #26
0
    def getSpecialSetupCommand(self):
        """
        Build special_setup_cmd if necessary.

        The result is empty unless the site is a tier 3 with a non-empty
        'envsetup' parameter; in that case it is the envsetup string ending
        with ';'.
        """

        # Note: hardly used, could probably be removed. A special setup that has to run
        # before the trf can be added to the run command with addSPSetupToCmd(); the command
        # is also forwarded to the get and put functions (currently not used).

        si = getSiteInformation(self.__experiment)
        tier3 = si.isTier3()

        pieces = [""]
        if tier3:
            # add envsetup on tier-3 sites (unknown if this is still needed)
            _envsetup = readpar('envsetup')
            if _envsetup != "":
                pieces[0] += _envsetup
                if not pieces[0].endswith(';'):
                    pieces.append(";")

        return "".join(pieces)
Example #27
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Copy an output file from disk to the local SE with curl.

        The function is based on the dCacheSiteMover put function. The upload
        URL is built from the first protocol that Rucio reports for the RSE
        "<self.site_name>_<token>" and from the scope based path of the file.
        After the upload the replica information is requested from Rucio (or
        from the 'httpredirector' queue parameter if set) and compared with
        the local checksum. For /pnfs/ SURLs without a remote checksum, the
        dCache checksum is used instead.

        Parameters:
        source -- full path of the local file
        destination -- not used here (the paths come from si.getProperPaths)
        fsize -- size of the source file (evaluated if fsize or fchecksum is 0)
        fchecksum -- checksum of the source file (evaluated if fsize or fchecksum is 0)
        pdict -- optional parameters: lfn, guid, token, scope, dsname, testLevel,
                 experiment, analJob, prodSourceLabel, report

        Every failure is returned through self.put_data_retfail(). The part of
        the method shown here has no explicit return for the successful case.
        """
        # 'time' is the time() function in this module, so sleep is imported separately
        from time import sleep

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        testLevel = pdict.get('testLevel', '0')
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the DQ2 tracing report (an empty report is used if none was given)
        try:
            report = pdict['report']
        except KeyError:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
            if ec != 0:
                self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # NOTE(review): the envsetup string is not used any more (it was only needed by the
        # disabled proxy check and lcg-cp commands); the call is kept in case it has side effects
        self.getEnvsetup()
        tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope)
        if ec != 0:
            self.__sendReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # build the https destination from the Rucio protocol info (from Vincenzo Lavorini)
        sitemover = SiteMover.SiteMover()
        v_path = sitemover.getPathFromScope(scope, filename)
        rucio_c = Client()
        if "ATLAS" in token:
            # presumably drops a leading "ATLAS" prefix from the space token - verify
            token_ok = token[5:]
        else:
            token_ok = token
        local_se_token = self.site_name + "_" + token_ok
        # ask Rucio only once; the first protocol is the one that is used
        protocol = list(rucio_c.get_protocols(local_se_token))[0]
        v_address = "https://%s:%s%s" % (protocol['hostname'], protocol['port'], protocol['prefix'])
        tolog("prova1 address is %s" % (v_address))
        # avoid a doubled "rucio" component when prefix and path both contain it
        if "rucio/" in v_address and "/rucio" in v_path:
            v_address = v_address[:-7]
            tolog("prova2 address is %s" % (v_address))
        elif "rucio" in v_address and "rucio" in v_path:
            v_address = v_address[:-6]
            tolog("prova3 address is %s" % (v_address))
        full_http_surl = v_address + v_path
        tolog("prova3 full_http__surl is %s" % (full_http_surl))

        full_surl = surl
        if full_surl.startswith('token:'):
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        if testLevel == "1":
            source = "thisisjustatest"

        # NOTE(review): self.sslKey is passed for --cert, --key and --cacert alike;
        # confirm that one file (e.g. a proxy) is meant to serve all three options
        _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (self.sslKey, self.sslKey, self.sslKey, self.sslCertDir, full_http_surl, source)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
        _cmd_out, _cmd_stderr = _cmd.communicate()
        report['relativeStart'] = time()
        report['transferStart'] = time()
        report['validateStart'] = time()
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Curl command output = %s" % (_cmd_out))
        tolog("Command finished after %f s" % (t))
        if "bytes uploaded" not in _cmd_out:
            # a failed upload is only logged here; the checksum verification below
            # is what actually fails the stage-out
            tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))

        verified = False

        # getting the remote checksum from Rucio: read the auth token header
        # NOTE(review): the file name 'token_fle' is kept as found - confirm it is not a typo
        with open('token_fle', 'r') as token_file:
            token_rucio = token_file.readline()
        # never write the full token to the log
        pos2print = token_rucio.find("CN")
        token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
        tolog("Token I am using: %s" % (token_rucio2print))
        httpredirector = readpar('httpredirector')

        # select the server that is asked for the replica information
        if not httpredirector:
            curl_prefix = "curl -v -1"
            base_url = "https://rucio-lb-prod.cern.ch"
        else:
            tolog("HTTP redirector I am using: %s" % (httpredirector))
            curl_prefix = "curl -v -1 -v"
            if "http" in httpredirector:
                base_url = httpredirector
            else:
                base_url = "https://%s" % (httpredirector)

        def replica_cmd(auth_header):
            # same command for execution and logging, only the token header differs
            return "%s -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (curl_prefix, auth_header, base_url, scope, filename)

        cmd = replica_cmd(token_rucio)
        cmd2print = replica_cmd(token_rucio2print)

        # NOTE(review): remote_checksum receives the raw stdout of the curl call, so the
        # retry condition and the comparison with fchecksum below act on that raw
        # output - confirm whether the hash should be extracted from it first
        trial_n = 1
        remote_checksum = "none"
        while remote_checksum == "none" and trial_n < 8:
            trial_n += 1
            tolog("Getting remote checksum: command to be executed: %s" % (cmd2print))
            checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
            remote_checksum, stderr = checksum_cmd.communicate()
            tolog("Remote checksum as given by rucio %s" % (remote_checksum))
            if remote_checksum == "none":
                tolog("In checking checksum: command std error: %s" % (stderr))
                pilotErrorDiag = "Cannot get the checksum of file on SE"
                tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
                tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
                # wait a little before the next trial
                sleep(3)

        # compare the checksums if the remote checksum was extracted
        tolog("Remote checksum: %s" % str(remote_checksum))
        tolog("Local checksum: %s" % (fchecksum))

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
                tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
                if csumtype == "adler32":
                    self.__sendReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    self.__sendReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
        else:
            tolog("Skipped primary checksum verification (remote checksum not known)")

        # fallback if no remote checksum could be obtained above
        if "/pnfs/" in surl and not remote_checksum:
            # for dCache systems we can test the checksum with the use method
            tolog("Detected dCache system: will verify local checksum with the local SE checksum")
            # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
            path = surl[surl.find('/pnfs/'):]
            # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....#
            tolog("File path: %s" % (path))

            _filename = os.path.basename(path)
            _dir = os.path.dirname(path)

            # get the remote checksum
            tolog("Local checksum: %s" % (fchecksum))
            try:
                remote_checksum = self.getdCacheChecksum(_dir, _filename)
            except Exception as e:
                pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(e)
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            else:
                if remote_checksum == "NOSUCHFILE":
                    pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                    tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                    self.__sendReport('NOSUCHFILE', report)
                    return self.put_data_retfail(error.ERR_NOSUCHFILE, pilotErrorDiag)
                elif remote_checksum:
                    tolog("Remote checksum: %s" % (remote_checksum))
                else:
                    tolog("Could not get remote checksum")

            if remote_checksum:
                if remote_checksum != fchecksum:
                    pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                     (csumtype, _filename, remote_checksum, fchecksum)
                    if csumtype == "adler32":
                        self.__sendReport('AD_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                    else:
                        self.__sendReport('MD5_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    tolog("Remote and local checksums verified")
                    verified = True
Example #28
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE

        Visible part of the function: parse the options, obtain the local file
        size/checksum, verify the environment setup and (optionally) the proxy,
        build the destination SURL and look up the DQ2 site name.

        source      -- path to the local file that is to be staged out
        destination -- not referenced in the visible part of the function
        fsize       -- local file size; if fsize or fchecksum is 0 both are
                       re-read with getLocalFileInfo() (adler32)
        fchecksum   -- local file checksum, see fsize
        pdict       -- keyword options: alt, lfn, guid, token, scope, dsname,
                       analJob, testLevel, extradirs, experiment, proxycheck,
                       prodSourceLabel and the mandatory 'report' entry
                       (a missing 'report' raises KeyError)

        On every failure shown here the method calls self.__sendReport() with
        a state string and returns the result of self.put_data_retfail().
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        # a 'ddm' (PanDA Mover) job flagged as analysis job is handled like a production job
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'lcg2', lfn, guid)

        # preparing variables
        # (the local file info is only looked up when the caller did not supply both values)
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
            if ec != 0:
                self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        # ("default" is used when no usable checksum value is available)
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        if alt:
            # use a cvmfs setup for stage-out to alternative SE
            envsetup = si.getLocalEMISetup()
            # NOTE(review): envsetup[-1] raises IndexError for an empty setup string - confirm it can never be empty
            if envsetup[-1] != ";":
                envsetup += "; "
        else:
            envsetup = self.getEnvsetup(alt=alt)

        ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
        if ec != 0:
            self.__sendReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag) 

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        # the proxy is only verified when the caller asked for it
        if proxycheck:
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
            if s != 0:
                self.__sendReport('NO_PROXY', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt)
        if ec != 0:
            self.__sendReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # putfile keeps the SURL exactly as returned; full_surl is the same string without a leading 'token:...' part
        putfile = surl
        full_surl = putfile
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile = %s" % (putfile))
        tolog("full_surl = %s" % (full_surl))

        # get the DQ2 site name from ToA
        # (a failed lookup is only logged, it does not abort the stage-out)
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=putfile)
        except Exception, e: 
            tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
Example #29
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from local dir to SE and register into dataset and catalogues """

        # Get input parameters from pdict
        # Mancinelli: added sitename and appid variable 
        sitename = pdict.get('sitename', '')
        appid = pdict.get('report').get('appid','')

        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        dsname = pdict.get('dsname', '')
        workDir = pdict.get('workDir', '')
        analyJob = pdict.get('analJob', False)
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        if prodSourceLabel == 'ddm' and analyJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analyJob = False

        # get the DQ2 tracing report
        try:
            report = pdict['report']
        except:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'local'
            # mark the relative start
            report['relativeStart'] = time()
            # the current file
            report['filename'] = lfn
            report['guid'] = guid.replace('-','')
#            report['dataset'] = dsname

        filename = os.path.basename(source)

        # get the local file size and checksum
        csumtype = self.checksum_command
        if fsize == 0 or fchecksum == 0:
            ec, self.__pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype=csumtype)
            if ec != 0:
                self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, self.__pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        ec, pilotErrorDiag = verifySetupCommand(self.__error, envsetup)
        if ec != 0:
            self.__sendReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag) 

        #Mancinelli: TODO change. This is a Hack.. need to undrestand how to get Job data in a proper manner
        #JobData=  '%s/Job_%s.py' % (os.path.dirname(source), appid)
        JobData= '%s/jobState-%s-test.pickle' % (os.path.dirname(source), appid)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(self.__error, analyJob, token, prodSourceLabel, dsname, filename, sitename, JobData)
        if ec != 0:
            self.__sendReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        dst_gpfn = surl
        tolog("dst_gpfn: %s" % (dst_gpfn))

        # get the DQ2 site name from ToA
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn)
        except Exception, e:
            tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
Example #30
0
    def put_data(self, pfn, destination, fsize=0, fchecksum=0, dsname='', extradirs='', **pdict):
        """ copy output file from disk to local SE

        Visible part of the function: validate the input, create the LFC
        directory with lfc-mkdir, then build and run an lcg-cr command that
        copies the file and registers it under the given LFN and GUID.

        pfn         -- path to the local file that is to be staged out
        destination -- not referenced in the visible part of the function
        fsize       -- local file size; if fsize or fchecksum is 0 both are
                       re-read with getLocalFileInfo() (adler32)
        fchecksum   -- local file checksum, see fsize
        dsname      -- dataset name; an empty name fails the stage-out
        extradirs   -- not referenced in the visible part of the function
        pdict       -- keyword options: lfn, guid, token, logFile, sitename,
                       proxycheck, experiment, analJob, prodSourceLabel and the
                       mandatory 'report' entry (a missing 'report' raises KeyError)

        On every failure shown here the method calls self.__sendReport() with
        a state string and returns the result of self.put_data_retfail().
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        logFile = pdict.get('logFile', '')
        sitename = pdict.get('sitename', '')
        proxycheck = pdict.get('proxycheck', False)
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        # a 'ddm' (PanDA Mover) job flagged as analysis job is handled like a production job
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # last path component of the local file
        filename = pfn.split('/')[-1]

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid)

        # is the dataset defined?
        if dsname == '':
            pilotErrorDiag = "Dataset name not specified to put_data"
            tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            self.__sendReport('DSN_UNDEF', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        # preparing variables
        # (the local file info is only looked up when the caller did not supply both values)
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(pfn, csumtype="adler32")
            if ec != 0:
                self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag) 

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
        if ec != 0:
            self.__sendReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag) 

        # do we need to check the user proxy?
        if proxycheck:
            s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
            if s != 0:
                self.__sendReport('PROXY_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename)
        if ec != 0:
            self.__sendReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # logical file name in the LFC: the LFC directory plus the LFN
        lfclfn = os.path.join(lfcdir, lfn)
        # LFC LFN = /grid/atlas/dq2/testpanda/testpanda.destDB.dfb45803-1251-43bb-8e7a-6ad2b6f205be_sub01000492/
        #364aeb74-8b62-4c8f-af43-47b447192ced_0.job.log.tgz

        # putfile is the SURL
        putfile = surl
        full_surl = putfile
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile = %s" % (putfile))
        tolog("full_surl = %s" % (full_surl))

        # get the DQ2 site name from ToA
        # (a failed lookup is only logged, it does not abort the stage-out)
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=putfile)
        except:
            # WARNING: do not print the exception here since it can sometimes not be converted to a string! (problem seen at Taiwan)
            tolog("Warning: Failed to get the DQ2 site name (can not add this info to tracing report)")
        else:
            report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
            tolog("DQ2 site name: %s" % (_dq2SiteName))

        # get the absolute (full) path to the file
        fppfn = os.path.abspath(pfn)
        tolog("pfn=%s" % (pfn))

        # create the LFC directory; the echo puts the LFC_HOST in use into the command output
        cmd = '%s echo "LFC_HOST=$LFC_HOST"; lfc-mkdir -p %s' % (envsetup, lfcdir)
        # export LFC_HOST=lfc0448.gridpp.rl.ac.uk ; echo "LFC_HOST=$LFC_HOST";
        #lfc-mkdir -p /grid/atlas/dq2/testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647
        tolog("Executing command: %s" % (cmd))
        s, o = commands.getstatusoutput(cmd)
        if s == 0:
            tolog("LFC setup and mkdir succeeded")
            tolog("Command output: %s" % (o))
        else:
            tolog("!!WARNING!!2990!! LFC setup and mkdir failed. Status=%s Output=%s" % (s, o))
            # only an output that is exactly this string is reported as a proxy problem
            if o == "Could not establish context":
                pilotErrorDiag = "Could not establish context: Proxy / VO extension of proxy has probably expired"
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.dumpExtendedProxy(envsetup)
                self.__sendReport('CONTEXT_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
            else:
                pilotErrorDiag = "LFC setup and mkdir failed: %s" % (o)
                self.__sendReport('LFC_SETUP_FAIL', report)
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        # determine which timeout option to use
        if self.isNewLCGVersion("%s lcg-cr" % (envsetup)):
            timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
        else:
            timeout_option = "-t %d" % (self.timeout)

        # used lcg-cr options:
        # --verbose: verbosity on
        #      --vo: specifies the Virtual Organization the user belongs to
        #        -T: specify SRM version
        #        -s: space token description
        #        -b: BDII disabling
        #        -t: time-out
        #        -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
        #        -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
        #        -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
        #            the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
        #            generated in the same format as with the Replica Manager
        # with a space token the destination starts at 'srm://' and the -T/-s/-b options are added
        if token:
            surl = putfile[putfile.index('srm://'):]
            _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas -T srmv2 -s %s -b %s -l %s -g %s -d %s file:%s' % (envsetup, token, timeout_option, lfclfn, guid, surl, fppfn)
        else:
            surl = putfile
            _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)
        
        tolog("Executing command: %s" % (_cmd_str))
        # preset the status so that it has a value even if the command call raises
        s = -1
        t0 = os.times()
        report['relativeStart'] = time()
        report['transferStart'] =  time()
        try:
            s, o = commands.getstatusoutput(_cmd_str)
        except Exception, e:
            tolog("!!WARNING!!2990!! Exception caught: %s" % (str(e)))
            # use the exception text as the command output
            o = str(e)
Example #31
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        Stage in an input file by creating a symbolic link to it in the job work dir.
        The file is not copied and neither its size nor its checksum is verified.
        Parameters are:
        gpfn -- full source URL; only its directory part is used, and only on Tier-3 sites
        lfn -- name of the file, both in the input directory and in the destination directory
        path -- destination directory (in a local file system) in which the link is created
        fsize, fchecksum, guid -- not used
        pdict -- options: 'pinitdir' (pilot launch dir), 'inputDir', 'experiment' (default "ATLAS")
        Return the tuple (exit code, pilotErrorDiag); the exit code is 0 when the link was created
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # options handed over by the caller
        launch_dir = pdict.get('pinitdir', '')
        experiment = pdict.get('experiment', "ATLAS")

        # Tier-3 sites take the input dir from the gpfn, all other sites from the inputDir option
        si = getSiteInformation(experiment)
        if not si.isTier3():
            inputDir = pdict.get('inputDir', '')
        else:
            inputDir = os.path.dirname(gpfn)

        if inputDir != "":
            tolog("Get function will use requested input file dir: %s" % (inputDir))
        else:
            # nothing requested: fall back to the pilot launch dir, which must then be set
            tolog("Get function will use pilot launch dir as input file dir: %s" % (launch_dir))
            inputDir = launch_dir
            if inputDir == "":
                pilotErrorDiag = "Input dir not set (can not figure out where the input files are)"
                tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
                return error.ERR_STAGEINFAILED, pilotErrorDiag

        link_source = os.path.join(inputDir, lfn)
        link_name = os.path.join(path, lfn)

        # the input file has to exist; a missing DBRelease file gets its own error code
        if not os.path.exists(link_source):
            pilotErrorDiag = "No such file or directory: %s" % (link_source)
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            if "DBRelease" in link_source:
                return error.ERR_MISSDBREL, pilotErrorDiag
            return error.ERR_NOSUCHFILE, pilotErrorDiag

        # link the input file into the job work dir
        cmd = "ln -s %s %s" % (link_source, link_name)
        tolog("Executing command: %s" % (cmd))
        exit_status, cmd_output = commands.getstatusoutput(cmd)
        if exit_status != 0:
            pilotErrorDiag = "Error linking the file: %d, %s" % (exit_status, cmd_output)
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            return error.ERR_STAGEINFAILED, pilotErrorDiag

        return 0, pilotErrorDiag
Example #32
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        Make an input file available in the job work dir through a symbolic link.
        Nothing is copied, and the file size and checksum are not checked.
        Parameters are:
        gpfn -- full source URL; its directory part is the input dir on Tier-3 sites, otherwise unused
        lfn -- file name, looked up in the input dir and reused as the name of the link
        path -- destination directory (in a local file system) that receives the link
        fsize, fchecksum, guid -- not used
        pdict -- options: 'pinitdir' (pilot launch dir), 'inputDir', 'experiment' (default "ATLAS")
        Return the tuple (exit code, pilotErrorDiag); the exit code is 0 on success
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # read the options from pdict
        pilot_launch_dir = pdict.get('pinitdir', '')
        experiment = pdict.get('experiment', "ATLAS")

        # pick the input dir: directory of the gpfn on Tier-3 sites, the inputDir option elsewhere
        site_info = getSiteInformation(experiment)
        if site_info.isTier3():
            inputDir = os.path.dirname(gpfn)
        else:
            inputDir = pdict.get('inputDir', '')

        if inputDir != "":
            tolog("Get function will use requested input file dir: %s" % (inputDir))
        else:
            # no input dir given, so the pilot launch dir is the last resort
            tolog("Get function will use pilot launch dir as input file dir: %s" % (pilot_launch_dir))
            inputDir = pilot_launch_dir
            if inputDir == "":
                pilotErrorDiag = "Input dir not set (can not figure out where the input files are)"
                tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
                return error.ERR_STAGEINFAILED, pilotErrorDiag

        existing_file = os.path.join(inputDir, lfn)
        new_link = os.path.join(path, lfn)

        # give up when the input file is missing (DBRelease files have a dedicated error code)
        if not os.path.exists(existing_file):
            pilotErrorDiag = "No such file or directory: %s" % (existing_file)
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            if "DBRelease" in existing_file:
                return error.ERR_MISSDBREL, pilotErrorDiag
            return error.ERR_NOSUCHFILE, pilotErrorDiag

        # create the symbolic link in the job work dir
        cmd = "ln -s %s %s" % (existing_file, new_link)
        tolog("Executing command: %s" % (cmd))
        rc, out = commands.getstatusoutput(cmd)
        if rc != 0:
            pilotErrorDiag = "Error linking the file: %d, %s" % (rc, out)
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            return error.ERR_STAGEINFAILED, pilotErrorDiag

        return 0, pilotErrorDiag
Example #33
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE

        source      -- path to the local file that is to be staged out
        destination -- not used
        fsize       -- not used (the local file info is always re-read)
        fchecksum   -- not used (the local file info is always re-read)
        pdict       -- keyword options: alt, lfn, guid, token, scope, dsname,
                       analJob, experiment, prodSourceLabel and the mandatory
                       'report' entry (a missing 'report' raises KeyError)

        The SURL from getProperPaths() is converted with getCopyPrefixPathNew()
        and has to start with "s3:". After a successful stageOut() whose size
        equals the local size, the local adler32 checksum is computed again; if
        it is unchanged it replaces the checksum returned by stageOut(). A
        differing size is not treated as an error here.

        Returns (0, pilotErrorDiag, surl, size, checksum, self.arch_type) on
        success and the result of self.put_data_retfail() on failure. All
        failures raised by this function itself use ERR_STAGEOUTFAILED.
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        def stageout_failed(diagnostics):
            """ log the problem, close the tracing report and build the failure return value """
            tolog("!!WARNING!!1777!! %s" % (diagnostics))
            self.prepareReport("PSTAGE_FAIL", report)
            # this is a stage-out, so the stage-out error code is reported (not the stage-in one)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, diagnostics, surl)

        # get local adler32 checksum
        status, output, adler_size, adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
        if status != 0:
            return stageout_failed('Failed to get local file %s adler32 checksum: %s' % (source, output))

        ret_path = si.getCopyPrefixPathNew(surl, stageIn=False)
        tolog("Convert destination: %s to new path: %s" % (surl, ret_path))
        if ret_path.startswith("s3:"):
            status, output, size, checksum = self.stageOut(source, ret_path, token, experiment)
        else:
            # the transfer is not attempted without an S3 path
            output = "Failed to use copyprefix to convert the current path to S3 path."
            tolog("!!WARNING!!1777!! %s" % (output))
            status = error.ERR_STAGEOUTFAILED
            size = None
            checksum = None

        if status != 0:
            # name the tracing state after the error code when the code is known
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        if size == adler_size:
            tolog("The file size is not changed. Will check whether adler32 changed.")
            status, output, new_adler_size, new_adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
            if status != 0:
                return stageout_failed('Failed to get local file %s adler32 checksum: %s' % (source, output))
            if adler_checksum != new_adler_checksum:
                # the local file was modified while it was being transferred
                return stageout_failed("The file checksum changed from %s(before transfer) to %s(after transfer)" % (adler_checksum, new_adler_checksum))
            tolog("The file checksum is not changed. Will use adler32 %s to replace the md5 checksum %s" % (adler_checksum, checksum))
            checksum = adler_checksum

        self.prepareReport("DONE", report)
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
Example #34
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ Moves the file from the current local directory to a storage element
        source: full path of the file in  local directory
        destination: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
        Assumes that the SE is locally mounted and its local path is the same as the remote path
        if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
        returns: exitcode, gpfn,fsize, fchecksum
        """

        error = PilotErrors()

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        dsname = pdict.get('dsname', '')
        analyJob = pdict.get('analyJob', False)
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        if prodSourceLabel == 'ddm' and analyJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analyJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn,
                                           guid)

        if self._setup:
            _setup_str = "source %s; " % self._setup
        else:
            _setup_str = ''

        ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        report['relativeStart'] = time()

        ec = 0
        if fsize == 0 or fchecksum == 0:
            if not self.useExternalAdler32():
                # Can not use external adler32 command for remote file since the command is
                # not available (defaulting to md5sum for put operation)
                tolog(
                    "Command not found: adler32.sh (will switch to md5sum for local file checksum)"
                )
                csumtype = "default"
            else:
                csumtype = "adler32"
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype=csumtype)
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        tolog("File destination: %s" % (destination))
        dst_se = destination
        # srm://dcsrm.usatlas.bnl.gov:8443/srm/managerv1?SFN=/pnfs/usatlas.bnl.gov/
        if (dst_se.find('SFN') != -1):
            s = dst_se.split('SFN=')
            dst_loc_se = s[1]
            dst_prefix = s[0] + 'SFN='
        else:
            _sentries = dst_se.split('/', 3)
            # 'method://host:port' is it always a ftp server? can it be srm? something else?
            dst_serv = _sentries[0] + '//' + _sentries[2]
            # dst_host = _sentries[2] # host and port
            dst_loc_se = '/' + _sentries[3]
            dst_prefix = dst_serv

        # use bare destination when it starts with root://
        if destination.startswith('root://'):
            dst_loc_se = destination
            dst_prefix = ''

#        report['dataset'] = dsname

# May be be a comma list but take first always
# (Remember that se can be a list where the first is used for output but any can be used for input)
        se = readpar('se').split(",")[0]
        _dummytoken, se = self.extractSE(se)
        tolog("Using SE: %s" % (se))

        filename = os.path.basename(source)

        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analyJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # are we transfering to a space token?
        if token != None and token != "":
            # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
            if "dst:" in token:
                token = token[len('dst:'):]
                tolog("Dropped dst: part of space token descriptor; token=%s" %
                      (token))
                token = "ATLASGROUPDISK"
                tolog("Space token descriptor reset to: %s" % (token))

            # get the proper destination
            #destination = self.getDestination(analyJob, token)

            #if destination == '':
            #    pilotErrorDiag = "put_data destination path in SE not defined"
            #    tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            #    self.prepareReport('SE_DEST_PATH_UNDEF', report)
            #    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

            #tolog("Going to store job output at destination: %s" % (destination))
            # add the space token to the destination string
            #dst_loc_sedir = os.path.join(destination, os.path.join(extradirs, dsname))
            #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
            #dst_loc_pfn += "?oss.cgroup=%s" % (token)
            dst_loc_pfn = dst_gpfn + "?oss.cgroup=%s" % (token)
            #else:
            #dst_loc_sedir = os.path.join(dst_loc_se, os.path.join(extradirs, dsname))
            #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
            dst_loc_pfn = dst_gpfn

        dst_gpfn = dst_prefix + dst_loc_pfn
        tolog("Final destination path: %s" % (dst_loc_pfn))
        tolog("dst_gpfn: %s" % (dst_gpfn))

        # get the Rucio site name from ToA
        try:
            _RSE = self.getRSE(surl=dst_gpfn)
        except Exception, e:
            tolog(
                "Warning: Failed to get RSE: %s (can not add this info to tracing report)"
                % str(e))
Example #35
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the Rucio tracing report
        try:
            report = pdict['report']
        except:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        if proxycheck:
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup,
                                                           limit=2)
            if s != 0:
                self.prepareReport('NO_PROXY', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        putfile = surl
        full_surl = putfile
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile: %s" % (putfile))
        tolog("full_surl: %s" % (full_surl))

        # get https surl
        full_http_surl = full_surl.replace("srm://", "https://")

        # get the RSE from ToA
        try:
            _RSE = self.getRSE(surl=putfile)
        except Exception, e:
            tolog(
                "Warning: Failed to get RSE: %s (can not add this info to tracing report)"
                % str(e))
Example #36
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ Moves the file from the current local directory to a storage element
        source: full path of the file in  local directory
        destination: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
        Assumes that the SE is locally mounted and its local path is the same as the remote path
        if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
        returns: exitcode, gpfn,fsize, fchecksum
        """

        error = PilotErrors()

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        dsname = pdict.get('dsname', '')
        analyJob = pdict.get('analyJob', False)
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        if prodSourceLabel == 'ddm' and analyJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analyJob = False

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn, guid)

        if self._setup:
            _setup_str = "source %s; " % self._setup
        else:
            _setup_str = ''

        ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        report['relativeStart'] = time()

        ec = 0
        if fsize == 0 or fchecksum == 0:
            if not self.useExternalAdler32():
                # Can not use external adler32 command for remote file since the command is
                # not available (defaulting to md5sum for put operation)
                tolog("Command not found: adler32.sh (will switch to md5sum for local file checksum)")
                csumtype = "default"
            else:
                csumtype = "adler32"
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype=csumtype)
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        tolog("File destination: %s" % (destination))
        dst_se = destination
        # srm://dcsrm.usatlas.bnl.gov:8443/srm/managerv1?SFN=/pnfs/usatlas.bnl.gov/
        if( dst_se.find('SFN') != -1 ):
            s = dst_se.split('SFN=')
            dst_loc_se = s[1]
            dst_prefix = s[0] + 'SFN='
        else:
            _sentries = dst_se.split('/', 3)
            # 'method://host:port' is it always a ftp server? can it be srm? something else?
            dst_serv = _sentries[0] + '//' + _sentries[2]
            # dst_host = _sentries[2] # host and port
            dst_loc_se = '/'+ _sentries[3]
            dst_prefix = dst_serv

        # use bare destination when it starts with root://
        if destination.startswith('root://'):
            dst_loc_se = destination
            dst_prefix = ''

#        report['dataset'] = dsname

        # May be be a comma list but take first always
        # (Remember that se can be a list where the first is used for output but any can be used for input)
        se = readpar('se').split(",")[0]
        _dummytoken, se = self.extractSE(se)
        tolog("Using SE: %s" % (se))

        filename = os.path.basename(source)

        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analyJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # are we transfering to a space token?
        if token != None and token != "":
            # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
            if "dst:" in token:
                token = token[len('dst:'):]
                tolog("Dropped dst: part of space token descriptor; token=%s" % (token))
                token = "ATLASGROUPDISK"
                tolog("Space token descriptor reset to: %s" % (token))

            # get the proper destination
            #destination = self.getDestination(analyJob, token)

            #if destination == '':
            #    pilotErrorDiag = "put_data destination path in SE not defined"
            #    tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            #    self.prepareReport('SE_DEST_PATH_UNDEF', report)
            #    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

            #tolog("Going to store job output at destination: %s" % (destination))
            # add the space token to the destination string
            #dst_loc_sedir = os.path.join(destination, os.path.join(extradirs, dsname))
            #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
            #dst_loc_pfn += "?oss.cgroup=%s" % (token)
            dst_loc_pfn = dst_gpfn + "?oss.cgroup=%s" % (token)
        #else:
            #dst_loc_sedir = os.path.join(dst_loc_se, os.path.join(extradirs, dsname))
            #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
            dst_loc_pfn = dst_gpfn

        dst_gpfn = dst_prefix + dst_loc_pfn
        tolog("Final destination path: %s" % (dst_loc_pfn))
        tolog("dst_gpfn: %s" % (dst_gpfn))

        # get the Rucio site name from ToA
        try:
            _RSE = self.getRSE(surl=dst_gpfn)
        except Exception, e:
            tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
Example #37
0
    def put_data(self, pfn, destination, fsize=0, fchecksum=0, dsname='', extradirs='', **pdict):
        """ copy output file from disk to local SE """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        logFile = pdict.get('logFile', '')
        sitename = pdict.get('sitename', '')
        proxycheck = pdict.get('proxycheck', False)
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        filename = pfn.split('/')[-1]

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid)

        # is the dataset defined?
        if dsname == '':
            pilotErrorDiag = "Dataset name not specified to put_data"
            tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            self.prepareReport('DSN_UNDEF', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(pfn, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        # do we need to check the user proxy?
        if proxycheck:
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
            if s != 0:
                self.prepareReport('PROXY_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag, surl=dst_gpfn)

        lfclfn = os.path.join(lfcdir, lfn)
        # LFC LFN = /grid/atlas/dq2/testpanda/testpanda.destDB.dfb45803-1251-43bb-8e7a-6ad2b6f205be_sub01000492/
        #364aeb74-8b62-4c8f-af43-47b447192ced_0.job.log.tgz

        # putfile is the SURL
        putfile = surl
        full_surl = putfile
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile = %s" % (putfile))
        tolog("full_surl = %s" % (full_surl))

        # get the DQ2 site name from ToA
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=putfile)
        except:
            # WARNING: do not print the exception here since it can sometimes not be converted to a string! (problem seen at Taiwan)
            tolog("Warning: Failed to get the DQ2 site name (can not add this info to tracing report)")
        else:
            report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
            tolog("DQ2 site name: %s" % (_dq2SiteName))

        # get the absolute (full) path to the file
        fppfn = os.path.abspath(pfn)
        tolog("pfn=%s" % (pfn))

        cmd = '%s echo "LFC_HOST=$LFC_HOST"; lfc-mkdir -p %s' % (envsetup, lfcdir)
        # export LFC_HOST=lfc0448.gridpp.rl.ac.uk ; echo "LFC_HOST=$LFC_HOST";
        #lfc-mkdir -p /grid/atlas/dq2/testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647
        tolog("Executing command: %s" % (cmd))
        s, o = commands.getstatusoutput(cmd)
        if s == 0:
            tolog("LFC setup and mkdir succeeded")
            tolog("Command output: %s" % (o))
        else:
            tolog("!!WARNING!!2990!! LFC setup and mkdir failed. Status=%s Output=%s" % (s, o))
            if o == "Could not establish context":
                pilotErrorDiag = "Could not establish context: Proxy / VO extension of proxy has probably expired"
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.dumpExtendedProxy(envsetup)
                self.prepareReport('CONTEXT_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag, surl=full_surl)
            else:
                pilotErrorDiag = "LFC setup and mkdir failed: %s" % (o)
                self.prepareReport('LFC_SETUP_FAIL', report)
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag, surl=full_surl)

        # determine which timeout option to use
        if self.isNewLCGVersion("%s lcg-cr" % (envsetup)):
            timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
        else:
            timeout_option = "-t %d" % (self.timeout)

        # used lcg-cr options:
        # --verbose: verbosity on
        #      --vo: specifies the Virtual Organization the user belongs to
        #        -T: specify SRM version
        #        -s: space token description
        #        -b: BDII disabling
        #        -t: time-out
        #        -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
        #        -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
        #        -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
        #            the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
        #            generated in the same format as with the Replica Manager
        if token:
            # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
            if "dst:" in token:
                token = token[len('dst:'):]
                tolog("Dropped dst: part of space token descriptor; token=%s" % (token))
                token = "ATLASGROUPDISK"
                tolog("Space token descriptor reset to: %s" % (token))

            surl = putfile[putfile.index('srm://'):]
            _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas -T srmv2 -s %s -b %s -l %s -g %s -d %s file:%s' % (envsetup, token, timeout_option, lfclfn, guid, surl, fppfn)
        else:
            surl = putfile
            _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)

        # GoeGrid testing: _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-crXXX --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)

        tolog("Executing command: %s" % (_cmd_str))
        s = -1
        t0 = os.times()
        report['relativeStart'] = time()
        report['transferStart'] =  time()
        try:
            s, o = commands.getstatusoutput(_cmd_str)
        except Exception, e:
            tolog("!!WARNING!!2990!! Exception caught: %s" % (str(e)))
            o = str(e)
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        proxycheck = pdict.get('proxycheck', False)
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report
        try:
            report = pdict['report']
        except:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        #if proxycheck:
        #    s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
        #    if s != 0:
        #        self.prepareReport('NO_PROXY', report)
        #        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        #else:
        #    tolog("Proxy verification turned off")
        tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)
#here begins the new magic... from Vincenzo Lavorini
        sitemover = SiteMover.SiteMover()
        v_path = sitemover.getPathFromScope(scope, filename)
        rucio_c = Client()
        if "ATLAS" in token:
            token_ok = token[+5:]
        else:
            token_ok = token
        local_se_token = self.site_name + "_" + token_ok
        v_hostname = [
            j['hostname'] for j in rucio_c.get_protocols(local_se_token)
        ]
        v_port = [j['port'] for j in rucio_c.get_protocols(local_se_token)]
        v_prefix = [j['prefix'] for j in rucio_c.get_protocols(local_se_token)]
        v_address = "https://%s:%s%s" % (v_hostname[0], v_port[0], v_prefix[0])
        tolog("prova1 address is %s" % (v_address))
        if "rucio/" in v_address and "/rucio" in v_path:
            v_address = v_address[:-7]
            tolog("prova2 address is %s" % (v_address))
        elif "rucio" in v_address and "rucio" in v_path:
            v_address = v_address[:-6]
            tolog("prova3 address is %s" % (v_address))
        full_http_surl = v_address + v_path
        tolog("prova3 full_http__surl is %s" % (full_http_surl))

        full_surl = surl
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz

#putfile=surl
#tolog("putfile: %s" % (putfile))
#tolog("full_surl: %s" % (full_surl))

# get https surl
#full_http_surl = full_surl.replace("srm://", "https://")

# get the DQ2 site name from ToA ---why? Is it needed?
#try:
#    _dq2SiteName = self.getDQ2SiteName(surl=putfile)
#except Exception, e:
#    tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
#else:
#    report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
#    tolog("DQ2 site name: %s" % (_dq2SiteName))

        if testLevel == "1":
            source = "thisisjustatest"

        # determine which timeout option to use
        #commented by Lavorini timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)
        timeout_option = "--connect-timeout 300"

        sslCert = self.sslCert
        sslKey = self.sslKey
        sslCertDir = self.sslCertDir

        # check htcopy if it is existed or env is set properly
        #_cmd_str = 'which htcopy'
        #try:
        #    s, o = commands.getstatusoutput(_cmd_str)
        #except Exception, e:
        #    tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" % (str(e), s, o))
        #    o = str(e)

        #if s != 0:
        #    tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
        #    o = o.replace('\n', ' ')
        #    tolog("!!WARNING!!2990!! check PUT command failed. Status=%s Output=%s" % (str(s), str(o)))
        #return 999999

        # cleanup the SURL if necessary (remove port and srm substring)
        #if token:
        # used lcg-cp options:
        # --srcsetype: specify SRM version
        #   --verbose: verbosity on
        #        --vo: specifies the Virtual Organization the user belongs to
        #          -s: space token description
        #          -b: BDII disabling
        #          -t: time-out
        # (lcg-cr) -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
        #          -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
        #          -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
        #              the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
        #              generated in the same format as with the Replica Manager
        # _cmd_str = '%s lcg-cr --verbose --vo atlas -T srmv2 -s %s -b -t %d -l %s -g %s -d %s file:%s' %\
        #           (envsetup, token, self.timeout, lfclfn, guid, surl, fppfn)
        # usage: lcg-cp [-h,--help] [-i,--insecure] [-c,--config config_file]
        #               [-n nbstreams] [-s,--sst src_spacetokendesc] [-S,--dst dest_spacetokendesc]
        #               [-D,--defaultsetype se|srmv1|srmv2] [-T,--srcsetype se|srmv1|srmv2] [-U,--dstsetype se|srmv1|srmv2]
        #               [-b,--nobdii] [-t timeout] [-v,--verbose]  [-V,--vo vo] [--version] src_file  dest_file

        # surl = putfile[putfile.index('srm://'):]
        #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s?spacetoken=%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl, token)
        #_cmd_str = '%s lcg-cp --verbose --vo atlas -b %s -U srmv2 -S %s file://%s %s' % (envsetup, timeout_option, token, source, full_surl)
        #else:
        # surl is the same as putfile
        #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl)
        #_cmd_str = '%s lcg-cp --vo atlas --verbose -b %s -U srmv2 file://%s %s' % (envsetup, timeout_option, source, full_surl)
        _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (
            self.sslKey, self.sslKey, self.sslKey, self.sslCertDir,
            full_http_surl, source)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
        _cmd_out, _cmd_stderr = _cmd.communicate()
        report['relativeStart'] = time()
        report['transferStart'] = time()
        report['validateStart'] = time()
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Curl command output = %s" % (_cmd_out))
        tolog("Command finished after %f s" % (t))
        if "bytes uploaded" not in _cmd_out:
            tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))
        '''
            # check if file was partially transferred, if so, remove it
            _ec = self.removeFile(envsetup, self.timeout, dst_gpfn)
            if _ec == -2:
                pilotErrorDiag += "(failed to remove file) " # i.e. do not retry stage-out

            if "Could not establish context" in o:
                pilotErrorDiag += "Could not establish context: Proxy / VO extension of proxy has probably expired"
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('CONTEXT_FAIL', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
            elif "No such file or directory" in o:
                pilotErrorDiag += "No such file or directory: %s" % (o)
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('NO_FILE_DIR', report)
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
            elif "globus_xio: System error" in o:
                pilotErrorDiag += "Globus system error: %s" % (o)
                tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                self.prepareReport('GLOBUS_FAIL', report)
                return self.put_data_retfail(error.ERR_PUTGLOBUSSYSERR, pilotErrorDiag)
            else:
                if len(o) == 0 and t >= self.timeout:
                    pilotErrorDiag += "Copy command self timed out after %d s" % (t)
                    tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
                    self.prepareReport('CP_TIMEOUT', report)
                    return self.put_data_retfail(error.ERR_PUTTIMEOUT, pilotErrorDiag)
                else:
                    if len(o) == 0:
                        pilotErrorDiag += "Copy command returned error code %d but no output" % (ec)
                    else:
                        pilotErrorDiag += o
                    self.prepareReport('CP_ERROR', report)
                    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
	'''
        verified = False
        #getting the remote checksum from Rucio:
        token_file = open('token_fle', 'r')
        token_rucio = token_file.readline()
        pos2print = token_rucio.find("CN")
        token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
        tolog("Token I am using: %s" % (token_rucio2print))
        httpredirector = readpar('httpredirector')

        trial_n = 1
        remote_checksum = "none"
        while (remote_checksum == "none" and trial_n < 8):
            trial_n += 1
            if not httpredirector:
                #cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip |awk \'{FS=\"hash type=\"}; {print $2}\' |awk \'{FS=\">\"}; {print $2}\' |awk \'{FS=\"<\"} {print $1}\'| grep -v \'^$\'"%(token_rucio,scope,filename)
                cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (
                    token_rucio, scope, filename)
                cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (
                    token_rucio2print, scope, filename)
            else:
                if "http" in httpredirector:
                    tolog("HTTP redirector I am using: %s" % (httpredirector))
                    cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (
                        token_rucio, httpredirector, scope, filename)
                    cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (
                        token_rucioi2print, httpredirector, scope, filename)
                else:
                    tolog("HTTP redirector I am using: %s" % (httpredirector))
                    cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (
                        token_rucio, httpredirector, reps[0].scope,
                        reps[0].filename)
                    cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (
                        token_rucio2print, httpredirector, reps[0].scope,
                        reps[0].filename)

            tolog("Getting remote checksum: command to be executed: %s" %
                  (cmd2print))
            checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
            remote_checksum, stderr = checksum_cmd.communicate()
            tolog("Remote checksum as given by rucio %s" % (remote_checksum))
            if (remote_checksum == "none"):
                tolog("In checking checksum: command std error: %s" % (stderr))
                pilotErrorDiag = "Cannot get the checksum of file on SE"
                tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
                tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
                time.sleep(3)
                # try to get the remote checksum with lcg-get-checksum
                #remote_checksum = self.lcgGetChecksum(envsetup, self.timeout, full_surl)
                #if not remote_checksum:
                #    # try to grab the remote file info using lcg-ls command
                #    remote_checksum, remote_fsize = self.getRemoteFileInfo(envsetup, self.timeout, full_surl)
                #else:
                #    tolog("Setting remote file size to None (not needed)")
                #    remote_fsize = None

        # compare the checksums if the remote checksum was extracted
        tolog("Remote checksum: %s" % str(remote_checksum))
        tolog("Local checksum: %s" % (fchecksum))

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
                tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
        else:
            tolog(
                "Skipped primary checksum verification (remote checksum not known)"
            )

        # if lcg-ls could not be used
        if "/pnfs/" in surl and not remote_checksum:
            # for dCache systems we can test the checksum with the use method
            tolog(
                "Detected dCache system: will verify local checksum with the local SE checksum"
            )
            # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
            path = surl[surl.find('/pnfs/'):]
            # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....#
            tolog("File path: %s" % (path))

            _filename = os.path.basename(path)
            _dir = os.path.dirname(path)

            # get the remote checksum
            tolog("Local checksum: %s" % (fchecksum))
            try:
                remote_checksum = self.getdCacheChecksum(_dir, _filename)
            except Exception, e:
                pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(
                    e)
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            else:
                if remote_checksum == "NOSUCHFILE":
                    pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                    tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                    self.prepareReport('NOSUCHFILE', report)
                    return self.put_data_retfail(error.ERR_NOSUCHFILE,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                elif remote_checksum:
                    tolog("Remote checksum: %s" % (remote_checksum))
                else:
                    tolog("Could not get remote checksum")

            if remote_checksum:
                if remote_checksum != fchecksum:
                    pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                     (csumtype, _filename, remote_checksum, fchecksum)
                    if csumtype == "adler32":
                        self.prepareReport('AD_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                    else:
                        self.prepareReport('MD5_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                else:
                    tolog("Remote and local checksums verified")
                    verified = True
Example #39
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE

        The code below prepares the transfer: it collects the local file info,
        verifies the setup command and (on request) the proxy, and resolves the
        destination paths.

        source      -- local file to be staged out
        destination -- not read by the code below
        fsize       -- size of the source file (determined locally if fsize or fchecksum is 0)
        fchecksum   -- checksum of the source file (determined locally if fsize or fchecksum is 0)
        pdict       -- further settings; 'report' is mandatory, all other keys have defaults

        Every failure handled below returns the result of self.put_data_retfail().
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report (raises KeyError if the caller did not pass 'report')
        report = self.getStubTracingReport(pdict['report'], 'lcg2', lfn, guid)

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        if alt:
            # use a cvmfs setup for stage-out to alternative SE
            envsetup = si.getLocalEMISetup()
            # make sure a non-empty setup ends with ";". An empty setup is left
            # as it is: indexing it with [-1] would raise IndexError, and a bare
            # "; " prefix is presumably not a usable setup string either
            if envsetup and not envsetup.endswith(";"):
                envsetup += "; "
        else:
            envsetup = self.getEnvsetup(alt=alt)

        ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        if proxycheck:
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup,
                                                           limit=2)
            if s != 0:
                self.prepareReport('NO_PROXY', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            alt=alt)
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag, surl=dst_gpfn)

        putfile = surl
        full_surl = putfile
        if full_surl.startswith('token:'):
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info;
            # index() raises ValueError if no 'srm://' follows the token
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile = %s" % (putfile))
        tolog("full_surl = %s" % (full_surl))

        # get the DQ2 site name from ToA (best effort: a failure is only logged)
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=putfile)
        except Exception as e:
            tolog(
                "Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)"
                % str(e))
Example #40
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """ copy output file from disk to local SE

        The code below prepares the transfer: it fills the tracing report,
        collects the local file info, verifies the proxy on request and resolves
        the destination paths and the https SURL.

        source      -- local file to be staged out
        destination -- not read by the code below
        fsize       -- size of the source file (determined locally if fsize or fchecksum is 0)
        fchecksum   -- checksum of the source file (determined locally if fsize or fchecksum is 0)
        pdict       -- further settings; every key has a default, a missing
                       'report' is replaced by an empty dictionary

        Every failure handled below returns the result of self.put_data_retfail().
        """
        # function is based on dCacheSiteMover put function

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        testLevel = pdict.get('testLevel', '0')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog("Treating PanDA Mover job as a production job during stage-out")
            analysisJob = False

        # get the Rucio tracing report
        try:
            report = pdict['report']
        except KeyError:
            # only a missing key is expected here; anything else (e.g. a
            # KeyboardInterrupt) must not be swallowed
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup
        envsetup = self.getEnvsetup()

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        if proxycheck:
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
            if s != 0:
                self.prepareReport('NO_PROXY', report)
                return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error, analysisJob, token, prodSourceLabel, dsname, filename,
            scope=scope, sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        putfile = surl
        full_surl = putfile
        if full_surl.startswith('token:'):
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info;
            # index() raises ValueError if no 'srm://' follows the token
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
        tolog("putfile: %s" % (putfile))
        tolog("full_surl: %s" % (full_surl))

        # get https surl
        full_http_surl = full_surl.replace("srm://", "https://")

        # get the RSE from ToA (best effort: a failure is only logged)
        try:
            _RSE = self.getRSE(surl=putfile)
        except Exception as e:
            tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
Example #41
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Moves the file from the current local directory to a storage element
        source: full path of the file in  local directory
        destinaion: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
        Assumes that the SE is locally mounted and its local path is the same as the remote path
        if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
        returns: exitcode, gpfn,fsize, fchecksum
        """

        error = PilotErrors()

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        analyJob = pdict.get('analJob', False)
        dsname = pdict.get('dsname', '')
        sitename = pdict.get('sitename', '')
        cmtconfig = pdict.get('cmtconfig', '')
        extradirs = pdict.get('extradirs', '')
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid)

        # get a proper setup
        _setup_str = self.getSetup()

#        if "CERN" in sitename:
#            _setup_str = "source /afs/cern.ch/project/xrootd/software/setup_stable_for_atlas.sh;"

        # PN, for now
        #_setup_str = ""
        tolog("xrdcpSiteMover put_data using setup: %s" % (_setup_str))

        lfcpath, pilotErrorDiag = self.getLFCPath(analyJob)
        if lfcpath == "":
            self.__sendReport('STAGEOUT_FAIL', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        ec = 0
        # get the file size and checksum of the local file
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)
        tolog("Local checksum: %s, local file size: %s" % (fchecksum, str(fsize)))

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get all the proper paths
        filename = os.path.basename(source)
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analyJob, token, prodSourceLabel, dsname, filename)
        if ec != 0:
            self.__sendReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # correct the surl since it might contain the space token and the port info at the beginning
        surl = self.stripListSEs([surl])[0]

        tolog("dst_gpfn: %s" % (dst_gpfn))
        tolog("surl    : %s" % (surl))
        bare_dst_gpfn = dst_gpfn # ie starts with /.. (url and port will be added later, not good for rfmkdir eg)
        dst_loc_pfn = dst_gpfn
        dst_gpfn = surl

        # get the DQ2 site name from ToA
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn)
        except Exception, e:
            tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
    def setup(self, experiment=None, surl=None, os_bucket_id=-1, label='r'):
        """ setup env

        Makes sure boto can be imported, removes the http(s) proxy settings on
        bnl.gov hosts and creates the S3 object store client (self.s3Objectstore).

        experiment   -- passed on to getSiteInformation()
        surl         -- not used by this method
        os_bucket_id -- object store bucket id; with -1 the DDM endpoint of the
                        'eventservice' bucket is used
        label        -- passed on to si.getObjectstoreEndpointID()

        Returns a tuple (error code, error message); (0, "") on success.
        """
        if not self.__isBotoLoaded:
            # the imports only check that boto is available
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
                self.__isBotoLoaded = True
            except ImportError:
                tolog(
                    "Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path"
                )
                sys.path.append(
                    '/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/'
                )
                try:
                    import boto
                    import boto.s3.connection
                    from boto.s3.key import Key
                    self.__isBotoLoaded = True
                except ImportError:
                    tolog("Failed to import boto again. exit")
                    return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

        hostname = None
        try:
            hostname = socket.getfqdn()
        except Exception:
            # best effort: without a host name the proxy settings are left alone
            tolog(traceback.format_exc())

        # on bnl.gov hosts the proxy settings are removed from the environment
        if hostname and hostname.endswith("bnl.gov"):
            for proxy_var in ("http_proxy", "https_proxy"):
                if os.environ.get(proxy_var):
                    del os.environ[proxy_var]

        si = getSiteInformation(experiment)
        # os_bucket_id will only be set if the setup function is called, if setup via the init function - get the default bucket id
        if os_bucket_id == -1:
            ddmendpoint = si.getObjectstoreDDMEndpoint(
                os_bucket_name='eventservice')  # assume eventservice
        else:
            ddmendpoint = si.getObjectstoreDDMEndpointFromBucketID(
                os_bucket_id)
        endpoint_id = si.getObjectstoreEndpointID(ddmendpoint=ddmendpoint,
                                                  label=label,
                                                  protocol='s3')
        os_access_key, os_secret_key, os_is_secure = si.getObjectstoreKeyInfo(
            endpoint_id, ddmendpoint=ddmendpoint)

        # both key names are needed to look up the key pair
        if not (os_access_key and os_secret_key):
            tolog("Failed to get the keyPair name for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair name for S3 objectstore"

        keyPair = si.getSecurityKey(os_secret_key, os_access_key)
        if "privateKey" not in keyPair or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

        self.s3Objectstore = S3ObjctStore(keyPair["privateKey"],
                                          keyPair["publicKey"], os_is_secure,
                                          self._useTimerCommand)
        return 0, ""
Example #43
0
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Move the file from the current local directory to the local pilot init dir

        Parameters are:
        source -- full path of the file in  local directory
        destinaion -- destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name) NOT USED (pinitdir is used instead)
        fsize -- file size of the source file (evaluated if 0)
        fchecksum -- MD5 checksum of the source file (evaluated if 0)
        pdict -- to allow additional parameters that may make sense with specific movers
        
        Assume that the pilot init dir is locally mounted and its local path is the same as the remote path
        if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
        returns: exitcode, pilotErrorDiag, gpfn, fsize, fchecksum
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        DN = pdict.get('DN', '')
        dsname = pdict.get('dsname', '')
        analJob = pdict.get('analJob', False)
        sitename = pdict.get('sitename', '')
        testLevel = pdict.get('testLevel', '0')
        pilot_initdir = pdict.get('pinitdir', '')
        experiment = pdict.get('experiment', '')
        token = pdict.get('token', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')
        dsname = pdict.get('dsname', '')
        scope = pdict.get('scope', '')
        alt = pdict.get('alt', False)
        jobId = pdict.get('jobId', '')
        
        tolog("jobId: %s" % jobId)

        # get the site information object
        si = getSiteInformation(experiment)
        
        outputDir = pdict.get('outputDir', '')
#         # are we on a tier 3?
#         if si.isTier3():
#             outputDir = self.getTier3Path(dsname, DN)
#             tolog("Writing output on a Tier 3 site to: %s" % (outputDir))
# 
#             # create the dirs if they don't exist
#             try:
#                 self.mkdirWperm(outputDir)
#             except Exception, e:
#                 tolog("!!WARNING!!2999!! Could not create dir: %s, %s" % (outputDir, str(e)))
#         else:
#             outputDir = pdict.get('outputDir', '')
        if outputDir == "":
            tolog("Put function will use pilot launch dir as output file dir: %s" % (pilot_initdir))
            outputDir = pilot_initdir
        else:
            if not os.path.isdir(outputDir):
                pilotErrorDiag = "Output directory does not exist: %s" % (outputDir)
                tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
            else:
                tolog("Put function will use requested output file dir: %s" % (outputDir))

        if outputDir == "":
            pilotErrorDiag = "Pilot init dir not set (can not figure out where the output files should be moved to)"
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
            if ec != 0:
                return self.put_data_retfail(ec, pilotErrorDiag)

        dst_loc_sedir = outputDir
        filename = os.path.basename(source)
        dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
        
        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, jobId=jobId, jobPars=self.jobPars, sitemover=self) # quick workaround
        if ec != 0:
            reportState = {}
            reportState["clientState"] = tracer_error
            self.prepareReport(reportState, report)
            return self.put_data_retfail(ec, pilotErrorDiag)
        tolog('dst_gpfn: %s' % dst_gpfn)
        tolog('lfcdir: %s' % lfcdir)
        dst_loc_sedir = lfcdir
        dst_loc_pfn = dst_gpfn
        
        # for CERNVM, use dst_loc_sedir as a starting point for creating a directory structure
        if sitename == "CERNVM":
            # NOTE: LFC registration is not done here but some of the LFC variables are used to find out
            # the disk path so the code has to be partially repeated here

            lfcpath, pilotErrorDiag = self.getLFCPath(analJob)
            if lfcpath == "":
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
            else:
                tolog("Got LFC path: %s" % (lfcpath))

            dst_loc_sedir, _dummy = self.getLCGPaths(outputDir, dsname, filename, lfcpath)
            tolog("Got LCG paths: %s" % (dst_loc_sedir))

            # create the sub directories
            try:
                self.mkdirWperm(dst_loc_sedir)
            except Exception, e:
                pilotErrorDiag = "Could not create dir: %s, %s" % (dst_loc_sedir, e)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
            else:
                tolog("Successfully created sub-directories: %s" % (dst_loc_sedir))
    def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
        """
        Update the job status with the jobdispatcher web server.
        State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
        log = log extracts
        xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in this function)
        jr = job recovery mode
        """
    
        tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

        # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
        # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
        if not self.__jobrec:
            if job.result[0] == 'holding' and site.sitename != "CERNVM":
                job.result[0] = 'failed'
                tolog("This site does not support job recovery: HOLDING state reset to FAILED")

        # note: any changed job state above will be lost for fake server updates, does it matter?

        # get the node structure expected by the server
        node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

        # skip the server update (e.g. on NG)
        if not self.__updateServer:
            tolog("(fake server update)")
            return 0, node

        tolog("xmlstr = %s" % (xmlstr))

        # get the xml
        node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

        # stdout tail in case job.debug == 'true'
        if job.debug.lower() == "true" and stdout_tail != "":
            # protection for potentially large tails
            stdout_tail = stdout_tail[-2048:]
            node['stdout'] = stdout_tail
            tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

            # also send the full stdout to a text indexer if required
            if stdout_path != "":
                if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                    tolog("Will send payload stdout to text indexer")

                    # get the user name, which we will use to create a proper filename
                    from SiteMover import SiteMover
                    s = SiteMover()
                    username = s.extractUsername(job.prodUserID)

                    # get setup path for xrdcp
                    try:
                        si = getSiteInformation(job.experiment)
                        setup_path = si.getLocalROOTSetup()

                        filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                        dateDirs = self.getDateDirs()
                        remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                        url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                        cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                        tolog("Executing command: %s" % (cmd))
                        rc, rs = getstatusoutput(cmd)
                        tolog("rc=%d, rs=%s" % (rc, rs))
                    except Exception, e:
                        tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
            else:
                tolog("stdout_path not set")
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """
        Copy an output file from disk to the storage element (the target must resolve to an s3: path).

        The function is based on the dCacheSiteMover put function.

        source      -- full path of the local file to transfer
        destination -- not referenced here; the target is derived from si.getProperPaths()
        fsize       -- not referenced here; the local file info is always re-evaluated
        fchecksum   -- not referenced here; the local file info is always re-evaluated
        pdict       -- additional parameters; the keys read are: alt, lfn, guid, token, scope, dsname,
                       analJob, experiment, prodSourceLabel and report (report is mandatory, a missing
                       key raises KeyError)

        The local adler32 checksum is taken before the transfer. If the size reported by stageOut()
        equals the local size, the adler32 checksum is taken again; an unchanged value replaces the
        checksum reported by stageOut(), a changed value fails the transfer.

        returns: 0, pilotErrorDiag, surl, size, checksum, self.arch_type on success,
                 otherwise the result of self.put_data_retfail()
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        alt = pdict.get('alt', False)
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        analysisJob = pdict.get('analJob', False)
        experiment = pdict.get('experiment', '')
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn,
                                           guid)

        filename = os.path.basename(source)

        # get all the proper paths (only the surl is needed below)
        ec, pilotErrorDiag, tracer_error, _dst_gpfn, _lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            alt=alt,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        def _fail_with_pstage(message):
            """ log the problem, report PSTAGE_FAIL to the tracer and build the failure return value """
            tolog("!!WARNING!!1777!! %s" % (message))
            self.prepareReport("PSTAGE_FAIL", report)
            # NOTE(review): a stage-in error code is reported from a stage-out function; kept as is
            # since callers may act on the code - confirm whether ERR_STAGEOUTFAILED was intended
            return self.put_data_retfail(PilotErrors.ERR_STAGEINFAILED,
                                         message, surl)

        # get local adler32 checksum
        status, output, adler_size, adler_checksum = self.getLocalFileInfo(
            source, checksumType="adler32")
        if status != 0:
            return _fail_with_pstage(
                'Failed to get local file %s adler32 checksum: %s' % (source,
                                                                      output))

        ret_path = si.getCopyPrefixPathNew(surl, stageIn=False)
        tolog("Convert destination: %s to new path: %s" % (surl, ret_path))
        if not ret_path.startswith("s3:"):
            output = "Failed to use copyprefix to convert the current path to S3 path."
            tolog("!!WARNING!!1777!! %s" % (output))
            # NOTE(review): same stage-in error code on a stage-out path as above
            status = PilotErrors.ERR_STAGEINFAILED
            size = None
            checksum = None
        else:
            status, output, size, checksum = self.stageOut(
                source, ret_path, token, experiment)

        if status != 0:
            # the tracer state is the name of the error code, if there is one
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"
            self.prepareReport(state, report)
            return self.put_data_retfail(status, output, surl)

        # NOTE(review): nothing further is verified when the sizes differ - confirm this is intended
        if size == adler_size:
            tolog(
                "The file size is not changed. Will check whether adler32 changed."
            )
            status, output, _new_adler_size, new_adler_checksum = self.getLocalFileInfo(
                source, checksumType="adler32")
            if status != 0:
                return _fail_with_pstage(
                    'Failed to get local file %s adler32 checksum: %s' %
                    (source, output))

            if adler_checksum == new_adler_checksum:
                tolog(
                    "The file checksum is not changed. Will use adler32 %s to replace the md5 checksum %s"
                    % (adler_checksum, checksum))
                checksum = adler_checksum
            else:
                # the local file was modified while it was being transferred
                return _fail_with_pstage(
                    "The file checksum changed from %s(before transfer) to %s(after transfer)"
                    % (adler_checksum, new_adler_checksum))

        self.prepareReport("DONE", report)
        return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
Example #46
0
    def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
        """
        Update the job status with the jobdispatcher web server.
        State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
        log = log extracts
        xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will only send xml for log (created in this function)
        jr = job recovery mode
        """
    
        tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

        # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
        # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
        if not self.__jobrec:
            if job.result[0] == 'holding' and site.sitename != "CERNVM":
                job.result[0] = 'failed'
                tolog("This site does not support job recovery: HOLDING state reset to FAILED")

        # note: any changed job state above will be lost for fake server updates, does it matter?

        # get the node structure expected by the server
        node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

        # skip the server update (e.g. on NG)
        if not self.__updateServer:
            tolog("(fake server update)")
            return 0, node

        tolog("xmlstr = %s" % (xmlstr))

        # get the xml
        node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

        # stdout tail in case job.debug == 'true'
        if job.debug.lower() == "true" and stdout_tail != "":
            # protection for potentially large tails
            stdout_tail = stdout_tail[-2048:]
            node['stdout'] = stdout_tail
            tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

            # also send the full stdout to a text indexer if required
            if stdout_path != "":
                if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                    tolog("Will send payload stdout to text indexer")

                    # get the user name, which we will use to create a proper filename
                    from SiteMover import SiteMover
                    s = SiteMover()
                    username = s.extractUsername(job.prodUserID)

                    # get setup path for xrdcp
                    try:
                        si = getSiteInformation(job.experiment)
                        setup_path = si.getLocalROOTSetup()

                        filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                        dateDirs = self.getDateDirs()
                        remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                        url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                        cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                        tolog("Executing command: %s" % (cmd))
                        rc, rs = getstatusoutput(cmd)
                        tolog("rc=%d, rs=%s" % (rc, rs))
                    except Exception, e:
                        tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
            else:
                tolog("stdout_path not set")
Example #47
0
    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """
        Copy a file from a remote SE to the working directory with xrdcp and verify the local copy.

        gpfn      -- full source URL (e.g. method://[host[:port]/full-dir-path/filename); only its base
                     name is used (for a log comparison), the real source is taken from
                     self.findGlobalFilePath()
        lfn       -- logical file name, also used as the local file name
        path      -- destination absolute path (directory in a local file system)
        fsize     -- expected file size (taken from the copy command output if 0)
        fchecksum -- expected checksum (taken from the copy command output if 0)
        guid      -- file guid, forwarded to the tracing report
        pdict     -- additional parameters; the keys read are: usect, jobId, dsname, workDir,
                     experiment, access and report (report is mandatory, a missing key raises KeyError)

        The partially copied destination file is removed on copy, file info and checksum/size
        verification failures, before any get retry is attempted.

        returns: exitcode, pilotErrorDiag (exitcode is 0 on success and error.ERR_DIRECTIOFILE when
                 the file is to be read directly instead of copied)
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # guid is a named parameter and can therefore never show up in pdict: the previous
        # pdict.get("guid", "") always returned "" and the tracing report lost the guid.
        # An unset guid (default 0, or None) is still passed on as an empty string
        if not guid:
            guid = ""

        # Get input parameters from pdict
        useCT = pdict.get("usect", True)
        jobId = pdict.get("jobId", "")
        dsname = pdict.get("dsname", "")
        workDir = pdict.get("workDir", "")
        experiment = pdict.get("experiment", "")
        prodDBlockToken = pdict.get("access", "")

        # get the site information object
        tolog("get_data: experiment=%s" % (experiment))
        # NOTE(review): si is not used below; the call is kept in case getSiteInformation() has
        # side effects - confirm and drop
        si = getSiteInformation(experiment)

        # get the DQ2 tracing report
        report = self.getStubTracingReport(pdict["report"], "fax", lfn, guid)

        src_loc_filename = lfn

        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # should the root file be copied or read directly by athena? (note: this section is necessary in case FAX is used as primary site mover)
        directIn = self.checkForDirectAccess(lfn, useCT, workDir, jobId, prodDBlockToken)
        if directIn:
            report["relativeStart"] = None
            report["transferStart"] = None
            self.__sendReport("FOUND_ROOT", report)
            return error.ERR_DIRECTIOFILE, pilotErrorDiag

        # local destination path
        dest_file = os.path.join(path, src_loc_filename)

        def _remove_local_copy():
            """ remove the local file before any get retry is attempted """
            if not self.removeLocal(dest_file):
                tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        # the initial gpfn is ignored since the pilot will get it from the global redirector.
        # the lfn can differ e.g. for files that have the __DQ2-* bit in it; tests at CERN have shown
        # that the global redirector will not find such a file, so the global LFN is the one used
        initial_lfn = os.path.basename(gpfn)
        tolog("Initial LFN=%s" % (initial_lfn))

        # get the global path
        gpfn = self.findGlobalFilePath(src_loc_filename, dsname)
        if gpfn == "":
            ec = error.ERR_STAGEINFAILED
            pilotErrorDiag = "Failed to get global paths for FAX transfer"
            tolog("!!WARNING!!3330!! %s" % (pilotErrorDiag))
            self.__sendReport("RFCP_FAIL", report)
            return ec, pilotErrorDiag

        tolog("GPFN=%s" % (gpfn))
        global_lfn = os.path.basename(gpfn)
        if global_lfn != initial_lfn:
            tolog("WARNING: Global LFN not the same as the initial LFN. Will use the global LFN")

        # setup ROOT locally
        _setup_str = self.getLocalROOTSetup()

        # define the copy command
        cmd = "%s xrdcp -d 1 -f %s %s" % (_setup_str, gpfn, dest_file)

        # transfer the file
        report["transferStart"] = time()
        rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
        report["validateStart"] = time()
        if rc != 0:
            self.__sendReport("COPY_FAIL", report)
            _remove_local_copy()
            return rc, pilotErrorDiag

        tolog("Successfully transferred file")

        # get file size from the command output if not known already
        if fsize == 0:
            fsize = self.getFileSize(rs)

        # get checksum from the command output if not known already
        if fchecksum == 0:
            fchecksum = self.getChecksum(rs)
        elif fchecksum is None:
            # an empty checksum disables the checksum comparison below
            fchecksum = ""
        else:
            tolog("fchecksum = %s" % (fchecksum))

        # get destination (local) file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
        tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
        if ec != 0:
            self.__sendReport("LOCAL_FILE_INFO_FAIL", report)
            _remove_local_copy()
            return ec, pilotErrorDiag

        # compare remote and local file checksum (skipped for unknown and dummy checksums)
        if fchecksum != "" and fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % (
                csumtype,
                os.path.basename(gpfn),
                fchecksum,
                dstfchecksum,
            )
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            _remove_local_copy()

            if csumtype == "adler32":
                self.__sendReport("AD_MISMATCH", report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag

            self.__sendReport("MD5_MISMATCH", report)
            return error.ERR_GETMD5MISMATCH, pilotErrorDiag

        # compare remote and local file size (skip test if remote/source file size is not known)
        if dstfsize != fsize and fsize != 0 and fsize != "":
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" % (
                os.path.basename(gpfn),
                str(dstfsize),
                str(fsize),
            )
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            self.__sendReport("FS_MISMATCH", report)
            _remove_local_copy()
            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
        self.__sendReport("DONE", report)
        return 0, pilotErrorDiag