예제 #1
0
    def stageIn(self, source, destination, fspec):
        """
        Stage in a file by symlinking it instead of copying it.

        stageIn is overridden (rather than stageInFile) because most of the
        generic stageIn logic is not needed here. A symlink named fspec.lfn
        (a relative path, so it is resolved against the current working
        directory) is created, pointing at <self.init_dir>/<fspec.lfn>.

        :param source:      original (remote) file location - not used
        :param destination: not used by this implementation; the link is
                            created at fspec.lfn
        :param fspec:  file spec object; lfn, filesize and get_checksum() are read
        :return:       dict with keys checksum_type, checksum and filesize
        :raise: PilotException if the link cannot be created or is dangling
        """

        link_name = fspec.lfn
        target = os.path.join(self.init_dir, link_name)
        self.log('Creating link from %s to %s' % (link_name, target))

        try:
            os.symlink(target, link_name)
        except OSError as e:
            raise PilotException('stageIn failed: %s' % str(e))

        # os.path.exists() follows symlinks, so a link whose target is
        # missing is detected here
        if not os.path.exists(link_name):
            raise PilotException('stageIn failed: symlink points to non-existent file')

        self.log('Symlink successful')

        checksum, checksum_type = fspec.get_checksum()
        return dict(checksum_type=checksum_type,
                    checksum=checksum,
                    filesize=fspec.filesize)
예제 #2
0
파일: base.py 프로젝트: PalNilsson/pilot
    def check_availablespace(self, maxinputsize, files):
        """
            Verify that enough local space is available to stage in and run the job

            Nothing is checked when self.shouldVerifyStageIn() is false.

            :param maxinputsize: upper limit (in bytes) for the summed size of all
                                 input files; a false value (e.g. 0) disables this check
            :param files: sized collection of file specs exposing a numeric `filesize`
            :return: None
            :raise: PilotException with code ERR_SIZETOOLARGE if the total size exceeds
                    maxinputsize, or ERR_NOLOCALSPACE if it exceeds the locally
                    available disk space
        """

        if not self.shouldVerifyStageIn():
            return

        # sum() instead of reduce(): same result, and does not rely on the
        # Python 2-only builtin
        totalsize = sum(f.filesize for f in files)

        # verify total filesize
        if maxinputsize and totalsize > maxinputsize:
            error = "Too many/too large input files (%s). Total file size=%s B > maxinputsize=%s B" % (
                len(files), totalsize, maxinputsize)
            raise PilotException(error, code=PilotErrors.ERR_SIZETOOLARGE)

        self.log(
            "Total input file size=%s B within allowed limit=%s B (zero value means unlimited)"
            % (totalsize, maxinputsize))

        # get available space of the work directory from the worker node info
        wn = Node()
        wn.collectWNInfo(self.workDir)

        available_space = int(wn.disk) * 1024**2  # convert from MB to B

        self.log("Locally available space: %d B" % available_space)

        # are we within the limit?
        if totalsize > available_space:
            error = "Not enough local space for staging input files and run the job (need %d B, but only have %d B)" % (
                totalsize, available_space)
            raise PilotException(error, code=PilotErrors.ERR_NOLOCALSPACE)
예제 #3
0
    def _getPresignedUrl(self, pandaProxyURL, jobId, osPrivateKey, osPublicKey, pandaProxySecretKey, s3URL, stageIn=False):
        """
        Ask the panda proxy for a presigned URL of an S3 object.

        POSTs the job id, the keys and the S3 URL to
        <pandaProxyURL>/getPresignedURL and parses the query-string encoded
        answer.

        :param pandaProxyURL: base URL of the panda proxy service
        :param jobId: value sent as 'pandaID'
        :param osPrivateKey: sent as 'privateKey:<value>'
        :param osPublicKey: sent as 'publicKey:<value>'
        :param pandaProxySecretKey: secret key of the proxy; must be non-empty
        :param s3URL: S3 URL to be presigned
        :param stageIn: when True, 'method'='GET' is added to the request;
                        otherwise no 'method' field is sent
        :return: the presigned URL reported by the proxy
        :raise: PilotException on any failure (every exception raised inside,
                including the PilotExceptions below, is re-wrapped)
        """
        try:
            if not pandaProxySecretKey:
                raise PilotException("Panda proxy secret key is not set for panda proxy operations")

            data = {'pandaID': jobId,
                    'secretKey':'%s' % pandaProxySecretKey,
                    'publicKey': 'publicKey:%s' % osPublicKey,
                    'privateKey': 'privateKey:%s' % osPrivateKey,
                    'url':'%s' % s3URL}
            if stageIn:
                data['method'] = 'GET'

            requestedURL = pandaProxyURL+'/getPresignedURL'
            # never write credentials to the log: mask the secret fields in
            # the copy that gets printed
            loggable = dict(data, secretKey='***', privateKey='***')
            self.log("agb: get presinged url: requested url='%s',  data=%s" % (requestedURL, loggable) )

            res = requests.post(requestedURL, data=data)
            self.log("result=%s" % res)
            self.log("res.text.encode('ascii') = %s" % res.text.encode('ascii'))
            if res.status_code == 200:
                # the answer is query-string encoded: StatusCode, presignedURL, ErrorMsg
                tmpDict = cgi.parse_qs(res.text.encode('ascii'))
                if int(tmpDict['StatusCode'][0]) == 0:
                    return tmpDict['presignedURL'][0]
                else:
                    raise PilotException( "get remote path presigned url from panda proxy error %s: %s" % (tmpDict['StatusCode'][0], tmpDict['ErrorMsg'][0]) )
            raise PilotException( "failed to get remote path presigned url from panda proxy, status code:  %s" % res.status_code)
        except Exception as e:
            raise PilotException( "failure when get presigned url from panda proxy: %s" % str(e))
예제 #4
0
    def stageIn(self, turl, dst, fspec):
        """
        Stage in a file with the `rucio download` command line tool.

        Which command is built depends on fspec:
          * no replicas known: download from fspec.ddmendpoint using
            fspec.turl as PFN;
          * replicas known and fspec.allowAllInputRSEs set: no RSE is pinned;
          * otherwise: the RSE of the first replica is requested.
        The command is passed through singularityWrapper() before execution.
        The download ends up in <dirname(dst)>/<scope>/<lfn> and is therefore
        moved to dst afterwards. When no replicas are known, fspec.filesize
        is set from the size of the staged-in file.

        :param turl:  overrides parent signature -- unused (fspec.turl is read instead)
        :param dst:   local path where the staged-in file must end up
        :param fspec: file spec object (replicas, allowAllInputRSEs, scope,
                      lfn, ddmendpoint, turl, cmtconfig are read)
        :return:      destination file details (ddmendpoint, surl, pfn)
        :raise: PilotException if the download or the move exits non-zero
        """

        target_dir = dirname(dst)

        if not fspec.replicas:
            cmd = 'rucio download --dir %s --rse %s --pfn %s %s:%s' % (
                target_dir, fspec.ddmendpoint, fspec.turl, fspec.scope, fspec.lfn)
        elif fspec.allowAllInputRSEs:
            cmd = 'rucio download --dir %s %s:%s' % (
                target_dir, fspec.scope, fspec.lfn)
        else:
            cmd = 'rucio download --dir %s --rse %s %s:%s' % (
                target_dir, fspec.replicas[0][0], fspec.scope, fspec.lfn)

        # Prepend the command with singularity if necessary
        from Singularity import singularityWrapper
        cmd = singularityWrapper(cmd, fspec.cmtconfig, target_dir)

        tolog('stageIn: %s' % cmd)
        status, out = getstatusoutput(cmd)
        if status:
            raise PilotException(
                'stageIn failed -- rucio download did not succeed: %s' %
                out.replace('\n', ''))

        # TODO: fix in rucio download to set specific outputfile
        #       https://its.cern.ch/jira/browse/RUCIO-2063
        downloaded = target_dir + '/%s/%s' % (fspec.scope, fspec.lfn)
        cmd = 'mv %s %s' % (downloaded, dst)
        tolog('stageInCmd: %s' % cmd)
        status, out = getstatusoutput(cmd)
        tolog('stageInOutput: %s' % out)
        if status:
            raise PilotException(
                'stageIn failed -- could not move downloaded file to destination: %s'
                % out.replace('\n', ''))

        if fspec.replicas:
            endpoint = fspec.replicas[0][0]
        else:
            fspec.filesize = os.path.getsize(dst)
            endpoint = fspec.ddmendpoint

        return {'ddmendpoint': endpoint, 'surl': None, 'pfn': fspec.lfn}
예제 #5
0
    def put_files(self, ddmendpoints, activity, files):
        """
        Copy files to dest SE:
           main control function, it should care about alternative stageout and retry-policy for different ddmendpoints

        The ddmendpoints are tried in the given order; processing stops at the
        first endpoint to which all files were transferred. Endpoints without
        resolved protocols are skipped (no entry is appended for them).

        :param ddmendpoints: list of DDMEndpoints where the files will be send (base DDMEndpoint SE + alternative SEs??)
        :param activity: activity name used to resolve (and cache) the protocols
        :param files: list of files to be transferred
        :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each processed ddmendpoint
        :raise: PilotException in case of error (empty ddmendpoints or files)
        """

        if not ddmendpoints:
            raise PilotException("Failed to put files: Output ddmendpoint list is not set", code=PilotErrors.ERR_NOSTORAGE)
        if not files:
            raise PilotException("Failed to put files: empty file list to be transferred")

        missing_ddms = set(ddmendpoints) - set(self.ddmconf)

        if missing_ddms:
            self.ddmconf.update(self.si.resolveDDMConf(missing_ddms))

        # resolve the protocols only when they are not cached yet for this
        # activity (setdefault() would evaluate the lookup on every call)
        if activity not in self.protocols:
            self.protocols[activity] = self.si.resolvePandaProtocols(ddmendpoints, activity)
        ddmprot = self.protocols[activity]

        output = []

        for ddm in ddmendpoints:
            protocols = ddmprot.get(ddm)
            if not protocols:
                self.log('Failed to resolve protocols data for ddmendpoint=%s .. skipped processing..' % ddm)
                continue

            success_transfers, failed_transfers = [], []

            try:
                success_transfers, failed_transfers = self.do_put_files(ddm, protocols, files)
                is_success = len(success_transfers) == len(files)
                output.append((is_success, success_transfers, failed_transfers, None))

                if is_success:
                    # NO additional transfers to another next DDMEndpoint/SE ?? .. fix me later if need
                    break

            except Exception as e:
                self.log('put_files: caught exception: %s' % e)
                # is_success, success_transfers, failed_transfers, exception
                import traceback
                self.log(traceback.format_exc())
                output.append((False, [], [], e))

            ### TODO: implement proper logic of put-policy: how to handle alternative stage out (processing of next DDMEndpoint)..

            self.log('put_files(): Failed to put files to ddmendpoint=%s .. successfully transferred files=%s/%s, failures=%s: will try next ddmendpoint from the list ..' % (ddm, len(success_transfers), len(files), len(failed_transfers)))

        # hand the collected per-endpoint results back, as documented
        return output
예제 #6
0
    def stageOut(self, src, dst, fspec):
        """
        Stage out a file with the `rucio upload` command line tool.

        The local file is fspec.pfn when set, otherwise fspec.lfn. For a
        positive fspec.objectstoreId the upload goes to the explicit PFN
        fspec.turl; otherwise a --guid option is added when the lfn contains
        '.root'. In both cases --no-register is passed.

        :param src:   overrides parent signature -- unused
        :param dst:   overrides parent signature -- unused
        :param fspec: file spec object (objectstoreId, ddmendpoint, scope,
                      turl, pfn, lfn, guid, surl are read)
        :return:      destination file details (ddmendpoint, surl, pfn)
        :raise: PilotException if the upload command exits non-zero
        """

        local_file = fspec.pfn or fspec.lfn

        if fspec.objectstoreId and int(fspec.objectstoreId) > 0:
            cmd = 'rucio upload --no-register --rse %s --scope %s --pfn %s %s' % (
                fspec.ddmendpoint, fspec.scope, fspec.turl, local_file)
        else:
            guid = ''
            if fspec.lfn and '.root' in fspec.lfn:
                guid = ' --guid %s' % fspec.guid
            cmd = 'rucio upload%s --no-register --rse %s --scope %s %s' % (
                guid, fspec.ddmendpoint, fspec.scope, local_file)

        tolog('stageOutCmd: %s' % cmd)
        status, out = getstatusoutput(cmd)
        tolog('stageOutOutput: %s' % out)

        if status:
            raise PilotException(
                'stageOut failed -- rucio upload did not succeed: %s' %
                out.replace('\n', ''))

        return dict(ddmendpoint=fspec.ddmendpoint,
                    surl=fspec.surl,
                    pfn=fspec.lfn)
예제 #7
0
    def _stagefile(self, source, destination, filesize, is_stagein):
        """
            Stage the file
            mode is stagein or stageout (selected by `is_stagein`)

            Builds `<copy_command> -np -f <coption> <source> <destination>`,
            prefixed by the setup returned by self.getSetup() if any, and runs
            it through TimerCommand with the timeout from self.getTimeOut(filesize).

            :param source: source of the copy command
            :param destination: destination of the copy command
            :param filesize: file size, only used to compute the timeout
            :param is_stagein: True for stage-in, False for stage-out; selects
                               the error code of the raised PilotException
            :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
                     NOTE(review): the code visible here stops after running the
                     command and does not return anything -- confirm
            :raise: PilotException in case of controlled error
        """

        if self.checksum_type not in ['adler32']:  # exclude md5
            raise PilotException(
                "Failed to stage file: internal error: unsupported checksum_type=%s .. "
                % self.checksum_type,
                code=PilotErrors.ERR_STAGEINFAILED
                if is_stagein else PilotErrors.ERR_STAGEOUTFAILED,
                state='BAD_CSUMTYPE')

        cmd = '%s -np -f %s %s %s' % (self.copy_command, self.coption, source,
                                      destination)
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        timeout = self.getTimeOut(filesize)
        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception as e:
            # a failure to run the command is turned into a failed result
            self.log("WARNING: xrdcp threw an exception: %s" % e)
            rcode, output = -1, str(e)
예제 #8
0
    def stageOut(self, source, destination, fspec):
        """
        Override stageOut rather than stageOutFile since most of stageOut is
        unnecessary.
        Move the output file (fspec.lfn, resolved with os.path.realpath) to
        <self.init_dir>/<fspec.lfn>.
        Create the output file list for ARC CE.

        :param source:      local file location - not used, fspec.lfn is used instead
        :param destination: remote location to copy file - not used
        :param fspec:  file spec object; lfn, filesize and get_checksum() are read
        :return:       dict with keys checksum_type, checksum and filesize
        :raise: PilotException if the file cannot be moved
        """

        src = os.path.realpath(fspec.lfn)
        dest = os.path.join(self.init_dir, fspec.lfn)
        self.log('Moving %s to %s' % (src, dest))
        try:
            shutil.move(src, dest)
        except EnvironmentError as e:
            # EnvironmentError covers IOError, OSError and shutil.Error, all of
            # which shutil.move() may raise; report each as a controlled error
            raise PilotException('stageOut failed: %s' % str(e))

        self.log('Copy successful')

        # Create output list for ARC CE
        self.createOutputList(fspec, dest)

        checksum, checksum_type = fspec.get_checksum()
        return {'checksum_type': checksum_type,
                'checksum': checksum,
                'filesize': fspec.filesize}
예제 #9
0
    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file with the configured copy command (srmv2 options)

            The space token is looked up in self.ddmconf for fspec.ddmendpoint;
            the timeout derived from the file size is used both as SRM and as
            send/receive timeout. The command is executed by self._stagefile().

            :param source: local file to be transferred
            :param destination: remote destination
            :param fspec: file spec object; ddmendpoint is read
            :return: whatever self._stagefile() returns -- remote file (checksum, checksum_type) in case of success
            :raise: PilotException in case of controlled error (unknown ddmendpoint / missing token)
        """

        # resolve token value from fspec.ddmendpoint
        endpoint_conf = self.ddmconf.get(fspec.ddmendpoint, {})
        token = endpoint_conf.get('token')
        if not token:
            message = "stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (
                fspec.ddmendpoint, source, destination, fspec)
            raise PilotException(message,
                                 code=PilotErrors.ERR_STAGEOUTFAILED,
                                 state='UNKNOWN_DDMENDPOINT')

        filesize = os.path.getsize(source)
        timeout = self.getTimeOut(filesize)

        cmd = '%s --verbose --vo atlas -b -U srmv2 --connect-timeout=300 --srm-timeout=%s --sendreceive-timeout=%s -S %s %s %s' % (
            self.copy_command, timeout, timeout, token, source, destination)

        return self._stagefile(cmd, source, destination, filesize, is_stagein=False)
예제 #10
0
    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file with the configured copy command, using gsiftp
            as TURL protocol for the SRM plugin

            The space token is looked up in self.ddmconf for fspec.ddmendpoint.
            When fspec provides a checksum it is passed with -K. The local
            file is addressed as a file:// URL. The command is executed by
            self._stagefile().

            :param source: local file to be transferred
            :param destination: remote destination
            :param fspec: file spec object; ddmendpoint and get_checksum() are read
            :return: whatever self._stagefile() returns -- remote file (checksum, checksum_type) in case of success
            :raise: PilotException in case of controlled error (unknown ddmendpoint / missing token)
        """

        # resolve token value from fspec.ddmendpoint
        endpoint_conf = self.ddmconf.get(fspec.ddmendpoint, {})
        token = endpoint_conf.get('token')
        if not token:
            message = "stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (
                fspec.ddmendpoint, source, destination, fspec)
            raise PilotException(message)

        filesize = os.path.getsize(source)
        timeout = self.getTimeOut(filesize)

        # only ask for checksum verification when the source checksum is known
        src_checksum, src_checksum_type = fspec.get_checksum()
        if src_checksum:
            checksum_opt = '-K %s:%s' % (src_checksum_type, src_checksum)
        else:
            checksum_opt = ''

        src = "file://%s" % os.path.abspath(source)
        cmd = '%s --verbose %s -p -f -t %s -D "SRM PLUGIN:TURL_PROTOCOLS=gsiftp" -S %s %s %s' % (
            self.copy_command, checksum_opt, timeout, token, src, destination)

        return self._stagefile(cmd, source, destination, filesize, is_stagein=False)
예제 #11
0
    def stageInFile(self, turl, dst, fspec):
        """
        Stage in the file through self._stageInApi(), with one retry.

        The download is attempted up to two times. On success the file is
        moved from <dirname(dst)>/<scope>/<lfn> to dst. When no replicas and
        no file size are known, fspec.filesize is set from the staged-in file.

        :param turl:  overrides parent signature -- unused
        :param dst:   local path where the staged-in file must end up
        :param fspec: file spec object (scope, lfn, replicas, filesize are read)
        :return:      (None, None)
        :raise: PilotException with code ERR_STAGEINFAILED if all download
                attempts fail or the downloaded file cannot be moved to dst
        """

        num_retries = 2
        success = False
        try_counter = 0
        error_msg = None
        while not success and try_counter != num_retries:
            try_counter += 1
            tolog('StageIn, attempt %s/%s' %
                  (str(try_counter), str(num_retries)))
            try:
                self._stageInApi(dst, fspec)
                success = True
            except Exception as error:
                # keep the last failure; the name bound by `except ... as`
                # must not be relied upon outside of this clause
                error_msg = error

        if error_msg and not success:
            raise PilotException('stageIn with API failed:  %s' % error_msg,
                                 code=PilotErrors.ERR_STAGEINFAILED)

        # TODO: fix in rucio download to set specific outputfile
        cmd = 'mv %s %s' % (dirname(dst) + '/%s/%s' %
                            (fspec.scope, fspec.lfn), dst)
        tolog('stageInCmd: %s' % cmd)
        s, o = getstatusoutput(cmd)
        tolog('stageInOutput: s=%s o=%s' % (s, o))

        if s:
            # this is a stage-in failure, so report the stage-in error code
            raise PilotException(
                'stageIn failed -- could not move downloaded file to destination: %s'
                % o.replace('\n', ''),
                code=PilotErrors.ERR_STAGEINFAILED)

        if not fspec.replicas and not fspec.filesize:
            fspec.filesize = os.path.getsize(dst)

        return None, None
예제 #12
0
    def stageOut(self, src, dst, fspec):
        """
        Stage out the file through self._stageOutApi(), with one retry.

        The upload is attempted up to two times. After a successful upload,
        and only if self.shouldVerifyStageOut() is true, the existence of the
        file at fspec.ddmendpoint is checked with self.VerifyStageOut().

        :param src:   local file, handed to self._stageOutApi()
        :param dst:   overrides parent signature -- unused
        :param fspec: file spec object (ddmendpoint, surl, lfn are read)
        :return:      destination file details (ddmendpoint, surl, pfn)
        :raise: PilotException if all upload attempts fail, or if the
                existence check fails or reports a missing file
        """

        num_retries = 2
        uploaded = False
        last_error = None
        attempt = 0
        while attempt != num_retries and not uploaded:
            attempt += 1
            tolog('StageOut, attempt %s/%s' %
                  (str(attempt), str(num_retries)))
            try:
                self._stageOutApi(src, fspec)
            except Exception as error:
                last_error = error
            else:
                uploaded = True

        if not uploaded:
            if last_error:
                raise PilotException('stageOut with API failed:  %s' % last_error)
        elif self.shouldVerifyStageOut():
            # physical check after upload; any failure inside (including the
            # "file missing" exception raised below) is re-wrapped
            try:
                file_exists = self.VerifyStageOut(fspec.ddmendpoint, fspec)
                tolog('File exists at the storage: %s' % str(file_exists))
                if not file_exists:
                    raise PilotException(
                        'stageOut: Physical check after upload failed.')
            except Exception as e:
                msg = 'stageOut: File existence verification failed with: %s' % e
                tolog(msg)
                raise PilotException(msg)

        return dict(ddmendpoint=fspec.ddmendpoint,
                    surl=fspec.surl,
                    pfn=fspec.lfn)
    def stageOutFile(self, source, destination):
        """
            Stage out the file with the configured copy command

            First runs `<copy_command> -h` and inspects its output to decide
            which checksum option the installed command supports, then runs
            the actual copy through TimerCommand with a size-based timeout.

            :param source: local file to be transferred
            :param destination: remote destination
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
                     NOTE(review): the code visible here stops after running the
                     copy command and does not return anything -- confirm
            :raise: PilotException in case of controlled error
        """

        if self.checksum_type not in ['adler32']: # exclude md5
            raise PilotException("Failed to stageOutFile(): internal error: unsupported checksum_type=%s .. " % self.checksum_type, code=PilotErrors.ERR_STAGEOUTFAILED, state='BAD_CSUMTYPE')

        cmd = "%s -h" % self.copy_command
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Execute command (%s) to decide which option should be used to calc file checksum.." % cmd)

        c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
        output = c.communicate()[0]

        self.log("status: %s, output: %s" % (c.returncode, output))

        coption = ""

        if c.returncode:
            self.log('FAILED to execute command=%s: %s' % (cmd, output))
        else:
            if "--cksum" in output:
                coption = "--cksum %s:print" % self.checksum_type
            elif "-adler" in output and self.checksum_type == 'adler32':
                coption = "-adler"
            # NOTE: cannot match as long as the guard above only lets
            # 'adler32' through; kept for when md5 gets enabled
            elif "-md5" in output and self.checksum_type == 'md5':
                coption = "-md5"

        if coption:
            self.log("Use %s option to get the checksum" % coption)
        else:
            self.log("Cannot find neither -adler nor --cksum. will not use checksum")

        cmd = '%s -np -f %s %s %s' % (self.copy_command, coption, source, destination)
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        timeout = self.getTimeOut(os.path.getsize(source))
        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception as e:
            # a failure to run the command is turned into a failed result
            self.log("WARNING: xrdcp threw an exception: %s" % e)
            rcode, output = -1, str(e)
예제 #14
0
파일: base.py 프로젝트: PalNilsson/pilot
 def copysetup(self, value):
     # Store the path of the copy setup file in self._setup.
     # The value is stripped and environment variables in it are expanded.
     # An empty result is accepted as is; a non-empty one must name a
     # readable file, otherwise a warning is logged and a PilotException
     # (code ERR_NOSUCHFILE, state "RFCP_FAIL") is raised.
     path = os.path.expandvars(value.strip())
     unreadable = bool(path) and not os.access(path, os.R_OK)
     if unreadable:
         self.log(
             "WARNING: copysetup=%s is invalid: file is not readdable" %
             path)
         raise PilotException(
             "Failed to set copysetup: passed invalid file name=%s" % path,
             code=PilotErrors.ERR_NOSUCHFILE,
             state="RFCP_FAIL")
     self._setup = path
예제 #15
0
    def stageIn(self, source, destination, fspec):
        """
        Override stageIn rather than stageInFile since most of stageIn is
        unnecessary.
        Make a link from the downloaded file to the pilot working directory.

        Before that, the input file may be pre-loaded: when it is missing at
        <self.init_dir>/<fspec.lfn> and the environment variable
        PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER is '1', 'yes' or 'on', it is
        fetched with `rucio download` and renamed to that location.

        :param source:      original (remote) file location - not used
        :param destination: where to create the link
        :param fspec:  file spec object (scope and lfn are read here)
        :return:       destination file details (checksumtype, checksum, size)
                       NOTE(review): the code visible here ends after the
                       pre-load step and returns nothing -- confirm
        :raise: Exception with the command output if `rucio download` fails;
                PilotException (ERR_STAGEINFAILED) if the rename fails
        """
        # block pre-load input file BEGIN
        # Alexander B.: the next block is necessary for testing of BOINC pilot on GRID resources.
        # it works only if the special variable "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER" is set in external environment
        fileExpectedLocation = '%s/%s' % (
            self.init_dir, fspec.lfn
        )  # the place where original mv_sitemover expect to find the file
        if not os.path.exists(fileExpectedLocation):
            preloadFilesFlag = os.environ.get(
                "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER")
            if preloadFilesFlag in ('1', 'yes', 'on'):
                # the expected behavior actions:
                # rucio download valid1:EVNT.01416937._000001.pool.root.1
                # mv valid1/EVNT.01416937._000001.pool.root.1 ./EVNT.09355665._094116.pool.root.1

                self.log(
                    'pp: pre-load files for mv_sitemover: download locally stageIn the file: scope=%s file=%s'
                    % (fspec.scope, fspec.lfn))

                cmd = 'rucio download %s:%s' % (fspec.scope, fspec.lfn)
                self.log("Executing command: %s" % cmd)

                from subprocess import Popen, PIPE, STDOUT
                c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
                output = c.communicate()[0]
                if c.returncode:
                    raise Exception(output)

                fileRucioLocation = '%s/%s' % (
                    fspec.scope, fspec.lfn
                )  # the place where Rucio downloads file
                self.log('pp: move from %s to %s' %
                         (fileRucioLocation, fileExpectedLocation))
                try:
                    os.rename(fileRucioLocation, fileExpectedLocation)
                except OSError as e:
                    raise PilotException(
                        'stageIn failed when rename the file from rucio location: %s'
                        % str(e),
                        code=PilotErrors.ERR_STAGEINFAILED)
예제 #16
0
    def put_logfiles(self, files):
        """
        Copy log files to dest SE

        Delegates to self.put_files() with activity 'pl' and the endpoints
        from self.job.ddmEndPointLog.

        :files: list of files to be moved
        :return: result of self.put_files()
        :raise: PilotException (ERR_NOSTORAGE) if job.ddmEndPointLog is empty
        """

        log_endpoints = self.job.ddmEndPointLog
        if log_endpoints:
            return self.put_files(log_endpoints, 'pl', files)

        raise PilotException("Output ddmendpoint list (job.ddmEndPointLog) is not set", code=PilotErrors.ERR_NOSTORAGE)
예제 #17
0
    def put_outfiles(self, files):
        """
        Copy output files to dest SE

        Delegates to self.put_files() with activity 'pw' and the endpoints
        from self.job.ddmEndPointOut.

        :files: list of files to be moved
        :return: result of self.put_files()
        :raise: PilotException (ERR_NOSTORAGE) if job.ddmEndPointOut is empty;
                anything raised by self.put_files()
        """

        out_endpoints = self.job.ddmEndPointOut
        if out_endpoints:
            return self.put_files(out_endpoints, 'pw', files)

        raise PilotException("Output ddmendpoint list (job.ddmEndPointOut) is not set", code=PilotErrors.ERR_NOSTORAGE)
예제 #18
0
    def stageIn(self, turl, dst, fspec):
        """
        Stage in a file with the `rucio download` command line tool.

        The file is requested from the RSE of the first replica in
        fspec.replicas. The download ends up in <dirname(dst)>/<scope>/<lfn>
        and is therefore moved to dst afterwards.

        :param turl:  overrides parent signature -- unused
        :param dst:   local path where the staged-in file must end up
        :param fspec: file spec object (replicas, scope, lfn are read)
        :return:      destination file details (ddmendpoint, surl, pfn)
        :raise: PilotException if the download or the move exits non-zero
        """

        target_dir = dirname(dst)
        rse = fspec.replicas[0][0]

        cmd = 'rucio download --dir %s --rse %s %s:%s' % (
            target_dir, rse, fspec.scope, fspec.lfn)
        tolog('stageIn: %s' % cmd)
        status, out = getstatusoutput(cmd)
        if status:
            raise PilotException(
                'stageIn failed -- rucio download did not succeed: %s' %
                out.replace('\n', ''))

        # TODO: fix in rucio download to set specific outputfile
        #       https://its.cern.ch/jira/browse/RUCIO-2063
        downloaded = target_dir + '/%s/%s' % (fspec.scope, fspec.lfn)
        cmd = 'mv %s %s' % (downloaded, dst)
        tolog('stageInCmd: %s' % cmd)
        status, out = getstatusoutput(cmd)
        tolog('stageInOutput: %s' % out)
        if status:
            raise PilotException(
                'stageIn failed -- could not move downloaded file to destination: %s'
                % out.replace('\n', ''))

        return dict(ddmendpoint=fspec.replicas[0][0],
                    surl=None,
                    pfn=fspec.lfn)
예제 #19
0
    def is_stagein_allowed(self, fspec, job):
        """
            Tell whether the stage-in of `fspec` is allowed for this mover.
            Job specific checks can be added here; custom sitemovers are
            expected to overwrite this method.

            For analysis jobs, an input file other than a '.lib.tgz' file is
            rejected when self.isFileStaged() reports it as not staged.

            :param fspec: file spec object; lfn is read
            :param job: job object providing isAnalysisJob()
            :return: True in case stage-in transfer is allowed
            :raise: PilotException (ERR_FILEONTAPE, state FILE_ON_TAPE) in case the file is not staged
        """

        # lib tarballs and non-analysis jobs are never checked for tape residency
        needs_tape_check = job.isAnalysisJob() and '.lib.tgz' not in fspec.lfn

        if needs_tape_check and not self.isFileStaged(fspec):
            raise PilotException("File %s is not staged and will be skipped for analysis job: stage-in is not allowed" % fspec.lfn, code=PilotErrors.ERR_FILEONTAPE, state='FILE_ON_TAPE')

        return True
예제 #20
0
    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file with the configured copy command

            Detects whether fspec.ddmendpoint is a Dynafed cloud endpoint and
            picks the matching gfal properties. The local file is fspec.pfn
            when set, otherwise `source`. The command is passed through
            singularityWrapper() and executed by self._stagefile().

            :param source: local file, used when fspec.pfn is not set
            :param destination: remote destination
            :param fspec: file spec object (ddmendpoint, pfn, cmtconfig and get_checksum() are read)
            :return: whatever self._stagefile() returns -- remote file (checksum, checksum_type) in case of success
            :raise: PilotException in case of controlled error (unknown ddmendpoint / missing token)
        """
        self.log(
            "gfalcopy_sitemover: stageOutFile() arguments: src=%s, dst=%s fspec=%s"
            % (source, destination, fspec))

        # the Dynafed flag is kept on the instance because
        # getRemoteFileChecksum() is called later from base.stageOut()
        # without fspec and needs it
        self._isDynafedCloud = self.detectDynafedCloud(fspec.ddmendpoint)
        if self._isDynafedCloud:
            gfal_prop = self.gfal_prop_dynacloud
        else:
            gfal_prop = self.gfal_prop_grid

        # in ES workflow only fspec.pfn is correct, but it may be not set for normal workflow
        src = fspec.pfn or source

        # resolve token value from fspec.ddmendpoint
        endpoint_conf = self.ddmconf.get(fspec.ddmendpoint, {})
        token = endpoint_conf.get('token')
        if not token:
            message = "stageOutFile: Failed to resolve token value for ddmendpoint=%s: src=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (
                fspec.ddmendpoint, src, destination, fspec)
            raise PilotException(message)

        filesize = os.path.getsize(src)
        timeout = self.getTimeOut(filesize)

        # no checksum option is passed for Dynafed cloud endpoints
        src_checksum, src_checksum_type = fspec.get_checksum()
        if src_checksum and not self._isDynafedCloud:
            checksum_opt = '-K %s:%s' % (src_checksum_type, src_checksum)
        else:
            checksum_opt = ''

        # may be omitted, gfal-utils understand local file paths
        srcUrl = "file://%s" % os.path.abspath(src)
        cmd = '%s --verbose %s -p -f -t %s %s -S %s %s %s' % (
            self.copy_command, checksum_opt, timeout, gfal_prop, token, srcUrl,
            destination)

        # Prepend the command with singularity if necessary
        from Singularity import singularityWrapper
        cmd = singularityWrapper(cmd, fspec.cmtconfig, dirname(src))

        return self._stagefile(cmd, src, destination, filesize, is_stagein=False)
예제 #21
0
    def stageIn(self, source, destination, fspec):
        """
        Query HTTP for etag, then symlink to the pilot working directory.

        The https replica of the file at fspec.ddmendpoint is looked up in
        Rucio, and a WebDAV PROPFIND request is sent to it with davix-http.

        :param source:      original file location
        :param destination: where to create the link
        :param fspec:       file spec object (scope, lfn, ddmendpoint are read)
        :return:            destination file details (checksumtype, checksum, size)
                            NOTE(review): the code visible here ends after the
                            PROPFIND request and returns nothing -- confirm
        :raise: PilotException if Rucio reports no https replica or the
                PROPFIND command cannot be run
        """

        self.log('source: %s' % str(source))
        self.log('destination: %s' % str(destination))
        self.log('fspec: %s' % str(fspec))
        self.log('fspec.scope: %s' % str(fspec.scope))
        self.log('fspec.lfn: %s' % str(fspec.lfn))
        self.log('fspec.ddmendpoint: %s' % str(fspec.ddmendpoint))

        # figure out the HTTP SURL from Rucio

        from rucio.client import ReplicaClient

        rc = ReplicaClient()
        http_surl_reps = list(
            rc.list_replicas(dids=[{
                'scope': fspec.scope,
                'name': fspec.lfn
            }],
                             schemes=['https'],
                             rse_expression=fspec.ddmendpoint))
        self.log('http_surl_reps: %s' % http_surl_reps)

        # fail with a controlled error instead of an IndexError when Rucio
        # does not know any matching replica
        if not http_surl_reps:
            raise PilotException(
                'No https replica found in Rucio for %s:%s at %s' %
                (fspec.scope, fspec.lfn, fspec.ddmendpoint))

        # keep only the part of the replica URL in front of the first '_-'
        http_surl = http_surl_reps[0]['rses'][fspec.ddmendpoint][0].rsplit(
            '_-')[0]
        self.log('http_surl: %s' % http_surl)

        # retrieve the TURL from the webdav etag
        cmd = 'davix-http --capath /cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/etc/grid-security-emi/certificates --cert $X509_USER_PROXY -X PROPFIND %s' % http_surl
        self.log('ETAG retrieval: %s' % cmd)
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=10)
        except Exception as e:
            self.log('FATAL: could not retrieve STORM WebDAV ETag: %s' % e)
            raise PilotException('Could not retrieve STORM WebDAV ETag: %s' %
                                 e)
예제 #22
0
파일: mover.py 프로젝트: PalNilsson/pilot
    def resolve_replicas(self, files):
        """
            Resolve the candidate input ddmendpoints of each entry of `files`
            and fetch the replica list of all files from Rucio.

            For every entry `fdat.inputddms` is set to the result of
            self._prepare_input_ddm() applied to the ddmendpoints that share
            the site of `fdat.ddmendpoint`.

            NOTE(review): the original docstring also describes
            fdat.replicas = [(ddmendpoint, replica, ddm_se)], with ddm_se used to
            manually form the TURL when ignore_rucio_replicas=True; that
            assignment is not part of the code shown here - confirm.

            :param files: list of file specs providing ddmendpoint, scope, lfn
            :raise: Exception if a ddmendpoint or its site cannot be resolved
                    from the DDM configuration
            :raise: PilotException if the Rucio replica lookup fails
        """

        # build list of local ddmendpoints grouped by site

        # load ALL ddmconf
        self.ddmconf.update(self.si.resolveDDMConf([]))
        ddms = {}
        for dat in self.ddmconf.values():
            ddms.setdefault(dat['site'], []).append(dat)

        for fdat in files:

            # build and order list of local ddms
            ddmdat = self.ddmconf.get(fdat.ddmendpoint)
            if not ddmdat:
                raise Exception(
                    "Failed to resolve ddmendpoint by name=%s send by Panda job, please check configuration. fdat=%s"
                    % (fdat.ddmendpoint, fdat))
            if not ddmdat['site']:
                # report the instance configuration; the bare name `ddmconf`
                # is not defined in this scope
                raise Exception(
                    "Failed to resolve site name of ddmendpoint=%s. please check ddm declaration: ddmconf=%s ... fdat=%s"
                    % (fdat.ddmendpoint, self.ddmconf, fdat))
            localddms = ddms.get(ddmdat['site'])
            # sort/filter ddms (as possible input source)
            fdat.inputddms = self._prepare_input_ddm(ddmdat, localddms)

        # load replicas from Rucio
        from rucio.client import Client
        c = Client()

        dids = [dict(scope=e.scope, name=e.lfn) for e in files]
        schemes = ['srm', 'root', 'https', 'gsiftp']

        # Get the replica list
        try:
            replicas = c.list_replicas(dids, schemes=schemes)
        except Exception as e:
            raise PilotException("Failed to get replicas from Rucio: %s" % e,
                                 code=PilotErrors.ERR_FAILEDLFCGETREPS)
예제 #23
0
    def stageOutFile(self, source, destination, fspec):
        """
            Upload a local file with the `lsm-put` command.

            The command options carry the file size, the space token of the
            target ddmendpoint, the checksum and the guid; execution is
            delegated to self._stagefile().

            :param source: local file path (its size is read from disk)
            :param destination: target passed to lsm-put
            :param fspec: file spec providing ddmendpoint, guid and the
                          get_checksum()/set_checksum() accessors
            :return: the value returned by self._stagefile()
            :raise: PilotException if no token is configured for fspec.ddmendpoint
        """

        # the space token comes from the DDM configuration of the endpoint
        endpoint_conf = self.ddmconf.get(fspec.ddmendpoint, {})
        token = endpoint_conf.get('token')
        if not token:
            raise PilotException(
                "stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint"
                % (fspec.ddmendpoint, source, destination, fspec))
        filesize = os.path.getsize(source)

        # reuse the checksum stored in fspec; compute and store it if absent
        cksum = fspec.get_checksum()
        if not cksum[0]:
            cksum = self.calc_file_checksum(source)
            fspec.set_checksum(cksum[0], cksum[1])

        # lsm-put accepts either "<type>:<value>" or the bare value
        cksum_value, cksum_type = cksum[0], cksum[1]
        if cksum_type:
            checksum = "%s:%s" % (cksum_type, cksum_value)
        else:
            checksum = cksum_value

        opts = {
            '--size': filesize,
            '-t': token,
            '--checksum': checksum,
            '--guid': fspec.guid
        }
        opt_string = " ".join("%s %s" % pair for pair in opts.iteritems())

        cmd = 'lsm-put %s %s %s' % (opt_string, source, destination)

        return self._stagefile(cmd,
                               source,
                               destination,
                               filesize,
                               is_stagein=False)
예제 #24
0
    def stageOut(self, source, destination, fspec):
        """
        Move the output file from the pilot working directory to self.init_dir.

        The file is located through ``fspec.lfn`` (resolved against the current
        working directory) and moved to ``<self.init_dir>/<fspec.lfn>``.

        NOTE(review): the original docstring announces a return value of
        destination file details (checksumtype, checksum, size); the body shown
        here returns nothing after the move - confirm against the complete
        implementation.

        :param source:      local file location - not used
        :param destination: remote location to copy file - not used
        :param fspec:       file specification; only ``lfn`` is read
        :raise: PilotException if the file cannot be moved
        """

        src = os.path.realpath(fspec.lfn)
        dest = os.path.join(self.init_dir, fspec.lfn)
        self.log('Moving %s to %s' % (src, dest))

        # move the output (the target variable is `dest`; the former `dst`
        # was undefined and made every call fail)
        try:
            shutil.move(src, dest)
        except Exception as e:
            self.log('FATAL: could not move outputfile: %s' % e)
            raise PilotException('Could not move outputfile: %s' % e)
예제 #25
0
class lcgcpSiteMover(BaseSiteMover):
    """ Site mover that runs lcg-cp for both stage-in and stage-out """

    name = "lcgcp"
    copy_command = "lcg-cp"
    checksum_type = "adler32"
    checksum_command = "lcg-get-checksum"

    # URL schemes this mover accepts for transfers
    schemes = ['srm', 'gsiftp']

    def _stagefile(self, cmd, source, destination, filesize, is_stagein):
        """
            Run the copy command and translate its outcome into an exception.

            :param cmd: copy command line; prefixed with self.getSetup() when set
            :param source: source location, used to resolve the failure reason
            :param destination: target location; removed locally when a stage-in fails
            :param filesize: size used to derive the command timeout
            :param is_stagein: True for stage-in, False for stage-out
            :return: (None, None) on success - checksum/size are not extracted from the output
            :raise: PilotException on timeout or non-zero exit status
        """

        timeout = self.getTimeOut(filesize)

        env_setup = self.getSetup()
        if env_setup:
            cmd = "%s; %s" % (env_setup, cmd)

        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        started = datetime.now()
        is_timeout = False
        try:
            runner = TimerCommand(cmd)
            rcode, output = runner.run(timeout=timeout)
            is_timeout = runner.is_timeout
        except Exception as e:
            # treat an exception of the runner like a failed command
            self.log("WARNING: %s threw an exception: %s" %
                     (self.copy_command, e))
            rcode, output = -1, str(e)

        elapsed = datetime.now() - started
        self.log("Command execution time: %s" % elapsed)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if not (is_timeout or rcode):
            # success: nothing is parsed from the command output
            return None, None

        # failure: a stage-in may have left a partially transferred file
        if is_stagein:
            self.removeLocal(destination)

        if is_timeout:
            if is_stagein:
                timeout_code = PilotErrors.ERR_GETTIMEOUT
            else:
                timeout_code = PilotErrors.ERR_PUTTIMEOUT
            raise PilotException(
                "Copy command self timed out after %s, timeout=%s, output=%s" %
                (elapsed, timeout, output),
                code=timeout_code,
                state='CP_TIMEOUT')

        # not a timeout, so the command exited with a non-zero status
        self.log(
            'WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s'
            % (is_stagein, cmd, rcode, output.replace("\n", " ")))
        error = self.resolveStageErrorFromOutput(output,
                                                 source,
                                                 is_stagein=is_stagein)

        # fall back to generic values when the output gives no specific reason
        if is_stagein:
            default_code = PilotErrors.ERR_STAGEINFAILED
        else:
            default_code = PilotErrors.ERR_STAGEOUTFAILED
        code = error.get('rcode') or default_code
        state = error.get('state') or 'COPY_FAIL'

        raise PilotException(error.get('error'), code=code, state=state)
예제 #26
0
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" % ('gfal-rm', e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if is_timeout:
            raise PilotException(
                "removeRemoteFile self timed out after %s, timeout=%s, output=%s"
                % (dt, timeout, output),
                code=PilotErrors.ERR_GENERALERROR,
                state='RM_TIMEOUT')

        if rcode:
            raise PilotException("Failed to remove remote file",
                                 code=PilotErrors.ERR_GENERALERROR,
                                 state='RM_FAILED')

    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file
            Should be implemented by the concrete site mover; only the
            contract is given here, no body follows this docstring.
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """
예제 #27
0
파일: mover.py 프로젝트: PalNilsson/pilot
    def do_put_files(self, ddmendpoint, protocols,
                     files):  # old function : TO BE DEPRECATED ...
        """
        Copy files to dest SE (deprecated implementation).

        For every protocol entry a site mover is created, and every file is
        handed to sitemover.stageOut() with up to self.stageoutretry attempts.

        :param ddmendpoint: DDMEndpoint name used to store files
        :param protocols: list of dicts read for 'copytool', 'copysetup', 'se' and 'path'
        :param files: list of dicts read for 'scope', 'lfn', 'pfn', 'guid' and 'dsname_report'
        :return: (list of transferred_files details, list of failed_transfers details);
                 ([], []) is returned when no SURL protocol can be resolved.
                 NOTE(review): no other return statement is part of the code
                 shown here - confirm against the complete implementation.
        :raise: PilotException in case of error (e.g. a pfn that is missing or unreadable)
        """

        self.log(
            '[deprecated do_put_files()]Prepare to copy files=%s to ddmendpoint=%s using protocols data=%s'
            % (files, ddmendpoint, protocols))
        self.log("[deprecated do_put_files()]Number of stage-out tries: %s" %
                 self.stageoutretry)

        # get SURL for Panda callback registration
        # resolve from special protocol activity=SE # fix me later to proper name of activity=SURL (panda SURL, at the moment only 2-letter name is allowed on AGIS side)
        # if SE is not found, fall back to the entries of activity 'a';
        # entries are ordered by their second element, the first and third
        # elements are used as 'se' and 'path'
        surl_prot = [
            dict(se=e[0], path=e[2]) for e in
            sorted(self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get(
                'SE',
                self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get(
                    'a', [])),
                   key=lambda x: x[1])
        ]

        if not surl_prot:
            # nothing can be registered without a SURL: give up without raising
            self.log('FAILED to resolve default SURL path for ddmendpoint=%s' %
                     ddmendpoint)
            return [], []
        surl_prot = surl_prot[0]  # take first
        self.log("[do_put_files] SURL protocol to be used: %s" % surl_prot)

        self.trace_report.update(localSite=ddmendpoint, remoteSite=ddmendpoint)

        transferred_files, failed_transfers = [], []

        for dat in protocols:

            copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

            # a protocol whose site mover cannot be created or set up is skipped
            try:
                sitemover = getSiteMover(copytool)(copysetup,
                                                   workDir=self.job.workdir)
                sitemover.trace_report = self.trace_report
                sitemover.protocol = dat  # ##
                sitemover.ddmconf = self.ddmconf  # quick workaround  ###
                sitemover.setup()
            except Exception, e:
                self.log(
                    '[do_put_files] WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                    % (e, dat))
                continue

            self.log("[do_put_files] Copy command: %s, sitemover=%s" %
                     (copytool, sitemover))
            self.log("[do_put_files] Copy setup: %s" % copysetup)

            self.trace_report.update(protocol=copytool)

            se, se_path = dat.get('se', ''), dat.get('path', '')

            self.log("[do_put_files] Found N=%s files to be transferred: %s" %
                     (len(files), [e.get('pfn') for e in files]))

            for fdata in files:
                scope, lfn, pfn = fdata.get(
                    'scope', ''), fdata.get('lfn'), fdata.get('pfn')
                guid = fdata.get('guid', '')

                # SURL is built from the SURL protocol resolved above,
                # TURL from the se/path of the current protocol entry
                surl = sitemover.getSURL(
                    surl_prot.get('se'), surl_prot.get('path'), scope, lfn,
                    self.job
                )  # job is passing here for possible JOB specific processing
                turl = sitemover.getSURL(
                    se, se_path, scope, lfn, self.job
                )  # job is passing here for possible JOB specific processing

                self.trace_report.update(scope=scope,
                                         dataset=fdata.get('dsname_report'),
                                         url=surl)
                self.trace_report.update(catStart=time.time(),
                                         filename=lfn,
                                         guid=guid.replace('-', ''))

                self.log(
                    "[do_put_files] Preparing copy for pfn=%s to ddmendpoint=%s using copytool=%s: mover=%s"
                    % (pfn, ddmendpoint, copytool, sitemover))
                self.log("[do_put_files] lfn=%s: SURL=%s" % (lfn, surl))
                self.log("[do_put_files] TURL=%s" % turl)

                # a missing or unreadable local file aborts the whole call
                if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                    error = "Erron: input pfn file is not exist: %s" % pfn
                    self.log(error)
                    raise PilotException(
                        error,
                        code=PilotErrors.ERR_MISSINGOUTPUTFILE,
                        state="FILE_INFO_FAIL")

                filename = os.path.basename(pfn)

                # update the current file state
                updateFileState(filename,
                                self.workDir,
                                self.job.jobId,
                                mode="file_state",
                                state="not_transferred")
                dumpFileStates(self.workDir, self.job.jobId)

                # loop over multiple stage-out attempts
                for _attempt in xrange(1, self.stageoutretry + 1):

                    if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                        self.log(
                            " -- Waiting %d seconds before next stage-out attempt for file=%s --"
                            % (self.stageout_sleeptime, filename))
                        time.sleep(self.stageout_sleeptime)

                    self.log(
                        "[do_put_files] Put attempt %d/%d for filename=%s" %
                        (_attempt, self.stageoutretry, filename))

                    # `result` holds the stageOut() return value on success,
                    # or the (wrapped) exception of the last failed attempt
                    try:
                        # quick work around
                        from Job import FileSpec
                        stub_fspec = FileSpec(ddmendpoint=ddmendpoint,
                                              guid=guid,
                                              scope=scope,
                                              lfn=lfn)
                        result = sitemover.stageOut(pfn, turl, stub_fspec)
                        break  # transferred successfully
                    except PilotException, e:
                        result = e
                        self.log(traceback.format_exc())

                    except Exception, e:
                        # unexpected errors are wrapped into a stage-out failure
                        self.log(traceback.format_exc())
                        result = PilotException(
                            "stageOut failed with error=%s" % e,
                            code=PilotErrors.ERR_STAGEOUTFAILED)

                    # only reached when the attempt failed (success breaks above)
                    self.log(
                        'WARNING [do_put_files]: Error in copying file (attempt %s): %s'
                        % (_attempt, result))
예제 #28
0
파일: mover.py 프로젝트: PalNilsson/pilot
    def stageout(self, activity, files):
        """
            Copy files to dest SE:
            main control function, it should care about alternative stageout and retry-policy for different ddmendpoints

            Steps, in order:
              1. check that every file exists and is readable in the job
                 workdir and record its size in fspec.filesize;
              2. group files and PQ protocols by ddmendpoint; endpoints without
                 PQ protocol settings get default protocols generated from the
                 ddmconf `aprotocols` of `activity` (or of 'w' if that is empty);
              3. resolve one SURL protocol per ddmendpoint: activity 'SE', else
                 the srm entries of 'a' (or of 'r' when 'a' is not declared);
              4. per ddmendpoint, protocol and copytool, put each file that is
                 not yet transferred with up to self.stageoutretry attempts.

        :param activity: activity name used to resolve protocols and copytools
        :param files: list of file specs (lfn, scope, guid, ddmendpoint, surl, status, ...)
        :return: NOTE(review): the original docstring announces
                 (transferred_files, failed_transfers); no return statement is
                 part of the code shown here - confirm against the complete
                 implementation.
        :raise: PilotException in case of error (empty file list, missing
                output file, no protocols/copytools/SURL for an endpoint)
        """

        if not files:
            raise PilotException(
                "Failed to put files: empty file list to be transferred")

        pandaqueue = self.si.getQueueName()  # FIX ME LATER
        protocols = self.protocols.setdefault(
            activity,
            self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
        copytools = self.si.resolvePandaCopytools(pandaqueue,
                                                  activity)[pandaqueue]

        self.log(
            "Mover.stageout() [new implementation] started for activity=%s, files=%s, protocols=%s, copytools=%s"
            % (activity, files, protocols, copytools))

        # check if file exists before actual processing
        # populate filesize if need

        for fspec in files:
            pfn = os.path.join(self.job.workdir, fspec.lfn)
            if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                error = "Erron: input pfn file is not exist: %s" % pfn
                self.log(error)
                raise PilotException(error,
                                     code=PilotErrors.ERR_MISSINGOUTPUTFILE,
                                     state="FILE_INFO_FAIL")
            fspec.filesize = os.path.getsize(pfn)

        totalsize = sum(e.filesize for e in files)

        transferred_files, failed_transfers = [], []

        self.log(
            "Found N=%s files to be transferred, total_size=%.3f MB: %s" %
            (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

        # first resolve protocol settings from PQ specific aprotocols settings
        # then resolve settings from default ddm.protocols supported by copytools

        # group protocols, files by ddmendpoint
        ddmprotocols, ddmfiles = {}, {}
        for e in files:
            ddmfiles.setdefault(e.ddmendpoint, []).append(e)

        # load DDM conf/protocols
        self.ddmconf.update(self.si.resolveDDMConf(ddmfiles.keys()))

        for e in protocols:
            if e['ddm'] not in ddmfiles:  # skip not affected protocols settings
                continue
            # a PQ protocol is bound to exactly one copytool
            e['copytools'] = [{
                'copytool': e['copytool'],
                'copysetup': e['copysetup']
            }]
            ddmprotocols.setdefault(e['ddm'], []).append(e)

        # generate default protocols from copytools/schemes and ddmconf
        unknown_ddms = set(ddmfiles) - set(ddmprotocols)
        for ddmendpoint in unknown_ddms:
            dd = self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {})
            dat = dd.get(activity, []) or dd.get('w', [])
            dprotocols = [
                dict(se=e[0], path=e[2], resolve_scheme=True)
                for e in sorted(dat, key=lambda x: x[1])
            ]
            ddmprotocols.setdefault(ddmendpoint, dprotocols)

        unknown_ddms = set(ddmfiles) - set(ddmprotocols)
        if unknown_ddms:
            raise PilotException(
                "Failed to put files: no protocols defined for output ddmendpoints=%s .. check aprotocols schedconfig settings for activity=%s or default ddm.aprotocols entries"
                % (unknown_ddms, activity),
                code=PilotErrors.ERR_NOSTORAGE)

        self.log(
            "[stage-out] [%s] filtered protocols to be used to transfer files: protocols=%s"
            % (activity, ddmprotocols))

        # get SURL endpoint for Panda callback registration
        # resolve from special protocol activity='SE' or fallback to activity='a', then to 'r'

        surl_protocols, no_surl_ddms = {}, set()

        for fspec in files:
            if not fspec.surl:  # initialize only if not already set
                d = self.ddmconf.get(fspec.ddmendpoint,
                                     {}).get('aprotocols', {})
                xprot = d.get('SE', [])
                if not xprot:
                    # only srm entries qualify as SURL in the fallback case
                    xprot = [
                        e for e in d.get('a', d.get('r', []))
                        if e[0] and e[0].startswith('srm')
                    ]
                surl_prot = [
                    dict(se=e[0], path=e[2])
                    for e in sorted(xprot, key=lambda x: x[1])
                ]
                if surl_prot:
                    surl_protocols.setdefault(fspec.ddmendpoint, surl_prot[0])
                else:
                    no_surl_ddms.add(fspec.ddmendpoint)

        if no_surl_ddms:  # failed to resolve SURLs
            self.log(
                'FAILED to resolve default SURL path for ddmendpoints=%s' %
                list(no_surl_ddms))
            raise PilotException(
                "Failed to put files: no SE/SURL protocols defined for output ddmendpoints=%s .. check ddmendpoints aprotocols settings for activity=SE/a/r"
                % list(no_surl_ddms),
                code=PilotErrors.ERR_NOSTORAGE)

        # site mover instances are created once per copytool and reused
        sitemover_objects = {}

        # try to iterate over protocol of given ddmendpoint until successful transfer
        for ddmendpoint, iprotocols in ddmprotocols.iteritems():

            for dat in iprotocols:

                remain_files = [
                    e for e in ddmfiles.get(ddmendpoint)
                    if e.status not in ['transferred']
                ]
                if not remain_files:
                    self.log(
                        'INFO: all files to be transfered to ddm=%s have been successfully processed for activity=%s ..'
                        % (ddmendpoint, activity))
                    # stop checking other protocols of ddmendpoint
                    break

                if 'copytools' not in dat:
                    # use allowed copytools
                    dat['copytools'] = [{
                        'copytool': cp,
                        'copysetup': settings.get('setup')
                    } for cp, settings in copytools]

                if not dat['copytools']:
                    # the values are passed as one tuple: list() accepts a
                    # single argument and the message needs three placeholders
                    msg = 'FAILED to resolve final copytools settings for ddmendpoint=%s, please check schedconf.copytools settings: copytools=%s, iprotocols=%s' % (
                        ddmendpoint, copytools, iprotocols)
                    self.log(msg)
                    raise PilotException(msg, code=PilotErrors.ERR_NOSTORAGE)

                for cpsettings in dat.get('copytools', []):
                    copytool, copysetup = cpsettings.get(
                        'copytool'), cpsettings.get('copysetup')

                    try:
                        sitemover = sitemover_objects.get(copytool)
                        if not sitemover:
                            sitemover = getSiteMover(copytool)(
                                copysetup, workDir=self.job.workdir)
                            sitemover_objects.setdefault(copytool, sitemover)

                            # NOTE(review): protocol is assigned only when the
                            # mover is first created, so a reused mover keeps
                            # the protocol of the first `dat` - confirm intended
                            sitemover.trace_report = self.trace_report
                            sitemover.protocol = dat  # ##
                            sitemover.ddmconf = self.ddmconf  # quick workaround  ###
                            sitemover.setup()
                        if dat.get('resolve_scheme'):
                            dat['scheme'] = sitemover.schemes
                    except Exception as e:
                        self.log(
                            'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                            % (e, dat))
                        continue

                    # filter protocols by accepted scheme from copytool
                    if dat.get('scheme') and not any(
                            dat['se'].startswith(scheme)
                            for scheme in dat.get('scheme')):
                        self.log(
                            "[stage-out] protocol=%s of ddmendpoint=%s is skipped since copytool=%s does not support it, accepted schemes=%s"
                            % (dat['se'], ddmendpoint, copytool,
                               dat['scheme']))

                        continue

                    self.log("Copy command [stage-out]: %s, sitemover=%s" %
                             (copytool, sitemover))
                    self.log("Copy setup   [stage-out]: %s" % copysetup)

                    self.trace_report.update(protocol=copytool,
                                             localSite=ddmendpoint,
                                             remoteSite=ddmendpoint)

                    # validate se value?
                    se, se_path = dat.get('se', ''), dat.get('path', '')

                    for fdata in remain_files:

                        if not fdata.surl:
                            fdata.surl = sitemover.getSURL(
                                surl_protocols[fdata.ddmendpoint].get('se'),
                                surl_protocols[fdata.ddmendpoint].get('path'),
                                fdata.scope, fdata.lfn, self.job
                            )  # job is passing here for possible JOB specific processing

                        updateFileState(fdata.lfn,
                                        self.workDir,
                                        self.job.jobId,
                                        mode="file_state",
                                        state="not_transferred",
                                        ftype="output")

                        fdata.turl = sitemover.getSURL(
                            se, se_path, fdata.scope, fdata.lfn, self.job
                        )  # job is passing here for possible JOB specific processing

                        self.log(
                            "[stage-out] resolved SURL=%s to be used for lfn=%s, ddmendpoint=%s"
                            % (fdata.surl, fdata.lfn, fdata.ddmendpoint))

                        self.log(
                            "[stage-out] resolved TURL=%s to be used for lfn=%s, ddmendpoint=%s"
                            % (fdata.turl, fdata.lfn, fdata.ddmendpoint))

                        self.log(
                            "[stage-out] Prepare to put_data: ddmendpoint=%s, protocol=%s, fspec=%s"
                            % (ddmendpoint, dat, fdata))

                        self.trace_report.update(catStart=time.time(),
                                                 filename=fdata.lfn,
                                                 guid=fdata.guid.replace(
                                                     '-', ''))
                        self.trace_report.update(
                            scope=fdata.scope,
                            dataset=fdata.destinationDblock,
                            url=fdata.turl)

                        self.log(
                            "[stage-out] Preparing copy for lfn=%s using copytool=%s: mover=%s"
                            % (fdata.lfn, copytool, sitemover))

                        # reset per file: with stageoutretry < 1 the loop below
                        # does not run and `result` would otherwise be unbound
                        # or still hold the outcome of the previous file
                        result = None

                        # loop over multiple stage-out attempts
                        for _attempt in xrange(1, self.stageoutretry + 1):

                            if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                                self.log(
                                    " -- Waiting %s seconds before next stage-out attempt for file=%s --"
                                    % (self.stageout_sleeptime, fdata.lfn))
                                time.sleep(self.stageout_sleeptime)

                            self.log("Put attempt %s/%s for filename=%s" %
                                     (_attempt, self.stageoutretry, fdata.lfn))

                            try:
                                result = sitemover.put_data(fdata)
                                fdata.status = 'transferred'  # mark as successful
                                if result.get('surl'):
                                    fdata.surl = result.get('surl')

                                self.trace_report.update(url=fdata.turl)  ###

                                # finalize and send trace report
                                self.trace_report.update(clientState='DONE',
                                                         stateReason='OK',
                                                         timeEnd=time.time())
                                self.sendTrace(self.trace_report)

                                updateFileState(fdata.lfn,
                                                self.workDir,
                                                self.job.jobId,
                                                mode="file_state",
                                                state="transferred",
                                                ftype="output")
                                dumpFileStates(self.workDir,
                                               self.job.jobId,
                                               ftype="output")

                                self.updateSURLDictionary(
                                    fdata.guid, fdata.surl, self.workDir, self.
                                    job.jobId)  # FIXME LATER: isolate later

                                fdat = result.copy()
                                transferred_files.append(fdat)

                                break  # transferred successfully
                            except PilotException as e:
                                result = e
                                self.log(traceback.format_exc())
                            except Exception as e:
                                # unexpected errors are wrapped into a stage-out failure
                                result = PilotException(
                                    "stageOut failed with error=%s" % e,
                                    code=PilotErrors.ERR_STAGEOUTFAILED)
                                self.log(traceback.format_exc())

                            # only reached when the attempt failed
                            self.log(
                                'WARNING: Error in copying file (attempt %s/%s): %s'
                                % (_attempt, self.stageoutretry, result))

                        if isinstance(result, Exception):  # failure transfer
                            failed_transfers.append(result)
예제 #29
0
파일: mover.py 프로젝트: PalNilsson/pilot
    def stagein(self):
        """
            Stage in the input files of the job (self.job.inData).

            Builds a list of candidate protocols -- the protocols resolved for
            the panda queue and activity 'pr' first, followed by one entry per
            allowed copytool -- and walks it in order. For every candidate a
            site mover is obtained (one instance per copytool, cached), and each
            input file whose status is not yet 'direct_access' or 'transferred'
            is examined: cross-site consistency is checked, a replica is
            resolved, and direct-access files are marked and skipped.

            :return: (transferred_files, failed_transfers)
            :raise PilotException: with code ERR_NOSTORAGE when neither
                protocols nor copytools are configured for the queue/activity.

            NOTE(review): the return statement is not part of the code visible
            below; the return value is documented as in the original docstring.
        """

        # activity name used to look up protocols and copytools for stage-in
        activity = 'pr'

        pandaqueue = self.si.getQueueName()  # FIX ME LATER
        # Cache the queue protocols per activity in self.protocols.
        # dict.setdefault evaluates its default argument eagerly, so
        # resolvePandaProtocols() is called even if an entry is already cached.
        protocols = self.protocols.setdefault(
            activity,
            self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
        # iterated below as (copytool name, settings) pairs
        copytools = self.si.resolvePandaCopytools(pandaqueue,
                                                  activity)[pandaqueue]

        self.log("stage-in: pq.aprotocols=%s, pq.copytools=%s" %
                 (protocols, copytools))

        files = self.job.inData
        self.resolve_replicas(
            files)  # populates also self.ddmconf = self.si.resolveDDMConf([])

        maxinputsize = self.getMaxInputSize()
        # total size in bytes of all input files (only used for the log below)
        totalsize = reduce(lambda x, y: x + y.filesize, files, 0)

        # result accumulators; not touched in the visible part of this method
        transferred_files, failed_transfers = [], []

        self.log(
            "Found N=%s files to be transferred, total_size=%.3f MB: %s" %
            (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

        # process first PQ specific protocols settings
        # then protocols supported by copytools

        # protocol generated from aprotocols is {'copytool':'', 'copysetup':'', 'se':'', 'ddm':''}
        # protocol generated from  copytools is {'copytool':'', 'copysetup', 'scheme':''}

        # build accepted schemes from allowed copytools
        # ('resolve_scheme' marks entries whose 'scheme' is filled in later
        # from the site mover's own list of schemes)
        cprotocols = []
        for cp, settings in copytools:
            cprotocols.append({
                'resolve_scheme': True,
                'copytool': cp,
                'copysetup': settings.get('setup')
            })

        # concatenation creates a new list, so the list cached in
        # self.protocols is not extended with the copytool entries
        protocols = protocols + cprotocols
        if not protocols:
            raise PilotException(
                "Failed to get files: neither aprotocols nor allowed copytools defined for input. check copytools/acopytools/aprotocols schedconfig settings for activity=%s, pandaqueue=%s"
                % (activity, pandaqueue),
                code=PilotErrors.ERR_NOSTORAGE)

        # site mover instances created so far, keyed by copytool name
        sitemover_objects = {}

        for dat in protocols:

            # files that still need processing with this (or a later) protocol
            remain_files = [
                e for e in files
                if e.status not in ['direct_access', 'transferred']
            ]
            if not remain_files:
                self.log(
                    'INFO: all input files have been successfully processed')
                break

            copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

            try:
                # reuse the site mover of this copytool if one was already created
                sitemover = sitemover_objects.get(copytool)
                if not sitemover:
                    sitemover = getSiteMover(copytool)(
                        copysetup, workDir=self.job.workdir)
                    # NOTE(review): the instance is cached before setup() runs,
                    # so if setup() raises, a later protocol with the same
                    # copytool gets this instance without setup() being retried
                    # -- confirm this is intended.
                    sitemover_objects.setdefault(copytool, sitemover)

                    sitemover.trace_report = self.trace_report
                    sitemover.ddmconf = self.ddmconf  # self.si.resolveDDMConf([]) # quick workaround  ###
                    sitemover.setup()
                if dat.get('resolve_scheme'):
                    dat['scheme'] = sitemover.schemes
            except Exception, e:
                # any failure while obtaining/configuring the site mover only
                # disqualifies this protocol entry; continue with the next one
                self.log(
                    'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                    % (e, dat))
                continue

            self.log("Copy command [stage-in]: %s, sitemover=%s" %
                     (copytool, sitemover))
            self.log("Copy setup   [stage-in]: %s" % copysetup)

            self.trace_report.update(protocol=copytool)

            # verify file sizes and available space for stagein
            # (called outside of any try block: an exception raised here
            # propagates to the caller of stagein)
            sitemover.check_availablespace(maxinputsize, remain_files)

            for fdata in remain_files:

                # record the file as not yet transferred before any attempt
                updateFileState(fdata.lfn,
                                self.workDir,
                                self.job.jobId,
                                mode="file_state",
                                state="not_transferred",
                                ftype="input")

                self.log(
                    "[stage-in] Prepare to get_data: protocol=%s, fspec=%s" %
                    (dat, fdata))

                # check if protocol and fdata.ddmendpoint belong to same site
                # (only for protocol entries that carry a 'ddm' key; a site
                # missing from self.ddmconf resolves to None on either side)
                if dat.get('ddm'):
                    protocol_site = self.ddmconf.get(dat.get('ddm'),
                                                     {}).get('site')
                    replica_site = self.ddmconf.get(fdata.ddmendpoint,
                                                    {}).get('site')

                    if protocol_site != replica_site:
                        self.log(
                            'INFO: cross-sites checks: protocol_site=%s and (fdata.ddmenpoint) replica_site=%s mismatched .. skip file processing for copytool=%s (protocol=%s)'
                            % (protocol_site, replica_site, copytool, dat))
                        continue

                # let the site mover choose the replica for this file/protocol;
                # the result is read below through .get() and item access
                r = sitemover.resolve_replica(fdata, dat)

                # quick stub: propagate changes to FileSpec
                if r.get('surl'):
                    fdata.surl = r[
                        'surl']  # TO BE CLARIFIED if it's still used and need
                if r.get('pfn'):
                    fdata.turl = r['pfn']
                if r.get('ddmendpoint'):
                    fdata.ddmendpoint = r['ddmendpoint']

                self.log(
                    "[stage-in] found replica to be used: ddmendpoint=%s, pfn=%s"
                    % (fdata.ddmendpoint, fdata.turl))

                # check if protocol and found replica belong to same site
                # (repeated because fdata.ddmendpoint may have been replaced by
                # the resolved replica just above)
                if dat.get('ddm'):
                    protocol_site = self.ddmconf.get(dat.get('ddm'),
                                                     {}).get('site')
                    replica_site = self.ddmconf.get(fdata.ddmendpoint,
                                                    {}).get('site')

                    if protocol_site != replica_site:
                        self.log(
                            'INFO: cross-sites checks: protocol_site=%s and replica_site=%s mismatched .. skip file processing for copytool=%s'
                            % (protocol_site, replica_site, copytool))
                        continue

                # check direct access
                self.log(
                    "fdata.is_directaccess()=%s, job.accessmode=%s, mover.is_directaccess()=%s"
                    % (fdata.is_directaccess(), self.job.accessmode,
                       self.is_directaccess()))

                # the job's accessmode ('copy' / 'direct') overrides the
                # mover-level default; any other value leaves the default as is
                is_directaccess = self.is_directaccess()
                if self.job.accessmode == 'copy':
                    is_directaccess = False
                elif self.job.accessmode == 'direct':
                    is_directaccess = True
                if fdata.is_directaccess(
                ) and is_directaccess:  # direct access mode, no transfer required
                    fdata.status = 'direct_access'
                    updateFileState(fdata.lfn,
                                    self.workDir,
                                    self.job.jobId,
                                    mode="transfer_mode",
                                    state="direct_access",
                                    ftype="input")

                    self.log(
                        "Direct access mode will be used for lfn=%s .. skip transfer the file"
                        % fdata.lfn)
                    continue

                # apply site-mover custom job-specific checks for stage-in
                # A PilotException raised by the check is treated as a refusal
                # and kept as the reason; any other exception propagates (the
                # final handler only re-raises). 'reason' is assigned only when
                # stage-in is not allowed.
                try:
                    is_stagein_allowed = sitemover.is_stagein_allowed(
                        fdata, self.job)
                    if not is_stagein_allowed:
                        reason = 'SiteMover does not allowed stage-in operation for the job'
                except PilotException, e:
                    is_stagein_allowed = False
                    reason = e
                except Exception:
                    raise
                # NOTE(review): the visible code of this method ends here; the
                # handling of is_stagein_allowed/reason, the transfer itself and
                # the return of (transferred_files, failed_transfers) are not
                # shown.
예제 #30
0
파일: mover.py 프로젝트: PalNilsson/pilot
                            fdata.ddmendpoint = result.get('ddmendpoint')
                        if result.get('surl'):
                            fdata.surl = result.get('surl')
                        if result.get('pfn'):
                            fdata.turl = result.get('pfn')

                        #self.trace_report.update(url=fdata.surl) ###
                        self.trace_report.update(url=fdata.turl)  ###

                        break  # transferred successfully
                    except PilotException, e:
                        result = e
                        self.log(traceback.format_exc())
                    except Exception, e:
                        result = PilotException(
                            "stageIn failed with error=%s" % e,
                            code=PilotErrors.ERR_STAGEINFAILED)
                        self.log(traceback.format_exc())

                    self.log(
                        'WARNING: Error in copying file (attempt %s/%s): %s' %
                        (_attempt, self.stageinretry, result))

                if not isinstance(result,
                                  Exception):  # transferred successfully

                    # finalize and send trace report
                    self.trace_report.update(clientState='DONE',
                                             stateReason='OK',
                                             timeEnd=time.time())
                    self.sendTrace(self.trace_report)
예제 #31
0
파일: mover.py 프로젝트: complynx/pilot
                        if result.get('ddmendpoint'):
                            fdata.ddmendpoint = result.get('ddmendpoint')
                        if result.get('surl'):
                            fdata.surl = result.get('surl')
                        if result.get('pfn'):
                            fdata.turl = result.get('pfn')

                        #self.trace_report.update(url=fdata.surl) ###
                        self.trace_report.update(url=fdata.turl) ###

                        break # transferred successfully
                    except PilotException, e:
                        result = e
                        self.log(traceback.format_exc())
                    except Exception, e:
                        result = PilotException("stageIn failed with error=%s" % e, code=PilotErrors.ERR_STAGEINFAILED)
                        self.log(traceback.format_exc())

                    self.log('WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageinretry, result))

                if not isinstance(result, Exception): # transferred successfully

                    # finalize and send trace report
                    self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time())
                    self.sendTrace(self.trace_report)

                    updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="input")
                    dumpFileStates(self.workDir, self.job.jobId, ftype="input")

                    ## self.updateSURLDictionary(guid, surl, self.workDir, self.job.jobId) # FIX ME LATER