Example 1
    def stageOut(self, src, dst, fspec):
        """
        Use the rucio upload command to stage out the file.

        :param src:   overrides parent signature -- unused
        :param dst:   overrides parent signature -- unused
        :param fspec: dictionary containing destination ddmendpoint, scope, lfn
        :return:      destination file details (ddmendpoint, surl, pfn)
        """

        if fspec.storageId and int(fspec.storageId) > 0:
            cmd = 'rucio upload --no-register --rse %s --scope %s --pfn %s %s' % (
                fspec.ddmendpoint, fspec.scope, fspec.turl,
                fspec.pfn if fspec.pfn else fspec.lfn)
        else:
            guid = ' --guid %s' % fspec.guid if fspec.lfn and '.root' in fspec.lfn else ''
            cmd = 'rucio upload%s --no-register --rse %s --scope %s %s' % (
                guid, fspec.ddmendpoint, fspec.scope,
                fspec.pfn if fspec.pfn else fspec.lfn)

        # Prepend the command with singularity if necessary
        from Singularity import singularityWrapper
        cmd = singularityWrapper(cmd, fspec.cmtconfig, dirname(src))

        tolog('stageOutCmd: %s' % cmd)
        s, o = getstatusoutput(cmd)
        tolog('stageOutOutput: %s' % o)

        if s:
            raise PilotException(
                'stageOut failed -- rucio upload did not succeed: %s' %
                o.replace('\n', ''))

        return {
            'ddmendpoint': fspec.ddmendpoint,
            'surl': fspec.surl,
            'pfn': fspec.lfn
        }
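The command string built above can be reproduced outside the pilot. The sketch below uses a plain namedtuple in place of the pilot's FileSpec; the RSE, scope, LFN and GUID values are made up for illustration.

from collections import namedtuple

# plain stand-in for the pilot's FileSpec; all values are hypothetical
FSpec = namedtuple('FSpec', 'ddmendpoint scope pfn lfn guid')
fspec = FSpec(ddmendpoint='SITE_DATADISK', scope='mc16_13TeV', pfn=None,
              lfn='EVNT.12345._000001.pool.root.1',
              guid='0A1B2C3D-0000-1111-2222-333344445555')

# no external storageId set, so take the second branch of stageOut() above
guid = ' --guid %s' % fspec.guid if fspec.lfn and '.root' in fspec.lfn else ''
cmd = 'rucio upload%s --no-register --rse %s --scope %s %s' % (
    guid, fspec.ddmendpoint, fspec.scope, fspec.pfn if fspec.pfn else fspec.lfn)
print(cmd)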
Example 2
    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file
            Should be implemented by the specific site mover
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """
        # resolve token value from fspec.ddmendpoint

        token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
        if not token:
            raise PilotException(
                "stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint"
                % (fspec.ddmendpoint, source, destination, fspec))
        filesize = os.path.getsize(source)
        timeout = self.getTimeOut(filesize)
        cmd = '%s --verbose --vo atlas -b -U srmv2 --connect-timeout=300 --srm-timeout=%s --sendreceive-timeout=%s -S %s %s %s' % (
            self.copy_command, timeout, timeout, token, source, destination)
        return self._stagefile(cmd,
                               source,
                               destination,
                               filesize,
                               is_stagein=False)
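getTimeOut() is not shown in these excerpts; a plausible size-scaled timeout helper is sketched below. The base value and assumed transfer rate are illustrative, not the pilot's actual constants.

def get_timeout(filesize, base=5 * 60, bytes_per_second=512 * 1024):
    # allow a fixed base time plus time proportional to the file size
    return int(base + filesize / float(bytes_per_second))

print(get_timeout(2 * 1024 ** 3))   # a ~2 GB file gets roughly 70 minutes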
Example 3
    def stageOut(self, source, destination, fspec):
        """
        Override stageOut rather than stageOutFile since most of stageOut is
        unnecessary.
        Move the output file from the pilot working directory to the top level
        directory.
        Create the output file list for ARC CE.

        :param source:      local file location
        :param destination: remote location to copy file
        :param fspec:  dictionary containing destination replicas, scope, lfn
        :return:       destination file details (checksumtype, checksum, size)
        """

        src = os.path.realpath(fspec.lfn)
        dest = os.path.join(self.init_dir, fspec.lfn)
        self.log('Moving %s to %s' % (src, dest))
        try:
            # OS copy is done first so don't move
            if fspec.activity != 'pls':
                shutil.move(src, dest)
        except IOError as e:
            raise PilotException('stageOut failed: %s' % str(e),
                                 code=PilotErrors.ERR_STAGEOUTFAILED)

        self.log('Copy successful')

        # Create output list for ARC CE
        self.createOutputList(fspec, dest)

        checksum, checksum_type = fspec.get_checksum()
        return {
            'checksum_type': checksum_type,
            'checksum': checksum,
            'filesize': fspec.filesize
        }
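The move-and-report pattern above can be shown in isolation; the sketch below uses hypothetical paths and a plain RuntimeError where the pilot raises PilotException.

import os
import shutil

def move_output(src, dest):
    try:
        shutil.move(src, dest)
    except (IOError, OSError) as e:
        # the pilot raises PilotException(code=ERR_STAGEOUTFAILED) here
        raise RuntimeError('stageOut failed: %s' % e)
    return {'filesize': os.path.getsize(dest)}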
Example 4
    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file
            Should be implemented by the specific site mover
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """

        # resolve token value from fspec.ddmendpoint
        token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
        if not token:
            raise PilotException(
                "stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint"
                % (fspec.ddmendpoint, source, destination, fspec))

        filesize = os.path.getsize(source)
        timeout = self.getTimeOut(filesize)

        src_checksum, src_checksum_type = fspec.get_checksum()
        checksum_opt = ''
        if src_checksum:
            checksum_opt = '-K %s:%s' % (src_checksum_type, src_checksum)

        src = "file://%s" % os.path.abspath(source)
        cmd = '%s --verbose %s -p -f -t %s -D "SRM PLUGIN:TURL_PROTOCOLS=gsiftp" -S %s %s %s' % (
            self.copy_command, checksum_opt, timeout, token, src, destination)

        # Prepend the command with singularity if necessary
        from Singularity import singularityWrapper
        cmd = singularityWrapper(cmd, fspec.cmtconfig, dirname(source))

        return self._stagefile(cmd,
                               source,
                               destination,
                               filesize,
                               is_stagein=False)
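For reference, the gfal-copy command assembled above looks like the following when filled with hypothetical values (the space token, checksum and paths are made up).

import os

copy_command = 'gfal-copy'
checksum_opt = '-K %s:%s' % ('adler32', '9e1b2c3d')   # from fspec.get_checksum(), value made up
timeout = 600                                          # from getTimeOut(filesize)
token = 'ATLASDATADISK'                                # space token resolved from ddmconf
source = '/tmp/job/HITS.12345._000001.pool.root.1'
destination = 'srm://se.example.org:8446/atlas/rucio/mc16_13TeV/ab/cd/HITS.12345._000001.pool.root.1'

src = "file://%s" % os.path.abspath(source)
cmd = '%s --verbose %s -p -f -t %s -D "SRM PLUGIN:TURL_PROTOCOLS=gsiftp" -S %s %s %s' % (
    copy_command, checksum_opt, timeout, token, src, destination)
print(cmd)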
Example 5
class mvSiteMover(BaseSiteMover):
    """ SiteMover that uses link for stage in and move for stage out """

    name = 'mv'
    # list of supported schemes for transfers - use them all since surl is not used
    schemes = ['file', 'srm', 'gsiftp', 'https', 'root', 'davs', 's3']

    require_replicas = False  ## quick hack to avoid query Rucio to resolve input replicas

    def __init__(self, *args, **kwargs):
        super(mvSiteMover, self).__init__(*args, **kwargs)
        self.init_dir = os.environ['HOME']

    def check_availablespace(self, maxinputsize, files):
        """
            Verify that enough local space is available to stage in and run the job
            :raise: PilotException in case of not enough space
            Not applicable for given Mover
        """
        pass

    def createOutputList(self, fspec, dest):

        if fspec.turl.startswith('s3://'):
            # Use Rucio proxy to upload to OS
            turl = fspec.turl
            turl = re.sub(r'^s3', 's3+rucio', turl)
            # Add failureallowed option so failed upload does not fail job
            rucio = 'rucio://rucio-lb-prod.cern.ch;failureallowed=yes/objectstores'
            rse = fspec.ddmendpoint
            activity = 'write'
            destsurl = '/'.join([rucio, turl, rse, activity])
        else:
            # Calculate checksum of file - even though it is already known by pilot
            # it is not passed through to new movers
            checksum = self.calc_file_checksum(dest)[0]
            token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
            # Add ARC options to SURL
            destsurl = re.sub(r'((:\d+)/)',
                              r'\2;autodir=no;spacetoken=%s/' % token,
                              fspec.turl)
            destsurl += ':checksumtype=%s:checksumvalue=%s' % (
                self.checksum_type, checksum)

        self.log('Adding to output.list: %s %s' % (fspec.lfn, destsurl))
        # Write output.list
        with open(os.path.join(self.init_dir, 'output.list'), 'a') as f:
            f.write('%s %s\n' % (fspec.lfn, destsurl))

    def getSURL(self,
                se,
                se_path,
                scope,
                lfn,
                job=None,
                pathConvention=None,
                ddmEndpoint=None):
        """
        Override from base because it throws an exception for paths without
        '/rucio' so we need this to do OS uploads
        """

        ddmType = self.ddmconf.get(ddmEndpoint, {}).get('type')
        if ddmType and ddmType in ['OS_LOGS', 'OS_ES']:
            surl = se + os.path.join(se_path, "%s:%s" % (scope, lfn))
        else:
            surl = self.getSURLRucio(se, se_path, scope, lfn, job)
        return surl

    def stageIn(self, source, destination, fspec):
        """
        Override stageIn rather than stageInFile since most of stageIn is
        unnecessary.
        Make a link from the downloaded file to the pilot working directory.

        :param source:      original (remote) file location - not used
        :param destination: where to create the link
        :param fspec:  dictionary containing destination replicas, scope, lfn
        :return:       destination file details (checksumtype, checksum, size)
        """
        # block pre-load input file BEGIN
        # Alexander B.: the next block is necessary for testing of BOINC pilot on GRID resources.
        # it works only if the special variable "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER" is set in external environment
        fileExpectedLocation = '%s/%s' % (
            self.init_dir, fspec.lfn
        )  # the place where original mv_sitemover expect to find the file
        if not os.path.exists(fileExpectedLocation):
            preloadFilesFlag = os.environ.get(
                "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER")
            if preloadFilesFlag and (preloadFilesFlag == '1'
                                     or preloadFilesFlag == "yes"
                                     or preloadFilesFlag == "on"):
                # the expected behavior actions:
                # rucio download valid1:EVNT.01416937._000001.pool.root.1
                # mv valid1/EVNT.01416937._000001.pool.root.1 ./EVNT.09355665._094116.pool.root.1

                self.log(
                    'pp: pre-load files for mv_sitemover: download locally stageIn the file: scope=%s file=%s'
                    % (fspec.scope, fspec.lfn))

                cmd = 'rucio download %s:%s' % (fspec.scope, fspec.lfn)
                self.log("Executing command: %s" % cmd)

                from subprocess import Popen, PIPE, STDOUT
                c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
                output = c.communicate()[0]
                if c.returncode:
                    raise Exception(output)

                fileRucioLocation = '%s/%s' % (
                    fspec.scope, fspec.lfn
                )  # the place where Rucio downloads file
                self.log('pp: move from %s to %s' %
                         (fileRucioLocation, fileExpectedLocation))
                try:
                    os.rename(fileRucioLocation, fileExpectedLocation)
                except OSError as e:
                    raise PilotException(
                        'stageIn failed when renaming the file from the rucio location: %s'
                        % str(e),
                        code=PilotErrors.ERR_STAGEINFAILED)
        # block preload input file END

        src = os.path.join(self.init_dir, fspec.lfn)
        self.log('Creating link from %s to %s' % (fspec.lfn, src))
        try:
            os.symlink(src, fspec.lfn)
        except OSError as e:
            raise PilotException('stageIn failed: %s' % str(e),
                                 code=PilotErrors.ERR_STAGEINFAILED)

        if not os.path.exists(fspec.lfn):
            raise PilotException(
                'stageIn failed: symlink points to non-existent file',
                code=PilotErrors.ERR_STAGEINFAILED)

        self.log('Symlink successful')
        checksum, checksum_type = fspec.get_checksum()
        return {
            'checksum_type': checksum_type,
            'checksum': checksum,
            'filesize': fspec.filesize
        }
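For a non-objectstore destination, createOutputList() above rewrites the TURL with ARC options; the small sketch below reproduces that rewrite on a hypothetical TURL, space token and checksum.

import re

turl = 'srm://se.example.org:8446/atlas/rucio/mc16_13TeV/ab/cd/HITS.12345._000001.pool.root.1'
token = 'ATLASDATADISK'
checksum = '9e1b2c3d'

destsurl = re.sub(r'((:\d+)/)', r'\2;autodir=no;spacetoken=%s/' % token, turl)
destsurl += ':checksumtype=%s:checksumvalue=%s' % ('adler32', checksum)
print('HITS.12345._000001.pool.root.1 %s' % destsurl)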
Example 6
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" %
                     (self.copy_command, e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode = %s, output = %s" %
                 (is_timeout, rcode, output.replace("\n", " ")))

        if is_timeout:
            raise PilotException(
                "Copy command self timed out after %s, timeout=%s, output=%s" %
                (dt, self.timeout, output),
                code=PilotErrors.ERR_GETTIMEOUT
                if is_stagein else PilotErrors.ERR_PUTTIMEOUT,
                state='CP_TIMEOUT')

        if rcode:
            self.log(
                'WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s'
                % (is_stagein, cmd, rcode, output.replace("\n", " ")))
            error = self.resolveStageErrorFromOutput(output,
                                                     source,
                                                     is_stagein=is_stagein)

            if is_stagein:  # do clean up: check if file was partially transferred
                self.removeLocal(destination)

            rcode = error.get('rcode')
Example 7
class BaseSiteMover(object):
    """
    File movers move files between a storage element (of different kinds) and a local directory
    get_data: SE->local
    put_data: local->SE
    check_space: available space in SE

    mkdirWperm -- recursively create dirs, setting appropriate permissions
    getLocalFileInfo -- get size and checksum of a local file

    """

    name = "" # unique ID of the Mover implementation, if not set copy_command will be used
    copy_command = None

    timeout = 5*60 # 5 min

    checksum_type = "adler32"     # algorithm name of checksum calculation
    checksum_command = "adler32"  # command to be executed to get checksum, e.g. md5sum (adler32 is internal default implementation)

    #has_mkdir = True
    #has_df = True
    #has_getsize = True
    #has_md5sum = True
    #has_chmod = True
    #

    def __init__(self, setup_path='', **kwargs):
        self.copysetup = setup_path
        self.timeout = kwargs.get('timeout', self.timeout)

        #self.setup_command = self.getSetup()

        self.trace_report = {}

    def log(self, value): # quick stub
        #print value
        tolog(value)

    @property
    def copysetup(self):
        return self._setup

    @copysetup.setter
    def copysetup(self, value):
        value = os.path.expandvars(value.strip())
        if not os.access(value, os.R_OK):
            self.log("WARNING: copysetup=%s is invalid: file is not readdable" % value)
            raise Exception("Failed to set copysetup: passed invalid file name=%s" % value)
            # PilotErrors.ERR_NOSUCHFILE, state="RFCP_FAIL"
        self._setup = value

    @classmethod
    def getID(self):
        """
            return the ID/NAME string of the Mover class used to resolve the Mover class
            name attribute helps to define various movers with the same copy command
        """
        return self.name or self.copy_command

    @classmethod
    def getRucioPath(self, scope, lfn, prefix='rucio'):
        """
            Construct a partial Rucio PFN using the scope and the LFN
        """

        # <prefix=rucio>/<scope>/md5(<scope>:<lfn>)[0:2]/md5(<scope:lfn>)[2:4]/<lfn>

        hash_hex = hashlib.md5('%s:%s' % (scope, lfn)).hexdigest()

        paths = [prefix] + scope.split('.') + [hash_hex[0:2], hash_hex[2:4], lfn]
        paths = filter(None, paths) # remove empty parts to avoid double /-chars
        return '/'.join(paths)

        #scope = os.path.join(*scope.split('.')) # correct scope
        #return os.path.join(prefix, scope, hash_hex[0:2], hash_hex[2:4], lfn)

    def getSURLRucio(self, se, se_path, scope, lfn, job=None):
        """
            Get final destination SURL of file to be moved
        """

        # ANALY/PROD job specific processing ??

        prefix = 'rucio'
        if se_path.rstrip('/').endswith('/' + prefix): # avoid double prefix
            prefix = ''

        surl = se + os.path.join(se_path, self.getRucioPath(scope, lfn, prefix=prefix))

        return surl

    def getSURL(self, se, se_path, scope, lfn, job=None):
        """
            Get final destination SURL of file to be moved
            the job instance is passed here for possible JOB specific processing ?? FIX ME LATER
        """

        if '/rucio' in se_path:
            return self.getSURLRucio(se, se_path, scope, lfn)

        raise Exception("getSURLPath(): NOT IMPLEMENTED error: processing of non Rucio transfers is not impelemnted yet")

    def getSetup(self):
        """
            return full setup command to be executed
            Can be customized by the specific site mover
        """

        return 'source %s' % self.copysetup

    def setup(self):
        """
            Prepare site specific setup initializations
            Should be implemented by the specific site mover
        """

        # TODO: verify setup??
        # raise in case of errors

        return True # rcode=0, output=''

    def getRemoteFileChecksum(self, filename):
        """
            get checksum of remote file
            Should be implemented by the specific site mover
            :return: (checksum, checksum_type)
            :raise: an exception in case of errors
        """

        return None, None

    def getRemoteFileSize(self, filename):
        """
            get size of remote file
            Should be implemented by the specific site mover
            :return: length of file
            :raise: an exception in case of errors
        """

        return None

    def stageOut(self, source, destination):
        """
            Stage out the source file: do stageout file + verify remote file output
            :return: remote file details: {'checksum': '', 'checksum_type':'', 'filesize':''}
            :raise: PilotException in case of controlled error
        """

        # do stageOutFile
        src_fsize = os.path.getsize(source)

        self.trace_report.update(relativeStart=time.time(), transferStart=time.time())

        dst_checksum, dst_checksum_type = self.stageOutFile(source, destination)

        # verify stageout by checksum
        self.trace_report.update(validateStart=time.time())

        try:
            if not dst_checksum:
                dst_checksum, dst_checksum_type = self.getRemoteFileChecksum(destination)
        except Exception, e:
            self.log("verify StageOut: caught exception while getting remote file checksum: %s .. skipped" % e)

        try:
            if dst_checksum and dst_checksum_type: # verify against source
                src_checksum, src_checksum_type = self.calc_file_checksum(source)

                is_verified = src_checksum and src_checksum_type and dst_checksum == src_checksum and dst_checksum_type == src_checksum_type

                self.log("Local  checksum [%s]: %s" % (src_checksum_type, src_checksum))
                self.log("Remote checksum [%s]: %s" % (dst_checksum_type, dst_checksum))
                self.log("checksum is_verified = %s" % is_verified)

                if not is_verified:
                    error = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % \
                                            (src_checksum_type, os.path.basename(destination), dst_checksum, src_checksum)
                    if src_checksum_type == 'adler32':
                        state = 'AD_MISMATCH'
                        rcode = PilotErrors.ERR_PUTADMISMATCH
                    else:
                        state = 'MD5_MISMATCH'
                        rcode = PilotErrors.ERR_PUTMD5MISMATCH
                    raise PilotException(error, code=rcode, state=state)

                self.log("verifying stageout done. [by checksum]")
                self.trace_report.update(clientState="DONE")
                return {'checksum': dst_checksum, 'checksum_type':dst_checksum_type, 'filesize':src_fsize}

        except PilotException:
            raise
        except Exception, e:
            self.log("verify StageOut: caught exception while doing file checksum verification: %s ..  skipped" % e)
Example 8
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" % ('gfal-rm', e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if is_timeout:
            raise PilotException(
                "removeRemoteFile self timed out after %s, timeout=%s, output=%s"
                % (dt, timeout, output),
                code=PilotErrors.ERR_GENERALERROR,
                state='RM_TIMEOUT')

        if rcode:
            raise PilotException("Failed to remove remote file",
                                 code=PilotErrors.ERR_GENERALERROR,
                                 state='RM_FAILED')

    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file
            Should be implemented by the specific site mover
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """
Example 9
    def put_files(self, ddmendpoints, activity,
                  files):  # old function : TO BE DEPRECATED ...
        """
        Copy files to dest SE:
           main control function; it should take care of alternative stageout and retry-policy for different ddmendpoints
        :ddmendpoint: list of DDMEndpoints where the files will be sent (base DDMEndpoint SE + alternative SEs??)
        :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each ddmendpoint
        :raise: PilotException in case of error
        """

        if not ddmendpoints:
            raise PilotException(
                "Failed to put files: Output ddmendpoint list is not set",
                code=PilotErrors.ERR_NOSTORAGE)
        if not files:
            raise PilotException(
                "Failed to put files: empty file list to be transferred")

        missing_ddms = set(ddmendpoints) - set(self.ddmconf)

        if missing_ddms:
            self.ddmconf.update(self.si.resolveDDMConf(missing_ddms))

        pandaqueue = self.si.getQueueName()  # FIX ME LATER
        prot = self.protocols.setdefault(
            activity,
            self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])

        # group by ddmendpoint
        ddmprot = {}
        for e in prot:
            ddmprot.setdefault(e['ddm'], []).append(e)

        output = []

        for ddm in ddmendpoints:
            protocols = ddmprot.get(ddm)
            if not protocols:
                self.log(
                    'Failed to resolve protocols data for ddmendpoint=%s and activity=%s.. skipped processing..'
                    % (ddm, activity))
                continue

            success_transfers, failed_transfers = [], []

            try:
                success_transfers, failed_transfers = self.do_put_files(
                    ddm, protocols, files)
                is_success = len(success_transfers) == len(files)
                output.append(
                    (is_success, success_transfers, failed_transfers, None))

                if is_success:
                    # NO additional transfers to another next DDMEndpoint/SE ?? .. fix me later if need
                    break

            #except PilotException, e:
            #    self.log('put_files: caught exception: %s' % e)
            except Exception, e:
                self.log('put_files: caught exception: %s' % e)
                # is_success, success_transfers, failed_transfers, exception
                import traceback
                self.log(traceback.format_exc())
                output.append((False, [], [], e))

            ### TODO: implement proper logic of put-policy: how to handle alternative stage out (processing of next DDMEndpoint)..

            self.log(
                'put_files(): Failed to put files to ddmendpoint=%s .. successfully transferred files=%s/%s, failures=%s: will try next ddmendpoint from the list ..'
                % (ddm, len(success_transfers), len(files),
                   len(failed_transfers)))
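The grouping of protocol entries by ddmendpoint done in put_files() is easiest to see on sample data; the entries below are hypothetical.

prot = [
    {'ddm': 'SITE_DATADISK', 'se': 'srm://se.example.org', 'copytool': 'gfal-copy'},
    {'ddm': 'SITE_DATADISK', 'se': 'davs://dav.example.org', 'copytool': 'gfal-copy'},
    {'ddm': 'SITE_SCRATCHDISK', 'se': 'root://xrd.example.org', 'copytool': 'xrdcp'},
]
ddmprot = {}
for e in prot:
    ddmprot.setdefault(e['ddm'], []).append(e)
print(sorted(ddmprot))   # ['SITE_DATADISK', 'SITE_SCRATCHDISK']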
Example 10
    def stagein(self):
        """
            :return: (transferred_files, failed_transfers)
        """

        activity = 'pr'

        pandaqueue = self.si.getQueueName()  # FIX ME LATER
        protocols = self.protocols.setdefault(
            activity,
            self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
        copytools = self.si.resolvePandaCopytools(pandaqueue,
                                                  activity)[pandaqueue]

        self.log("stage-in: pq.aprotocols=%s, pq.copytools=%s" %
                 (protocols, copytools))

        files = self.job.inData
        self.resolve_replicas(
            files)  # populates also self.ddmconf = self.si.resolveDDMConf([])

        maxinputsize = self.getMaxInputSize()
        totalsize = reduce(lambda x, y: x + y.filesize, files, 0)

        transferred_files, failed_transfers = [], []

        self.log(
            "Found N=%s files to be transferred, total_size=%.3f MB: %s" %
            (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

        # process first PQ specific protocols settings
        # then protocols supported by copytools

        # protocol generated from aprotocols is {'copytool':'', 'copysetup':'', 'se':'', 'ddm':''}
        # protocol generated from  copytools is {'copytool':'', 'copysetup', 'scheme':''}

        # build accepted schemes from allowed copytools
        cprotocols = []
        for cp, settings in copytools:
            cprotocols.append({
                'resolve_scheme': True,
                'copytool': cp,
                'copysetup': settings.get('setup')
            })

        protocols = protocols + cprotocols
        if not protocols:
            raise PilotException(
                "Failed to get files: neither aprotocols nor allowed copytools defined for input. check copytools/acopytools/aprotocols schedconfig settings for activity=%s, pandaqueue=%s"
                % (activity, pandaqueue),
                code=PilotErrors.ERR_NOSTORAGE)

        sitemover_objects = {}

        for dat in protocols:

            remain_files = [
                e for e in files
                if e.status not in ['direct_access', 'transferred']
            ]
            if not remain_files:
                self.log(
                    'INFO: all input files have been successfully processed')
                break

            copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

            try:
                sitemover = sitemover_objects.get(copytool)
                if not sitemover:
                    sitemover = getSiteMover(copytool)(
                        copysetup, workDir=self.job.workdir)
                    sitemover_objects.setdefault(copytool, sitemover)

                    sitemover.trace_report = self.trace_report
                    sitemover.ddmconf = self.ddmconf  # self.si.resolveDDMConf([]) # quick workaround  ###
                    sitemover.setup()
                if dat.get('resolve_scheme'):
                    dat['scheme'] = sitemover.schemes
            except Exception, e:
                self.log(
                    'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                    % (e, dat))
                continue

            self.log("Copy command [stage-in]: %s, sitemover=%s" %
                     (copytool, sitemover))
            self.log("Copy setup   [stage-in]: %s" % copysetup)

            self.trace_report.update(protocol=copytool)

            # verify file sizes and available space for stagein
            sitemover.check_availablespace(maxinputsize, remain_files)

            for fdata in remain_files:

                updateFileState(fdata.lfn,
                                self.workDir,
                                self.job.jobId,
                                mode="file_state",
                                state="not_transferred",
                                ftype="input")

                self.log(
                    "[stage-in] Prepare to get_data: protocol=%s, fspec=%s" %
                    (dat, fdata))

                # check if protocol and fdata.ddmendpoint belong to same site
                #
                if dat.get('ddm'):
                    protocol_site = self.ddmconf.get(dat.get('ddm'),
                                                     {}).get('site')
                    replica_site = self.ddmconf.get(fdata.ddmendpoint,
                                                    {}).get('site')

                    if protocol_site != replica_site:
                        self.log(
                            'INFO: cross-sites checks: protocol_site=%s and (fdata.ddmenpoint) replica_site=%s mismatched .. skip file processing for copytool=%s (protocol=%s)'
                            % (protocol_site, replica_site, copytool, dat))
                        continue

                r = sitemover.resolve_replica(fdata, dat)

                # quick stub: propagate changes to FileSpec
                if r.get('surl'):
                    fdata.surl = r[
                        'surl']  # TO BE CLARIFIED if it's still used and need
                if r.get('pfn'):
                    fdata.turl = r['pfn']
                if r.get('ddmendpoint'):
                    fdata.ddmendpoint = r['ddmendpoint']

                self.log(
                    "[stage-in] found replica to be used: ddmendpoint=%s, pfn=%s"
                    % (fdata.ddmendpoint, fdata.turl))

                # check if protocol and found replica belong to same site
                if dat.get('ddm'):
                    protocol_site = self.ddmconf.get(dat.get('ddm'),
                                                     {}).get('site')
                    replica_site = self.ddmconf.get(fdata.ddmendpoint,
                                                    {}).get('site')

                    if protocol_site != replica_site:
                        self.log(
                            'INFO: cross-sites checks: protocol_site=%s and replica_site=%s mismatched .. skip file processing for copytool=%s'
                            % (protocol_site, replica_site, copytool))
                        continue

                # check direct access
                self.log(
                    "fdata.is_directaccess()=%s, job.accessmode=%s, mover.is_directaccess()=%s"
                    % (fdata.is_directaccess(), self.job.accessmode,
                       self.is_directaccess()))

                is_directaccess = self.is_directaccess()
                if self.job.accessmode == 'copy':
                    is_directaccess = False
                elif self.job.accessmode == 'direct':
                    is_directaccess = True
                if fdata.is_directaccess(
                ) and is_directaccess:  # direct access mode, no transfer required
                    fdata.status = 'direct_access'
                    updateFileState(fdata.lfn,
                                    self.workDir,
                                    self.job.jobId,
                                    mode="transfer_mode",
                                    state="direct_access",
                                    ftype="input")

                    self.log(
                        "Direct access mode will be used for lfn=%s .. skip transfer the file"
                        % fdata.lfn)
                    continue

                # apply site-mover custom job-specific checks for stage-in
                try:
                    is_stagein_allowed = sitemover.is_stagein_allowed(
                        fdata, self.job)
                    if not is_stagein_allowed:
                        reason = 'SiteMover does not allow stage-in operation for the job'
                except PilotException, e:
                    is_stagein_allowed = False
                    reason = e
                except Exception:
                    raise
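The direct-access decision inside the loop above reduces to a small function; the restatement below is a sketch with illustrative arguments.

def use_direct_access(job_accessmode, mover_directaccess, file_directaccess):
    is_directaccess = mover_directaccess
    if job_accessmode == 'copy':
        is_directaccess = False
    elif job_accessmode == 'direct':
        is_directaccess = True
    return file_directaccess and is_directaccess

print(use_direct_access('direct', False, True))   # True: the job setting overrides the mover
print(use_direct_access('copy', True, True))      # False: copy mode forces a real transfer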
Example 11
class gfalcopySiteMover(BaseSiteMover):
    """ SiteMover that uses gfal-copy for both get and put """
    name = "gfalcopy"
    copy_command = "gfal-copy"
    checksum_type = "adler32"
    checksum_command = "gfal-sum"
    # rm_command and ls_command : look at the code

    schemes = ['srm', 'gsiftp', 'https',
               'davs']  # list of supported schemes for transfers

    gfal_prop_grid = '-D "SRM PLUGIN:TURL_PROTOCOLS=gsiftp"'
    gfal_prop_dynacloud = ''

    def detectDynafedCloud(self, ddmendpoint):
        """
            Determine whether the storage is a cloud with dynafed frontend.
            It is necessary to distinguish cloud from the grid to treat them in different ways (plug-in properties, checksum)
        """
        ddmConf = self.ddmconf.get(ddmendpoint, {})
        is_mkdir = bool(
            ddmConf.get("is_mkdir"))  # "true" or "false" -> True or False
        webdav_se_flavour = (ddmConf.get("se_flavour") == "WEBDAV")
        isDynafedCloud = is_mkdir and webdav_se_flavour
        self.log(
            "gfalcopy_sitemover:  %s is defined as Dynafed with cloud backend: %s"
            % (ddmendpoint, isDynafedCloud))
        return isDynafedCloud

    def _stagefile(self, cmd, source, destination, filesize, is_stagein):
        """
            Stage the file (stagein or stageout respect to is_stagein value)
            :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """

        timeout = self.getTimeOut(filesize)

        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" %
                     (self.copy_command, e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if is_timeout or rcode:  ## do clean up
            if is_stagein:  # stage-in clean up: check if file was partially transferred
                self.removeLocal(destination)

        if is_timeout:
            raise PilotException(
                "Copy command self timed out after %s, timeout=%s, output=%s" %
                (dt, timeout, output),
                code=PilotErrors.ERR_GETTIMEOUT
                if is_stagein else PilotErrors.ERR_PUTTIMEOUT,
                state='CP_TIMEOUT')

        if rcode:
            self.log(
                'WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s'
                % (is_stagein, cmd, rcode, output.replace("\n", " ")))
            error = self.resolveStageErrorFromOutput(output,
                                                     source,
                                                     is_stagein=is_stagein)
            rcode = error.get('rcode')
            if not rcode:
                rcode = PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED
            state = error.get('state')
            if not state:
                state = 'COPY_FAIL'  #'STAGEIN_FAILED' if is_stagein else 'STAGEOUT_FAILED'

            raise PilotException(error.get('error'), code=rcode, state=state)

        # extract filesize and checksum values from output
        # check stage-out: not used at the moment

        return None, None
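detectDynafedCloud() above only needs two fields from the DDM configuration; a hypothetical ddmconf entry makes the check concrete.

ddmconf = {'SITE_OS_DATADISK': {'is_mkdir': True, 'se_flavour': 'WEBDAV'}}
entry = ddmconf.get('SITE_OS_DATADISK', {})
print(bool(entry.get('is_mkdir')) and entry.get('se_flavour') == 'WEBDAV')   # True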
Example 12
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" % ('gfal-rm', e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0

        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if is_timeout:
            raise PilotException(
                "removeRemoteFile self timed out after %s, timeout=%s, output=%s"
                % (dt, timeout, output),
                code=PilotErrors.ERR_GENERALERROR,
                state='RM_TIMEOUT')

        if rcode:
            raise PilotException("Failed to remove remote file",
                                 code=PilotErrors.ERR_GENERALERROR,
                                 state='RM_FAILED')

    def stageOutFile(self, source, destination, fspec):
        """
            Stage out the file
            Should be implemented by the specific site mover
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """
Example 13
    def getSURL(self,
                se,
                se_path,
                scope,
                lfn,
                job=None,
                pathConvention=None,
                taskId=None,
                ddmEndpoint=None):
        """
            Get final destination SURL of file to be moved
            the job instance is passed here for possible JOB specific processing ?? FIX ME LATER
        """

        # quick fix: this actually should be reported back from Rucio upload in stageOut()
        # surl is currently (required?) being reported back to Panda in XML

        tolog("getSURL: pathConvention: %s, taskId: %s, ddmEndpoint: %s" %
              (pathConvention, taskId, ddmEndpoint))
        if pathConvention and pathConvention >= 1000:
            scope = 'transient'
            pathConvention = pathConvention - 1000
            if pathConvention == 0:
                pathConvention = None

        if not ddmEndpoint or self.isDeterministic(ddmEndpoint):
            return self.getSURLRucio(se, se_path, scope, lfn)

        ddmType = self.ddmconf.get(ddmEndpoint, {}).get('type')
        if not (ddmType and ddmType in ['OS_ES']):
            return self.getSURLRucio(se, se_path, scope, lfn)
        else:
            if pathConvention is None:
                surl = se + os.path.join(se_path, lfn)
            else:
                # If pathConvention is not None, it means multiple buckets are used.
                # If pathConvention is bigger than or equal 100:
                #     The bucket name is '<atlas-eventservice>-<taskid>-<pathConventionNumber>'
                #     Real pathConvention is pathConvention - 100
                # Else:
                #     The bucket name is '<atlas-eventservice>-<pathConventionNumber>'
                #     Real pathConvention is pathConvention.

                while se_path.endswith("/"):
                    se_path = se_path[:-1]

                if pathConvention >= 100:
                    pathConvention = pathConvention - 100
                    if taskId is None and job is None:
                        raise PilotException(
                            "getSURL with pathConvention(%s) failed becuase both taskId(%s) and job(%s) are None"
                            % (pathConvention, taskId, job),
                            code=PilotErrors.ERR_FAILEDLFCGETREPS)
                    if taskId is None:
                        taskId = job.taskID
                    se_path = "%s-%s-%s" % (se_path, taskId, pathConvention)
                else:
                    se_path = "%s-%s" % (se_path, pathConvention)

                surl = se + os.path.join(se_path, lfn)
        return surl
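A worked example helps with the pathConvention arithmetic above: a value of 1102 switches the scope to 'transient', and the remaining 102 (>= 100) selects the '<se_path>-<taskid>-<n>' bucket form with n = 2. All endpoint values below are hypothetical.

import os

pathConvention, taskId = 1102, 12345678
se, se_path, lfn = 's3://os.example.org:443', '/atlas-eventservice', 'HITS.12345._000001.pool.root.1'

if pathConvention >= 1000:
    scope = 'transient'
    pathConvention -= 1000
se_path = se_path.rstrip('/')
if pathConvention >= 100:
    se_path = '%s-%s-%s' % (se_path, taskId, pathConvention - 100)
else:
    se_path = '%s-%s' % (se_path, pathConvention)
print(se + os.path.join(se_path, lfn))
# s3://os.example.org:443/atlas-eventservice-12345678-2/HITS.12345._000001.pool.root.1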
Example 14
class pandaproxySiteMover(BaseSiteMover):
    """ sitemover to upload files with panda proxy to object store"""
    version = '20170523.001'

    name = 'pandaproxy'
    schemes = ['pandaproxy', 's3'] # list of supported schemes for transfers

    require_replicas = False       ## quick hack to avoid query Rucio to resolve input replicas

    def __init__(self, *args, **kwargs):
        super(pandaproxySiteMover, self).__init__(*args, **kwargs)
        self.log('pandaproxy mover version: %s' % self.version)
        #self.os_endpoint = None
        #self.os_bucket_endpoint = None
        self.osPublicKey = 'CERN_ObjectStoreKey.pub'
        self.osPrivateKey = 'CERN_ObjectStoreKey'
        self.pandaProxyURL = 'http://aipanda084.cern.ch:25064/proxy/panda'
        #self.pandaProxyURL = 'https://aipanda084.cern.ch:25128/proxy/panda'
    
    def unproxify(func):
        """ decorator to unproxify https and http connections """
        def wrapper(*args, **kwargs):
            http_proxy = os.environ.get("http_proxy")
            https_proxy = os.environ.get("https_proxy")
            if http_proxy:
                del os.environ['http_proxy']
            if https_proxy:
                del os.environ['https_proxy']
            funcResult = func(*args, **kwargs)
            if http_proxy:
                os.environ['http_proxy'] = http_proxy
            if https_proxy:
                os.environ['https_proxy'] = https_proxy
            return funcResult
        return wrapper
  
    def _stagefile(self, cmd, source, destination, filesize, is_stagein):
        """
            Stage the file (stagein or stageout respect to is_stagein value)
            :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error

            this method was copied from lsm_sitemover (= the same in gfalcopy_sitemover)
        """
        
        timeout = self.getTimeOut(filesize)

        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" % (self.copy_command, e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" % (is_timeout, rcode, output))

        if is_stagein: # stage-in clean up: check if file was partially transferred
            if is_timeout or rcode: ## do clean up
                self.removeLocal(destination)

        if is_timeout:
            raise PilotException("Copy command self timed out after %s, timeout=%s, output=%s" % (dt, timeout, output), code=PilotErrors.ERR_GETTIMEOUT if is_stagein else PilotErrors.ERR_PUTTIMEOUT, state='CP_TIMEOUT')

        if rcode:
            self.log('WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s' % (is_stagein, cmd, rcode, output.replace("\n"," ")))
            error = self.resolveStageErrorFromOutput(output, source, is_stagein=is_stagein)
            rcode = error.get('rcode')
            if not rcode:
                rcode = PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED
            state = error.get('state')
            if not state:
                state = 'COPY_FAIL' #'STAGEIN_FAILED' if is_stagein else 'STAGEOUT_FAILED'

            raise PilotException(error.get('error'), code=rcode, state=state)

        # extract filesize and checksum values from output
        # check stage-out: not used at the moment

        return None, None
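The unproxify decorator above saves, clears, and restores http_proxy/https_proxy around the wrapped call. The standalone restatement below does the same thing, with a finally block added so the variables are restored even if the call raises; the decorated function is a placeholder.

import os

def unproxify(func):
    def wrapper(*args, **kwargs):
        saved = dict((k, os.environ.pop(k)) for k in ('http_proxy', 'https_proxy')
                     if k in os.environ)
        try:
            return func(*args, **kwargs)
        finally:
            os.environ.update(saved)
    return wrapper

@unproxify
def fetch(url):
    return 'would fetch %s without an intermediate proxy' % url   # placeholder body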
Example 15
class stormSiteMover(BaseSiteMover):
    """
    SiteMover that queries HTTP etag for physical storage location, then
    symlinks for stage in, and copies for stage out
    """

    name = 'storm'
    schemes = ['file', 'srm', 'root', 'https', 'gsiftp']
    version = '20170227.001'

    require_replicas = True

    def __init__(self, *args, **kwargs):
        super(stormSiteMover, self).__init__(*args, **kwargs)
        self.log('storm sitemover version: %s' % self.version)

    def check_availablespace(self, maxinputsize, files):
        """
            Verify that enough local space is available to stage in and run the job
            :raise: PilotException in case of not enough space
            Not applicable for given Mover
        """
        pass

    def stageIn(self, source, destination, fspec):
        """
        Query HTTP for etag, then symlink to the pilot working directory.

        :param source:      original file location
        :param destination: where to create the link
        :param fspec:       dictionary containing destination replicas, scope, lfn
        :return:            destination file details (checksumtype, checksum, size)
        """

        self.log('source: %s' % str(source))
        self.log('destination: %s' % str(destination))
        self.log('fspec: %s' % str(fspec))
        self.log('fspec.scope: %s' % str(fspec.scope))
        self.log('fspec.lfn: %s' % str(fspec.lfn))
        self.log('fspec.ddmendpoint: %s' % str(fspec.ddmendpoint))

        # figure out the HTTP SURL from Rucio

        from rucio.client import ReplicaClient

        rc = ReplicaClient()
        http_surl_reps = [r for r in rc.list_replicas(dids=[{'scope': fspec.scope,
                                                             'name': fspec.lfn}],
                                                      schemes=['davs'],
                                                      rse_expression=fspec.ddmendpoint)]
        self.log('http_surl_reps: %s' % http_surl_reps)

        http_surl = http_surl_reps[0]['rses'][fspec.ddmendpoint][0].rsplit('_-')[0]
        self.log('http_surl: %s' % http_surl)

        # retrieve the TURL from the webdav etag
        cmd = 'davix-http --capath /cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/etc/grid-security-emi/certificates --cert $X509_USER_PROXY -X PROPFIND %s' % http_surl
        self.log('ETAG retrieval: %s' % cmd)
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=10)
        except Exception, e:
            self.log('FATAL: could not retrieve STORM WebDAV ETag: %s' % e)
            raise PilotException('Could not retrieve STORM WebDAV ETag: %s' % e)
        p_output = minidom.parseString(output)

        # we need to strip off the quotation marks and the <timestamp> from the etag
        # but since we can have multiple underscores, we have to rely on the uniqueness
        # of the full LFN to make the split
        target = p_output.getElementsByTagName('d:getetag')[0].childNodes[0].nodeValue.replace('"', '')
        self.log('Symlink before: %s' % target)
        target = target.split(fspec.lfn)[0]+fspec.lfn
        self.log('Symlink after : %s' % target)

        # make the symlink
        try:
            self.log('Making symlink from %s to %s' % (target, destination))
            os.symlink(target, destination)
        except Exception, e:
            self.log('FATAL: could not create symlink: %s' % e)
            raise PilotException('Could not create symlink: %s' % e)
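The etag handling above can be exercised on a canned PROPFIND response; the XML fragment, path and LFN below are made up.

from xml.dom import minidom

lfn = 'EVNT.12345._000001.pool.root.1'
output = ('<d:multistatus xmlns:d="DAV:"><d:response><d:propstat><d:prop>'
          '<d:getetag>"/storage/atlas/ab/%s_1588000000"</d:getetag>'
          '</d:prop></d:propstat></d:response></d:multistatus>' % lfn)

p_output = minidom.parseString(output)
target = p_output.getElementsByTagName('d:getetag')[0].childNodes[0].nodeValue.replace('"', '')
target = target.split(lfn)[0] + lfn
print(target)   # /storage/atlas/ab/EVNT.12345._000001.pool.root.1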
Example 16
    def resolve_replica(self, fspec, protocol, ddm=None):
        """
            :fspec: FileSpec object
            :protocol: dict('se':'', 'scheme':'str' or list)

            Resolve input replica either by protocol scheme
            or manually construct pfn according to protocol.se value (full local path is matched with respect to ddm_se default protocol)
            :return: input file replica details: {'surl':'', 'ddmendpoint':'', 'pfn':''}
            :raise: PilotException in case of controlled error
        """

        # resolve proper surl (main srm replica) and find related replica

        if protocol.get('se'):
            scheme = str(protocol.get('se')).split(':', 1)[0]
        else:
            scheme = protocol.get('scheme')

        if not scheme:
            raise Exception(
                'Failed to resolve copytool scheme to be used, se field is corrupted?: protocol=%s'
                % protocol)

        if isinstance(scheme, str):
            scheme = [scheme]

        replica = None  # find first matched to protocol spec replica
        surl = None

        if protocol.get(
                'se'
        ):  # custom settings: match Rucio replica by default protocol se (quick stub until Rucio protocols are properly populated)
            for ddmendpoint, replicas, ddm_se, ddm_path in fspec.replicas:
                if not replicas:  # ignore ddms with no replicas
                    continue
                surl = replicas[0]  # assume srm protocol is first entry
                self.log(
                    "[stage-in] surl (srm replica) from Rucio: pfn=%s, ddmendpoint=%s, ddm.se=%s, ddm.se_path=%s"
                    % (surl, ddmendpoint, ddm_se, ddm_path))

                for r in replicas:
                    if r.startswith(
                            ddm_se):  # manually form pfn based on protocol.se
                        r_filename = r.replace(ddm_se, '', 1).replace(
                            ddm_path, '', 1)  # resolve replica filename
                        # quick hack: if hosted replica ddmendpoint and input protocol ddmendpoint mismatched => consider replica ddmendpoint.path
                        r_path = protocol.get('path')
                        if ddmendpoint != protocol.get('ddm'):
                            self.log(
                                "[stage-in] ignore protocol.path=%s since protocol.ddm=%s differs from found replica.ddm=%s ... will use ddm.path=%s to form TURL"
                                % (protocol.get('path'), protocol.get('ddm'),
                                   ddmendpoint, ddm_path))
                            r_path = ddm_path
                        replica = protocol.get('se') + r_path
                        if replica and r_filename and '/' not in (
                                replica[-1] + r_filename[0]):
                            replica += '/'
                        replica += r_filename
                        self.log(
                            "[stage-in] ignore_rucio_replicas since protocol.se is explicitly passed, protocol.se=%s, protocol.path=%s: found replica=%s matched ddm.se=%s, ddm.path=%s .. will use TURL=%s"
                            % (protocol.get('se'), protocol.get('path'), surl,
                               ddm_se, ddm_path, replica))
                        break

                if replica:
                    break

        if not replica:  # resolve replica from Rucio: use exact pfn from Rucio replicas
            for sval in scheme:
                for ddmendpoint, replicas, ddm_se, ddm_path in fspec.replicas:
                    if not replicas:  # ignore ddms with no replicas
                        continue
                    surl = replicas[0]  # assume srm protocol is first entry
                    self.log(
                        "[stage-in] surl (srm replica) from Rucio: pfn=%s, ddmendpoint=%s, ddm.se=%s, ddm.se_path=%s"
                        % (surl, ddmendpoint, ddm_se, ddm_path))
                    for r in replicas:
                        if r.startswith("%s://" % sval):
                            replica = r
                            break
                    if replica:
                        break
                if replica:
                    break

        if not replica:  # replica not found
            error = 'Failed to find replica for input file, protocol=%s, fspec=%s' % (
                protocol, fspec)
            self.log("resolve_replica: %s" % error)
            raise PilotException(error, code=PilotErrors.ERR_REPNOTFOUND)

        return {'surl': surl, 'ddmendpoint': ddmendpoint, 'pfn': replica}
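The scheme-based fallback branch above is easiest to follow on a hypothetical replica list: the first (srm) entry is kept as the reported SURL, and the first replica matching an accepted scheme becomes the TURL.

scheme = ['root', 'davs']
replicas = ['srm://se.example.org/atlas/rucio/mc16_13TeV/ab/cd/AOD.12345._000001.pool.root.1',
            'root://xrd.example.org//atlas/rucio/mc16_13TeV/ab/cd/AOD.12345._000001.pool.root.1']

surl = replicas[0]                       # srm entry kept as the reported SURL
replica = None
for sval in scheme:
    for r in replicas:
        if r.startswith('%s://' % sval):
            replica = r
            break
    if replica:
        break
print(replica)   # the root:// TURL is selected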
Example 17
                            fdata.ddmendpoint = result.get('ddmendpoint')
                        if result.get('surl'):
                            fdata.surl = result.get('surl')
                        if result.get('pfn'):
                            fdata.turl = result.get('pfn')

                        #self.trace_report.update(url=fdata.surl) ###
                        self.trace_report.update(url=fdata.turl)  ###

                        break  # transferred successfully
                    except PilotException, e:
                        result = e
                        self.log(traceback.format_exc())
                    except Exception, e:
                        result = PilotException(
                            "stageIn failed with error=%s" % e,
                            code=PilotErrors.ERR_STAGEINFAILED)
                        self.log(traceback.format_exc())

                    self.log(
                        'WARNING: Error in copying file (attempt %s/%s): %s' %
                        (_attempt, self.stageinretry, result))

                if not isinstance(result,
                                  Exception):  # transferred successfully

                    # finalize and send trace report
                    self.trace_report.update(clientState='DONE',
                                             stateReason='OK',
                                             timeEnd=time.time())
                    self.sendTrace(self.trace_report)
Example 18
class xrdcpSiteMover(BaseSiteMover):
    """ SiteMover that uses xrdcp for both get and put """

    #name = "xrdcp"
    copy_command = "xrdcp"
    checksum_type = "adler32"
    checksum_command = "xrdadler32"

    schemes = ['root']  # list of supported schemes for transfers

    def __init__(self, *args, **kwargs):

        super(xrdcpSiteMover, self).__init__(*args, **kwargs)

        self.coption = self._resolve_checksum_option()

    def _resolve_checksum_option(self):

        cmd = "%s -h" % self.copy_command
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log(
            "Execute command (%s) to decide which option should be used to calc file checksum.."
            % cmd)

        c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
        output = c.communicate()[0]

        self.log("return code: %s" % c.returncode)

        coption = ""

        if c.returncode:
            self.log('FAILED to execute command=%s: %s' % (cmd, output))
        else:
            if "--cksum" in output:
                coption = "--cksum %s:print" % self.checksum_type
            elif "-adler" in output and self.checksum_type == 'adler32':
                coption = "-adler"
            elif "-md5" in output and self.checksum_type == 'md5':
                coption = "-md5"

        if coption:
            self.log("Use %s option to get the checksum" % coption)
        else:
            self.log(
                "Cannot find neither -adler nor --cksum. will not use checksum .. command output=%s"
                % output)

        return coption

    def _stagefile(self, source, destination, filesize, is_stagein):
        """
            Stage the file
            mode is stagein or stageout
            :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """

        if self.checksum_type not in ['adler32']:  # exclude md5
            raise PilotException(
                "Failed to stage file: internal error: unsupported checksum_type=%s .. "
                % self.checksum_type,
                code=PilotErrors.ERR_STAGEINFAILED
                if is_stagein else PilotErrors.ERR_STAGEOUTFAILED,
                state='BAD_CSUMTYPE')

        cmd = '%s -np -f %s %s %s' % (self.copy_command, self.coption, source,
                                      destination)
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        timeout = self.getTimeOut(filesize)
        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: xrdcp threw an exception: %s" % e)
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode = %s, output = %s" %
                 (is_timeout, rcode, output.replace("\n", " ")))

        if is_timeout:
            raise PilotException(
                "Copy command self timed out after %s, timeout=%s, output=%s" %
                (dt, timeout, output),
                code=PilotErrors.ERR_GETTIMEOUT
                if is_stagein else PilotErrors.ERR_PUTTIMEOUT,
                state='CP_TIMEOUT')

        if rcode:
            self.log(
                'WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s'
                % (is_stagein, cmd, rcode, output.replace("\n", " ")))
            error = self.resolveStageErrorFromOutput(output,
                                                     source,
                                                     is_stagein=is_stagein)

            if is_stagein:  # do clean up: check if file was partially transferred
                self.removeLocal(destination)

            #if rcode != PilotErrors.ERR_FILEEXIST:
            #    # check if file was partially transferred, if so, remove it
            #    #_ec, removeOutput = self.removeRemoteFile(destination)
            #    #if not _ec :
            #    #    self.log("Failed to remove file %s" % destination)
            #    #return rcode, outputRet
            rcode = error.get('rcode')
            if not rcode:
                rcode = PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED
            state = error.get('state')
            if not state:
                state = 'COPY_FAIL'  #'STAGEIN_FAILED' if is_stagein else 'STAGEOUT_FAILED'

            raise PilotException(error.get('error'), code=rcode, state=state)

        # extract filesize and checksum values from output

        checksum, checksum_type = self.getRemoteFileChecksumFromOutput(output)

        return checksum, checksum_type
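
_resolve_checksum_option() above decides which xrdcp checksum flag to use purely by looking for substrings in the copy command's help output. That decision logic can be exercised in isolation as sketched below; the helper name and the sample help strings are made up for illustration and are not real xrdcp output.

def resolve_checksum_flag(help_output, checksum_type='adler32'):
    # Same decision logic as xrdcpSiteMover._resolve_checksum_option(),
    # applied to an already-captured "xrdcp -h" output string.
    if "--cksum" in help_output:
        return "--cksum %s:print" % checksum_type
    if "-adler" in help_output and checksum_type == 'adler32':
        return "-adler"
    if "-md5" in help_output and checksum_type == 'md5':
        return "-md5"
    return ""  # no usable checksum option found


# illustrative help snippets (not real xrdcp output)
print(resolve_checksum_flag("usage: xrdcp ... --cksum type:value|print ..."))  # --cksum adler32:print
print(resolve_checksum_flag("usage: xrdcp ... -adler ..."))                    # -adler
print(resolve_checksum_flag("usage: xrdcp ..."))                               # (empty string)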
Example n. 19
    def stageout(self, activity, files):
        """
            Copy files to the destination SE:
            main control function; it should handle alternative stage-out and the retry policy for different ddmendpoints
        :return: (transferred_files, failed_transfers)
        :raise: PilotException in case of error
        """

        if not files:
            raise PilotException(
                "Failed to put files: empty file list to be transferred")

        pandaqueue = self.si.getQueueName()  # FIX ME LATER
        protocols = self.protocols.setdefault(
            activity,
            self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
        copytools = self.si.resolvePandaCopytools(pandaqueue,
                                                  activity)[pandaqueue]

        self.log(
            "Mover.stageout() [new implementation] started for activity=%s, files=%s, protocols=%s, copytools=%s"
            % (activity, files, protocols, copytools))

        # check if file exists before actual processing
        # populate filesize if needed

        for fspec in files:
            pfn = os.path.join(self.job.workdir, fspec.lfn)
            if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                error = "Erron: input pfn file is not exist: %s" % pfn
                self.log(error)
                raise PilotException(error,
                                     code=PilotErrors.ERR_MISSINGOUTPUTFILE,
                                     state="FILE_INFO_FAIL")
            fspec.filesize = os.path.getsize(pfn)

        totalsize = reduce(lambda x, y: x + y.filesize, files, 0)

        transferred_files, failed_transfers = [], []

        self.log(
            "Found N=%s files to be transferred, total_size=%.3f MB: %s" %
            (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

        # first resolve protocol settings from PQ specific aprotocols settings
        # then resolve settings from default ddm.protocols supported by copytools

        # group protocols, files by ddmendpoint
        ddmprotocols, ddmfiles = {}, {}
        for e in files:
            ddmfiles.setdefault(e.ddmendpoint, []).append(e)

        # load DDM conf/protocols
        self.ddmconf.update(self.si.resolveDDMConf(ddmfiles.keys()))

        for e in protocols:
            if e['ddm'] not in ddmfiles:  # skip protocol settings that are not affected
                continue
            e['copytools'] = [{
                'copytool': e['copytool'],
                'copysetup': e['copysetup']
            }]
            ddmprotocols.setdefault(e['ddm'], []).append(e)

        # generate default protocols from copytools/schemes and ddmconf
        unknown_ddms = set(ddmfiles) - set(ddmprotocols)
        for ddmendpoint in unknown_ddms:
            dd = self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {})
            dat = dd.get(activity, []) or dd.get('w', [])
            dprotocols = [
                dict(se=e[0], path=e[2], resolve_scheme=True)
                for e in sorted(dat, key=lambda x: x[1])
            ]
            ddmprotocols.setdefault(ddmendpoint, dprotocols)

        unknown_ddms = set(ddmfiles) - set(ddmprotocols)
        if unknown_ddms:
            raise PilotException(
                "Failed to put files: no protocols defined for output ddmendpoints=%s .. check aprotocols schedconfig settings for activity=%s or default ddm.aprotocols entries"
                % (unknown_ddms, activity),
                code=PilotErrors.ERR_NOSTORAGE)

        self.log(
            "[stage-out] [%s] filtered protocols to be used to transfer files: protocols=%s"
            % (activity, ddmprotocols))

        # get SURL endpoint for Panda callback registration
        # resolve from special protocol activity='SE' or fallback to activity='a', then to 'r'

        surl_protocols, no_surl_ddms = {}, set()

        for fspec in files:
            if not fspec.surl:  # initialize only if not already set
                d = self.ddmconf.get(fspec.ddmendpoint,
                                     {}).get('aprotocols', {})
                xprot = d.get('SE', [])
                if not xprot:
                    xprot = [
                        e for e in d.get('a', d.get('r', []))
                        if e[0] and e[0].startswith('srm')
                    ]
                surl_prot = [
                    dict(se=e[0], path=e[2])
                    for e in sorted(xprot, key=lambda x: x[1])
                ]
                if surl_prot:
                    surl_protocols.setdefault(fspec.ddmendpoint, surl_prot[0])
                else:
                    no_surl_ddms.add(fspec.ddmendpoint)

        if no_surl_ddms:  # failed to resolve SURLs
            self.log(
                'FAILED to resolve default SURL path for ddmendpoints=%s' %
                list(no_surl_ddms))
            raise PilotException(
                "Failed to put files: no SE/SURL protocols defined for output ddmendpoints=%s .. check ddmendpoints aprotocols settings for activity=SE/a/r"
                % list(no_surl_ddms),
                code=PilotErrors.ERR_NOSTORAGE)

        sitemover_objects = {}

        # try to iterate over the protocols of a given ddmendpoint until a successful transfer
        for ddmendpoint, iprotocols in ddmprotocols.iteritems():

            for dat in iprotocols:

                remain_files = [
                    e for e in ddmfiles.get(ddmendpoint)
                    if e.status not in ['transferred']
                ]
                if not remain_files:
                    self.log(
                        'INFO: all files to be transferred to ddm=%s have been successfully processed for activity=%s ..'
                        % (ddmendpoint, activity))
                    # stop checking other protocols of ddmendpoint
                    break

                if 'copytools' not in dat:
                    # use allowed copytools
                    cdat = []
                    for cp, settings in copytools:
                        cdat.append({
                            'copytool': cp,
                            'copysetup': settings.get('setup')
                        })
                    dat['copytools'] = cdat

                if not dat['copytools']:
                    msg = 'FAILED to resolve final copytools settings for ddmendpoint=%s, please check schedconf.copytools settings: copytools=%s, iprotocols=%s' % (
                        ddmendpoint, copytools, iprotocols)
                    self.log(msg)
                    raise PilotException(msg, code=PilotErrors.ERR_NOSTORAGE)

                for cpsettings in dat.get('copytools', []):
                    copytool, copysetup = cpsettings.get(
                        'copytool'), cpsettings.get('copysetup')

                    try:
                        sitemover = sitemover_objects.get(copytool)
                        if not sitemover:
                            sitemover = getSiteMover(copytool)(
                                copysetup, workDir=self.job.workdir)
                            sitemover_objects.setdefault(copytool, sitemover)

                            sitemover.trace_report = self.trace_report
                            sitemover.protocol = dat  # ##
                            sitemover.ddmconf = self.ddmconf  # quick workaround  ###
                            sitemover.setup()
                        if dat.get('resolve_scheme'):
                            dat['scheme'] = sitemover.schemes
                    except Exception, e:
                        self.log(
                            'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                            % (e, dat))
                        continue

                    if dat.get('scheme'):  # filter protocols by accepted scheme from copytool
                        should_skip = True
                        for scheme in dat.get('scheme'):
                            if dat['se'].startswith(scheme):
                                should_skip = False
                                break
                        if should_skip:
                            self.log(
                                "[stage-out] protocol=%s of ddmendpoint=%s is skipped since copytool=%s does not support it, accepted schemes=%s"
                                % (dat['se'], ddmendpoint, copytool,
                                   dat['scheme']))

                            continue

                    self.log("Copy command [stage-out]: %s, sitemover=%s" %
                             (copytool, sitemover))
                    self.log("Copy setup   [stage-out]: %s" % copysetup)

                    self.trace_report.update(protocol=copytool,
                                             localSite=ddmendpoint,
                                             remoteSite=ddmendpoint)

                    # validate se value?
                    se, se_path = dat.get('se', ''), dat.get('path', '')

                    for fdata in remain_files:

                        if not fdata.surl:
                            fdata.surl = sitemover.getSURL(
                                surl_protocols[fdata.ddmendpoint].get('se'),
                                surl_protocols[fdata.ddmendpoint].get('path'),
                                fdata.scope, fdata.lfn, self.job
                            )  # job is passed here for possible JOB-specific processing

                        updateFileState(fdata.lfn,
                                        self.workDir,
                                        self.job.jobId,
                                        mode="file_state",
                                        state="not_transferred",
                                        ftype="output")

                        fdata.turl = sitemover.getSURL(
                            se, se_path, fdata.scope, fdata.lfn, self.job
                        )  # job is passed here for possible JOB-specific processing

                        self.log(
                            "[stage-out] resolved SURL=%s to be used for lfn=%s, ddmendpoint=%s"
                            % (fdata.surl, fdata.lfn, fdata.ddmendpoint))

                        self.log(
                            "[stage-out] resolved TURL=%s to be used for lfn=%s, ddmendpoint=%s"
                            % (fdata.turl, fdata.lfn, fdata.ddmendpoint))

                        self.log(
                            "[stage-out] Prepare to put_data: ddmendpoint=%s, protocol=%s, fspec=%s"
                            % (ddmendpoint, dat, fdata))

                        self.trace_report.update(catStart=time.time(),
                                                 filename=fdata.lfn,
                                                 guid=fdata.guid.replace(
                                                     '-', ''))
                        self.trace_report.update(
                            scope=fdata.scope,
                            dataset=fdata.destinationDblock,
                            url=fdata.turl)

                        self.log(
                            "[stage-out] Preparing copy for lfn=%s using copytool=%s: mover=%s"
                            % (fdata.lfn, copytool, sitemover))
                        #dumpFileStates(self.workDir, self.job.jobId, ftype="output")

                        # loop over multiple stage-out attempts
                        for _attempt in xrange(1, self.stageoutretry + 1):

                            if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                                self.log(
                                    " -- Waiting %s seconds before next stage-out attempt for file=%s --"
                                    % (self.stageout_sleeptime, fdata.lfn))
                                time.sleep(self.stageout_sleeptime)

                            self.log("Put attempt %s/%s for filename=%s" %
                                     (_attempt, self.stageoutretry, fdata.lfn))

                            try:
                                result = sitemover.put_data(fdata)
                                fdata.status = 'transferred'  # mark as successful
                                if result.get('surl'):
                                    fdata.surl = result.get('surl')
                                #if result.get('pfn'):
                                #    fdata.turl = result.get('pfn')

                                #self.trace_report.update(url=fdata.surl) ###
                                self.trace_report.update(url=fdata.turl)  ###

                                # finalize and send trace report
                                self.trace_report.update(clientState='DONE',
                                                         stateReason='OK',
                                                         timeEnd=time.time())
                                self.sendTrace(self.trace_report)

                                updateFileState(fdata.lfn,
                                                self.workDir,
                                                self.job.jobId,
                                                mode="file_state",
                                                state="transferred",
                                                ftype="output")
                                dumpFileStates(self.workDir,
                                               self.job.jobId,
                                               ftype="output")

                                self.updateSURLDictionary(
                                    fdata.guid, fdata.surl, self.workDir,
                                    self.job.jobId)  # FIXME LATER: isolate later

                                fdat = result.copy()
                                #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl)
                                transferred_files.append(fdat)

                                break  # transferred successfully
                            except PilotException, e:
                                result = e
                                self.log(traceback.format_exc())
                            except Exception, e:
                                result = PilotException(
                                    "stageOut failed with error=%s" % e,
                                    code=PilotErrors.ERR_STAGEOUTFAILED)
                                self.log(traceback.format_exc())

                            self.log(
                                'WARNING: Error in copying file (attempt %s/%s): %s'
                                % (_attempt, self.stageoutretry, result))

                        if isinstance(result, Exception):  # transfer failed
                            failed_transfers.append(result)
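
Throughout stageout() (and again in the deprecated do_put_files() further below), SURL/TURL endpoints are chosen from a ddmendpoint's 'aprotocols' entries by sorting on the second tuple element and reading the SE and path from the first and third. Assuming each entry is a (se, priority, path) tuple, as that indexing suggests, the selection reduces to the following sketch; the endpoint values are placeholders.

def pick_surl_protocol(aprotocols, activities=('SE', 'a', 'r')):
    # Return the highest-priority {'se': ..., 'path': ...} entry for the first
    # activity that has any protocol defined, or None if nothing matches.
    for activity in activities:
        entries = aprotocols.get(activity, [])
        if entries:
            se, _, path = sorted(entries, key=lambda x: x[1])[0]
            return {'se': se, 'path': path}
    return None


# placeholder data, shaped like the (se, priority, path) tuples used above
sample = {'SE': [('srm://se.example.org:8446/srm/managerv2?SFN=', 1, '/atlas/rucio/')]}
print(pick_surl_protocol(sample))  # {'se': 'srm://...', 'path': '/atlas/rucio/'}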
Example n. 20
                self.log("Remote checksum [%s]: %s  (%s)" %
                         (src_checksum_type, src_checksum, source))
                self.log("Local  checksum [%s]: %s  (%s)" %
                         (dst_checksum_type, dst_checksum, destination))
                self.log("checksum is_verified = %s" % is_verified)

                if not is_verified:
                    error = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % \
                                            (src_checksum_type, os.path.basename(destination), dst_checksum, src_checksum)
                    if src_checksum_type == 'adler32':
                        state = 'AD_MISMATCH'
                        rcode = PilotErrors.ERR_GETADMISMATCH
                    else:
                        state = 'MD5_MISMATCH'
                        rcode = PilotErrors.ERR_GETMD5MISMATCH
                    raise PilotException(error, code=rcode, state=state)

                self.log("verifying stagein done. [by checksum] [%s]" % source)
                self.trace_report.update(clientState="DONE")
                return {
                    'checksum': dst_checksum,
                    'checksum_type': dst_checksum_type,
                    'filesize': dst_fsize
                }

        except PilotException:
            raise
        except Exception, e:
            self.log(
                "verify StageIn: caught exception while doing file checksum verification: %s ..  skipped"
                % e)
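
The fragment above compares the remote checksum reported by the copy command against a locally computed one. For adler32, the checksum type used throughout these movers, the local value can be computed with the standard zlib module roughly as follows; calc_adler32 is an illustrative helper, not the pilot's own routine.

import zlib


def calc_adler32(path, blocksize=1024 * 1024):
    # Stream the file and fold it into a running adler32; mask to 32 bits so the
    # result is identical on Python 2 and 3, then format it as 8 hex digits.
    value = 1  # adler32 seed
    with open(path, 'rb') as fp:
        while True:
            block = fp.read(blocksize)
            if not block:
                break
            value = zlib.adler32(block, value)
    return "%08x" % (value & 0xffffffff)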
Example n. 21
    def do_put_files(self, ddmendpoint, protocols,
                     files):  # old function : TO BE DEPRECATED ...
        """
        Copy files to dest SE
        :ddmendpoint: DDMEndpoint name used to store files
        :return: (list of transferred_files details, list of failed_transfers details)
        :raise: PilotException in case of error
        """

        self.log(
            '[deprecated do_put_files()] Prepare to copy files=%s to ddmendpoint=%s using protocols data=%s'
            % (files, ddmendpoint, protocols))
        self.log("[deprecated do_put_files()]Number of stage-out tries: %s" %
                 self.stageoutretry)

        # get SURL for Panda callback registration
        # resolve from special protocol activity=SE # fix me later: use the proper name activity=SURL (panda SURL; at the moment only a 2-letter name is allowed on the AGIS side)
        # if SE is not found, try to fall back to activity 'a'
        surl_prot = [
            dict(se=e[0], path=e[2]) for e in
            sorted(self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get(
                'SE',
                self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get(
                    'a', [])),
                   key=lambda x: x[1])
        ]

        if not surl_prot:
            self.log('FAILED to resolve default SURL path for ddmendpoint=%s' %
                     ddmendpoint)
            return [], []
        surl_prot = surl_prot[0]  # take first
        self.log("[do_put_files] SURL protocol to be used: %s" % surl_prot)

        self.trace_report.update(localSite=ddmendpoint, remoteSite=ddmendpoint)

        transferred_files, failed_transfers = [], []

        for dat in protocols:

            copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

            try:
                sitemover = getSiteMover(copytool)(copysetup,
                                                   workDir=self.job.workdir)
                sitemover.trace_report = self.trace_report
                sitemover.protocol = dat  # ##
                sitemover.ddmconf = self.ddmconf  # quick workaround  ###
                sitemover.setup()
            except Exception, e:
                self.log(
                    '[do_put_files] WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s'
                    % (e, dat))
                continue

            self.log("[do_put_files] Copy command: %s, sitemover=%s" %
                     (copytool, sitemover))
            self.log("[do_put_files] Copy setup: %s" % copysetup)

            self.trace_report.update(protocol=copytool)

            se, se_path = dat.get('se', ''), dat.get('path', '')

            self.log("[do_put_files] Found N=%s files to be transferred: %s" %
                     (len(files), [e.get('pfn') for e in files]))

            for fdata in files:
                scope, lfn, pfn = fdata.get(
                    'scope', ''), fdata.get('lfn'), fdata.get('pfn')
                guid = fdata.get('guid', '')

                surl = sitemover.getSURL(
                    surl_prot.get('se'), surl_prot.get('path'), scope, lfn,
                    self.job
                )  # job is passed here for possible JOB-specific processing
                turl = sitemover.getSURL(
                    se, se_path, scope, lfn, self.job
                )  # job is passed here for possible JOB-specific processing

                self.trace_report.update(scope=scope,
                                         dataset=fdata.get('dsname_report'),
                                         url=surl)
                self.trace_report.update(catStart=time.time(),
                                         filename=lfn,
                                         guid=guid.replace('-', ''))

                self.log(
                    "[do_put_files] Preparing copy for pfn=%s to ddmendpoint=%s using copytool=%s: mover=%s"
                    % (pfn, ddmendpoint, copytool, sitemover))
                self.log("[do_put_files] lfn=%s: SURL=%s" % (lfn, surl))
                self.log("[do_put_files] TURL=%s" % turl)

                if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                    error = "Erron: input pfn file is not exist: %s" % pfn
                    self.log(error)
                    raise PilotException(
                        error,
                        code=PilotErrors.ERR_MISSINGOUTPUTFILE,
                        state="FILE_INFO_FAIL")

                filename = os.path.basename(pfn)

                # update the current file state
                updateFileState(filename,
                                self.workDir,
                                self.job.jobId,
                                mode="file_state",
                                state="not_transferred")
                dumpFileStates(self.workDir, self.job.jobId)

                # loop over multiple stage-out attempts
                for _attempt in xrange(1, self.stageoutretry + 1):

                    if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                        self.log(
                            " -- Waiting %d seconds before next stage-out attempt for file=%s --"
                            % (self.stageout_sleeptime, filename))
                        time.sleep(self.stageout_sleeptime)

                    self.log(
                        "[do_put_files] Put attempt %d/%d for filename=%s" %
                        (_attempt, self.stageoutretry, filename))

                    try:
                        # quick workaround
                        from Job import FileSpec
                        stub_fspec = FileSpec(ddmendpoint=ddmendpoint,
                                              guid=guid,
                                              scope=scope,
                                              lfn=lfn)
                        result = sitemover.stageOut(pfn, turl, stub_fspec)
                        break  # transferred successfully
                    except PilotException, e:
                        result = e
                        self.log(traceback.format_exc())

                    except Exception, e:
                        self.log(traceback.format_exc())
                        result = PilotException(
                            "stageOut failed with error=%s" % e,
                            code=PilotErrors.ERR_STAGEOUTFAILED)

                    self.log(
                        'WARNING [do_put_files]: Error in copying file (attempt %s): %s'
                        % (_attempt, result))
Example n. 22
    def resolve_replica(self, fspec, protocol):
        """
            :fspec: FileSpec object
            :protocol: dict('se':'', 'scheme':'str' or list)

            Resolve input replica either by protocol scheme
            or manually construct the pfn according to the protocol.se value (the full local path is matched with respect to the ddm_se default protocol)
            :return: input file replica details: {'surl':'', 'ddmendpoint':'', 'pfn':''}
            :raise: PilotException in case of controlled error
        """

        # resolve proper surl (main srm replica) and find related replica

        if protocol.get('se'):
            scheme = str(protocol.get('se')).split(':', 1)[0]
        else:
            scheme = protocol.get('scheme')

        if not scheme:
            raise Exception(
                'Failed to resolve copytool scheme to be used, se field is corrupted?: protocol=%s'
                % protocol)

        if isinstance(scheme, str):
            scheme = [scheme]

        replica = None  # find first matched to protocol spec replica
        surl = None

        for ddmendpoint, replicas, ddm_se in fspec.replicas:
            if not replicas:
                continue
            surl = replicas[0]  # assume srm protocol is first entry
            self.log(
                "[stage-in] surl (srm replica) from Rucio: pfn=%s, ddmendpoint=%s, ddm.se=%s"
                % (surl, ddmendpoint, ddm_se))

            for r in replicas:
                # match Rucio replica by default protocol se (quick stub until Rucio protocols are properly populated)
                if protocol.get('se') and r.startswith(
                        ddm_se):  # manually form pfn based on protocol.se
                    replica = protocol.get('se') + r.replace(ddm_se, '')
                    self.log(
                        "[stage-in] ignore_rucio_replicas since protocol.se is explicitly passed, protocol.se=%s: found replica=%s matched ddm.se=%s .. will use TURL=%s"
                        % (protocol.get('se'), surl, ddm_se, replica))
                    break
                # use exact pfn from Rucio replicas
                if not replica:
                    for sval in scheme:
                        if r.startswith("%s://" % sval):
                            replica = r
                            break
            if replica:
                break

        if not replica:  # replica not found
            error = 'Failed to find replica for input file, protocol=%s, fspec=%s' % (
                protocol, fspec)
            raise PilotException(error, code=PilotErrors.ERR_REPNOTFOUND)

        return {'surl': surl, 'ddmendpoint': ddmendpoint, 'pfn': replica}
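
resolve_replica() prefers a pfn it can rebuild from an explicitly configured protocol.se (by swapping out the ddm_se prefix) and otherwise takes the first Rucio replica whose URL starts with an accepted scheme. A stripped-down sketch of that matching, with placeholder URLs, is:

def match_replica(replicas, ddm_se, se=None, schemes=('root',)):
    # Mirror the two branches above: rebuild the pfn from `se` when it is given,
    # otherwise return the first replica matching one of the accepted schemes.
    for r in replicas:
        if se and r.startswith(ddm_se):
            return se + r.replace(ddm_se, '')  # manually formed pfn
        for scheme in schemes:
            if r.startswith("%s://" % scheme):
                return r
    return None


replicas = ["srm://se.example.org/atlas/file.root", "root://xrootd.example.org//atlas/file.root"]
print(match_replica(replicas, ddm_se="srm://se.example.org"))       # the root:// replica
print(match_replica(replicas, ddm_se="srm://se.example.org",
                    se="root://door.example.org:1094"))             # pfn rebuilt from protocol.se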
Example n. 23
class lcgcpSiteMover(BaseSiteMover):
    """ SiteMover that uses lcg-cp for both get and put """

    name = "lcgcp"
    copy_command = "lcg-cp"
    checksum_type = "adler32"
    checksum_command = "lcg-get-checksum"

    schemes = ['srm', 'gsiftp']  # list of supported schemes for transfers

    def _stagefile(self, cmd, source, destination, filesize, is_stagein):
        """
            Stage the file
            mode is stagein or stageout
            :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """

        timeout = self.getTimeOut(filesize)

        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: %s threw an exception: %s" %
                     (self.copy_command, e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" %
                 (is_timeout, rcode, output))

        if is_timeout or rcode:  ## do clean up
            if is_stagein:  # stage-in clean up: check if file was partially transferred
                self.removeLocal(destination)

        if is_timeout:
            raise PilotException(
                "Copy command self timed out after %s, timeout=%s, output=%s" %
                (dt, timeout, output),
                code=PilotErrors.ERR_GETTIMEOUT
                if is_stagein else PilotErrors.ERR_PUTTIMEOUT,
                state='CP_TIMEOUT')

        if rcode:
            self.log(
                'WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s'
                % (is_stagein, cmd, rcode, output.replace("\n", " ")))
            error = self.resolveStageErrorFromOutput(output,
                                                     source,
                                                     is_stagein=is_stagein)
            rcode = error.get('rcode')
            if not rcode:
                rcode = PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED
            state = error.get('state')
            if not state:
                state = 'COPY_FAIL'  #'STAGEIN_FAILED' if is_stagein else 'STAGEOUT_FAILED'

            raise PilotException(error.get('error'), code=rcode, state=state)

        # extract filesize and checksum values from output
        # check stage-out: not used at the moment

        return None, None


class xrdcpSiteMover(BaseSiteMover):
    """ SiteMover that uses xrdcp for both get and put """

    #name = "xrdcp"
    copy_command = "xrdcp"
    checksum_type = "adler32"
    checksum_command = "xrdadler32"


    def stageOutFile(self, source, destination):
        """
            Stage out the file
            Should be implemented by each specific site mover
            :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
            :raise: PilotException in case of controlled error
        """

        if self.checksum_type not in ['adler32']: # exclude md5
            raise PilotException("Failed to stageOutFile(): internal error: unsupported checksum_type=%s .. " % self.checksum_type, code=PilotErrors.ERR_STAGEOUTFAILED, state='BAD_CSUMTYPE')

        cmd = "%s -h" % self.copy_command
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Execute command (%s) to decide which option should be used to calc file checksum.." % cmd)

        c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
        output = c.communicate()[0]

        self.log("status: %s, output: %s" % (c.returncode, output))

        coption = ""

        if c.returncode:
            self.log('FAILED to execute command=%s: %s' % (cmd, output))
        else:
            if "--cksum" in output:
                coption = "--cksum %s:print" % self.checksum_type
            elif "-adler" in output and self.checksum_type == 'adler32':
                coption = "-adler"
            elif "-md5" in output and self.checksum_type == 'md5':
                coption = "-md5"

        if coption:
            self.log("Use %s option to get the checksum" % coption)
        else:
            self.log("Cannot find neither -adler nor --cksum. will not use checksum")

        cmd = '%s -np -f %s %s %s' % (self.copy_command, coption, source, destination)
        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        timeout = self.getTimeOut(os.path.getsize(source))
        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:
            self.log("WARNING: xrdcp threw an exception: %s" % e)
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode = %s, output = %s" % (is_timeout, rcode, output.replace("\n", " ")))

        if is_timeout:
            raise PilotException("Copy command self timed out after %s, timeout=%s, output=%s" % (dt, self.timeout, output), code=PilotErrors.ERR_PUTTIMEOUT, state='CP_TIMEOUT')

        if rcode:
            self.log('WARNING: Stage Out command (%s) failed: Status=%s Output=%s' % (cmd, rcode, output.replace("\n"," ")))
            error = self.resolveStageOutError(output, source)

            #if rcode != PilotErrors.ERR_FILEEXIST:
            #    # check if file was partially transferred, if so, remove it
            #    #_ec, removeOutput = self.removeRemoteFile(destination)
            #    #if not _ec :
            #    #    self.log("Failed to remove file %s" % destination)
            #    #return rcode, outputRet

            raise PilotException(error.get('error'), code=error.get('rcode', PilotErrors.ERR_STAGEOUTFAILED), state=error.get('state', 'STAGEOUT_FAILED'))

        # extract remote filesize and checksum values from output

        checksum, checksum_type = self.getRemoteFileChecksumFromOutput(output)

        return checksum, checksum_type
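
Every mover in these excerpts derives its command timeout from self.getTimeOut(filesize), which is not shown here. A size-scaled timeout of the general kind those calls imply might look like the sketch below; the base value, per-MB rate and cap are placeholders, not the pilot's actual settings.

def get_timeout(filesize, base=300, seconds_per_mb=0.5, maximum=6 * 3600):
    # Grow the timeout with the file size, but keep it within [base, maximum].
    # base, seconds_per_mb and maximum are illustrative defaults only.
    scaled = base + seconds_per_mb * (filesize / (1024.0 * 1024.0))
    return int(min(max(scaled, base), maximum))


print(get_timeout(10 * 1024 * 1024))          # small file -> close to the base timeout
print(get_timeout(50 * 1024 * 1024 * 1024))   # very large file -> capped at `maximum`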