Beispiel #1
0
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type
                    in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        return True
Beispiel #2
0
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        return True
Beispiel #3
0
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        elif self.sandboxcache._name == 'DQ2SandboxCache':

            # generate a new dataset name if not given
            if not self.sandboxcache.dataset_name:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2outputdatasetname
                self.sandboxcache.dataset_name, unused = dq2outputdatasetname(
                    "%s.input" % get_uuid(), 0, False, '')

            # subjobs inherits the dataset name from the master job
            for sj in job.subjobs:
                sj.backend.sandboxcache.dataset_name = self.sandboxcache.dataset_name

        elif self.sandboxcache._name == 'GridftpSandboxCache':
            if config['CreamInputSandboxBaseURI']:
                self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI']
            elif self.CE:
                ce_host = re.sub(r'\:[0-9]+', '', self.CE.split('/cream')[0])
                self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % (
                    ce_host, self.sandboxcache.vo)
            else:
                logger.error('baseURI not available for GridftpSandboxCache')
                return False

        return True
Beispiel #4
0
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        elif self.sandboxcache._name == 'DQ2SandboxCache':

            # generate a new dataset name if not given
            if not self.sandboxcache.dataset_name:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2outputdatasetname
                self.sandboxcache.dataset_name, unused = dq2outputdatasetname(
                    "%s.input" % get_uuid(), 0, False, '')

            # subjobs inherits the dataset name from the master job
            for sj in job.subjobs:
                sj.backend.sandboxcache.dataset_name = self.sandboxcache.dataset_name

        elif self.sandboxcache._name == 'GridftpSandboxCache':
            if config['CreamInputSandboxBaseURI']:
                self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI']
            elif self.CE:
                ce_host = re.sub(r'\:[0-9]+', '', self.CE.split('/cream')[0])
                self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % (
                    ce_host, self.sandboxcache.vo)
            else:
                logger.error('baseURI not available for GridftpSandboxCache')
                return False

        return True
Beispiel #5
0
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = Grid.__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type
                    in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        elif self.sandboxcache._name == 'GridftpSandboxCache':
            if config['CreamInputSandboxBaseURI']:
                self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI']
            elif self.CE:
                ce_host = re.sub(r'\:[0-9]+', '', self.CE.split('/cream')[0])
                self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % (
                    ce_host, self.sandboxcache.vo)
            else:
                logger.error('baseURI not available for GridftpSandboxCache')
                return False

        return True
Beispiel #6
0
    def __check_and_prestage_inputfile__(self, file):
        '''Checks the given input file size and if it's size is
           over "BoundSandboxLimit", prestage it to a grid SE.

           The argument is a path of the local file.

           It returns a dictionary containing information to refer to the file:

               idx = {'lfc_host': lfc_host,
                      'local': [the local file pathes],
                      'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... }
                     }

           If prestaging failed, None object is returned.

           If the file has been previously uploaded (according to md5sum),
           the prestaging is ignored and index to the previously uploaded file
           is returned.
           '''

        idx = {'lfc_host': '', 'local': [], 'remote': {}}

        job = self.getJobObject()

        # read-in the previously uploaded files
        uploadedFiles = []

        # getting the uploaded file list from the master job
        if job.master:
            uploadedFiles += job.master.backend.sandboxcache.get_cached_files()

        # set and get the $LFC_HOST for uploading oversized sandbox
        self.__setup_sandboxcache__(job)

        uploadedFiles += self.sandboxcache.get_cached_files()

        lfc_host = None

        # for LCGSandboxCache, take the one specified in the sansboxcache object.
        # the value is exactly the same as the one from the local grid shell env. if
        # it is not specified exclusively.
        if self.sandboxcache._name == 'LCGSandboxCache':
            lfc_host = self.sandboxcache.lfc_host

        # or in general, query it from the Grid object
        if not lfc_host:
            lfc_host = Grid.__get_lfc_host__()

        idx['lfc_host'] = lfc_host

        abspath = os.path.abspath(file)
        fsize = os.path.getsize(abspath)

        if fsize > config['BoundSandboxLimit']:

            md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True)

            doUpload = True
            for uf in uploadedFiles:
                if uf.md5sum == md5sum:
                    # the same file has been uploaded to the iocache
                    idx['remote'][os.path.basename(file)] = uf.id
                    doUpload = False
                    break

            if doUpload:

                logger.warning(
                    'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...'
                    % (file, config['BoundSandboxLimit']))

                if self.sandboxcache.upload([abspath]):
                    remote_sandbox = self.sandboxcache.get_cached_files()[-1]
                    idx['remote'][remote_sandbox.name] = remote_sandbox.id
                else:
                    logger.error(
                        'Oversized sandbox not successfully pre-staged')
                    return None
        else:
            idx['local'].append(abspath)

        return idx
Beispiel #7
0
    def __check_and_prestage_inputfile__(self, file):
        '''Checks the given input file size and if it's size is
           over "BoundSandboxLimit", prestage it to a grid SE.

           The argument is a path of the local file.

           It returns a dictionary containing information to refer to the file:

               idx = {'lfc_host': lfc_host,
                      'local': [the local file pathes],
                      'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... }
                     }

           If prestaging failed, None object is returned.

           If the file has been previously uploaded (according to md5sum),
           the prestaging is ignored and index to the previously uploaded file
           is returned.
           '''

        idx = {'lfc_host': '', 'local': [], 'remote': {}}

        job = self.getJobObject()

        # read-in the previously uploaded files
        uploadedFiles = []

        # getting the uploaded file list from the master job
        if job.master:
            uploadedFiles += job.master.backend.sandboxcache.get_cached_files()

        # set and get the $LFC_HOST for uploading oversized sandbox
        self.__setup_sandboxcache__(job)

        uploadedFiles += self.sandboxcache.get_cached_files()

        lfc_host = None

        # for LCGSandboxCache, take the one specified in the sansboxcache object.
        # the value is exactly the same as the one from the local grid shell env. if
        # it is not specified exclusively.
        if self.sandboxcache._name == 'LCGSandboxCache':
            lfc_host = self.sandboxcache.lfc_host

        # or in general, query it from the Grid object
        if not lfc_host:
            lfc_host = Grid.__get_lfc_host__()

        idx['lfc_host'] = lfc_host

        abspath = os.path.abspath(file)
        fsize = os.path.getsize(abspath)

        if fsize > config['BoundSandboxLimit']:

            md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True)

            doUpload = True
            for uf in uploadedFiles:
                if uf.md5sum == md5sum:
                    # the same file has been uploaded to the iocache
                    idx['remote'][os.path.basename(file)] = uf.id
                    doUpload = False
                    break

            if doUpload:

                logger.warning(
                    'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...' % (file, config['BoundSandboxLimit']))

                if self.sandboxcache.upload([abspath]):
                    remote_sandbox = self.sandboxcache.get_cached_files()[-1]
                    idx['remote'][remote_sandbox.name] = remote_sandbox.id
                else:
                    logger.error(
                        'Oversized sandbox not successfully pre-staged')
                    return None
        else:
            idx['local'].append(abspath)

        return idx