Example #1
 def resolve_input_paths(self, jobspec):
     # get input files
     inFiles = jobspec.get_input_file_attributes()
     # set path to each file
     for inLFN, inFile in iteritems(inFiles):
         inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
     # record the resolved paths on the job spec
     jobspec.set_input_file_paths(inFiles)
     return True, ''
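mover_utils.construct_file_path, used throughout these examples, builds a Rucio-style deterministic path under a base directory. A minimal sketch of the assumed layout (two hash directories taken from the md5 of "scope:lfn"); the real helper lives in pandaharvester and may differ in detail:

import hashlib
import os

def construct_file_path(base_path, scope, lfn):
    # assumed Rucio-style layout: base/scope/XX/YY/lfn, where XX/YY are the
    # first two byte-pairs of md5("scope:lfn")
    hash_hex = hashlib.md5('{0}:{1}'.format(scope, lfn).encode('utf-8')).hexdigest()
    return os.path.join(base_path, scope, hash_hex[0:2], hash_hex[2:4], lfn)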
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('start')

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(
                self.basePath, inFile['scope'], inLFN)
            dstpath = os.path.dirname(inFile['path'])
            # create the destination directory if it does not exist
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({
                'scope': inFile['scope'],
                'name': inLFN,
                'destination': dstpath
            })
        tmpLog.debug('files[] {0}'.format(files))
        data_client = data.StageInClient(site=jobspec.computingSite)
        allChecked = True
        ErrMsg = 'These files failed to download : '
        if len(files) > 0:
            result = data_client.transfer(files)
            tmpLog.debug(
                'pilot.api data.StageInClient.transfer(files) result: {0}'.
                format(result))

            # check each file's result; all must succeed for the overall result to be True
            if result:
                for answer in result:
                    if answer['errno'] != 0:
                        allChecked = False
                        ErrMsg = ErrMsg + (" %s " % answer['name'])
            else:
                tmpLog.info(
                    'Looks like all files already inplace: {0}'.format(files))
        # return
        tmpLog.debug('stop')
        if allChecked:
            return True, ''
        else:
            return False, ErrMsg
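The per-file bookkeeping above (errno check plus error-message accumulation) can be factored into a small helper. A minimal sketch, assuming each entry of the transfer() result is a dict with 'errno' and 'name' keys as in the loop above:

def summarize_transfer_result(result):
    # an empty or falsy result is treated as "all files already in place",
    # mirroring the else branch above
    failed = [answer['name'] for answer in (result or []) if answer['errno'] != 0]
    if failed:
        return False, 'These files failed to download : ' + ' '.join(failed)
    return True, ''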
 def resolve_input_paths(self, jobspec):
     # get input files
     inFileInfo = jobspec.get_input_file_attributes()
     pathInfo = dict()
     for tmpFileSpec in jobspec.inFiles:
         accPath = mover_utils.construct_file_path(self.localBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
                                                   tmpFileSpec.lfn)
         pathInfo[tmpFileSpec.lfn] = {'path': accPath}
     jobspec.set_input_file_paths(pathInfo)
     return True, ''
 def trigger_preparation(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('Start. Trigger data transfer for job: {0}'.format(jobspec.PandaID))
    
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # get input files
     files = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     # set path to each file
     tmpLog.info("Prepare files to download (construct path and verifiy existing files)")
     for inLFN, inFile in iteritems(inFiles):
         inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
         # check if the file exists; skip already downloaded files
         if os.path.exists(inFile['path']):
             checksum = core_utils.calc_adler32(inFile['path'])
             checksum = 'ad:%s' % checksum
             #tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
             if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                 #tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                 continue
         dstpath = os.path.dirname(inFile['path'])
         # create the destination directory if it does not exist
         if not os.access(dstpath, os.F_OK):
             os.makedirs(dstpath)
         files.append({'scope': inFile['scope'],
                       'name': inLFN,
                       'destination': dstpath})
     tmpLog.info('Number of files to download: {0} for job: {1}'.format(len(files), jobspec.PandaID))
     #tmpLog.debug('files {0}'.format(files))
     tmpLog.info('Setup of Pilot2 API client')
     data_client = data.StageInClient(site=jobspec.computingSite)
     allChecked = True
     ErrMsg = 'These files failed to download : '
     if len(files) > 0:
         tmpLog.info("Going to transfer {0} of files with one call to Pilot2 Data API".format(len(files)))
         try:
             result = data_client.transfer(files)
         except Exception as e:
             tmpLog.error("Pilot2 Data API rise error: {0}".format(e.message))
         tmpLog.debug('data_client.transfer(files) result:\n{0}'.format(result))
         tmpLog.info("Transfer call to Pilot2 Data API completed")
         # check each file's result; all must succeed for the overall result to be True
         if result:
             for answer in result:
                 if answer['errno'] != 0:
                     allChecked = False
                     ErrMsg = ErrMsg + (" %s " % answer['name'])
         else:
             tmpLog.info('Looks like all files in place. Number of files: {0}'.format(len(files)))
     # return
     tmpLog.debug('Finished data transfer with {0} files for job {1}'.format(len(files), jobspec.PandaID))
     if allChecked:
         return True, ''
     else:
         return False, ErrMsg
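core_utils.calc_adler32 is assumed to fold zlib's Adler-32 over the file in chunks and return the zero-padded hex digest that is compared against the 'ad:' checksum above. A minimal sketch:

import zlib

def calc_adler32(file_name):
    val = 1  # Adler-32 seed
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            val = zlib.adler32(chunk, val)
    return '{0:08x}'.format(val & 0xffffffff)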
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('Start. Trigger data transfer for job: {0}'.format(
            jobspec.PandaID))

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        tmpLog.info(
            "Prepare files to download (construct path and verify existing files)"
        )
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(
                self.basePath, inFile['scope'], inLFN)
            # check if the file exists; skip already downloaded files
            if os.path.exists(inFile['path']):
                checksum = core_utils.calc_adler32(inFile['path'])
                checksum = 'ad:%s' % checksum
                #tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                    #tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # create the destination directory if it does not exist
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({
                'scope': inFile['scope'],
                'name': inLFN,
                'destination': dstpath
            })
        tmpLog.info('Number of files to download: {0} for job: {1}'.format(
            len(files), jobspec.PandaID))
        #tmpLog.debug('files {0}'.format(files))
        tmpLog.info('Setup of Pilot2 API client')
        data_client = data.StageInClient(site=jobspec.computingSite)
        allChecked = True
        ErrMsg = 'These files failed to download : '
        if len(files) > 0:
            tmpLog.info(
                "Going to transfer {0} files with one call to Pilot2 Data API"
                .format(len(files)))
            try:
                result = data_client.transfer(files)
            except Exception as e:
                tmpLog.error("Pilot2 Data API raised an error: {0}".format(e))
                result = None
            tmpLog.debug(
                'data_client.transfer(files) result:\n{0}'.format(result))
            tmpLog.info("Transfer call to Pilot2 Data API completed")
            # check each file's result; all must succeed for the overall result to be True
            if result:
                for answer in result:
                    if answer['errno'] != 0:
                        allChecked = False
                        ErrMsg = ErrMsg + (" %s " % answer['name'])
            else:
                tmpLog.info(
                    'Looks like all files in place. Number of files: {0}'.
                    format(len(files)))
        # return
        tmpLog.debug(
            'Finished data transfer with {0} files for job {1}'.format(
                len(files), jobspec.PandaID))
        if allChecked:
            return True, ''
        else:
            return False, ErrMsg
Example #7
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('start')

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
            tmpLog.debug('To check file: %s' % inFile)
            if os.path.exists(inFile['path']):
                checksum = core_utils.calc_adler32(inFile['path'])
                checksum = 'ad:%s' % checksum
                tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                    tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # create the destination directory if it does not exist
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({'scope': inFile['scope'],
                          'name': inLFN,
                          'destination': dstpath})
        tmpLog.debug('files[] {0}'.format(files))

        allChecked = True
        ErrMsg = 'These files failed to download : '
        if files:
            threads = []
            n_files_per_thread = (len(files) + self.n_threads - 1) // self.n_threads
            tmpLog.debug('num files per thread: %s' % n_files_per_thread)
            for i in range(0, len(files), n_files_per_thread):
                sub_files = files[i:i + n_files_per_thread]
                thread = threading.Thread(target=self.stage_in, kwargs={'tmpLog': tmpLog, 'jobspec': jobspec, 'files': sub_files})
                threads.append(thread)
            for t in threads:
                t.start()
            while len(threads) > 0:
                time.sleep(1)
                threads = [t for t in threads if t and t.is_alive()]

            tmpLog.info('Checking all files: {0}'.format(files))
            for file in files:
                if file['errno'] != 0:
                    allChecked = False
                    ErrMsg = ErrMsg + (" %s " % file['name'])
        # return
        tmpLog.debug('stop')
        if allChecked:
            tmpLog.info('Looks like all files are successfully downloaded.')
            return True, ''
        else:
            return False, ErrMsg
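The fan-out above splits the file list into roughly equal chunks, one per thread; the + self.n_threads - 1 term is ceiling division, so a remainder still gets its own chunk. A standalone sketch of the same slicing:

def chunk_files(files, n_threads):
    # ceiling division: 7 files over 3 threads -> chunks of 3, 3, 1
    size = (len(files) + n_threads - 1) // n_threads
    return [files[i:i + size] for i in range(0, len(files), size)]

print(chunk_files(list(range(7)), 3))  # [[0, 1, 2], [3, 4, 5], [6]]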
Example #8
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # loop over all files
     files = dict()
     transferIDs = dict()
     transferDatasets = dict()
     fileAttrs = jobspec.get_output_file_attributes()
     for fileSpec in jobspec.outFiles:
         # skip zipped files
         if fileSpec.zipFileID is not None:
             continue
         # skip if already processed
         if 'transferDataset' in fileSpec.fileAttributes:
             if fileSpec.fileType not in transferDatasets:
                 transferDatasets[fileSpec.fileType] = fileSpec.fileAttributes['transferDataset']
             if fileSpec.fileType not in transferIDs:
                 transferIDs[fileSpec.fileType] = fileSpec.fileAttributes['transferID']
             continue
         # set OS ID
         if fileSpec.fileType in ['es_output', 'zip_output']:
             fileSpec.objstoreID = self.objStoreID_ES
         # make path where file is copied for transfer
         if fileSpec.fileType != 'zip_output':
             scope = fileAttrs[fileSpec.lfn]['scope']
             datasetName = fileAttrs[fileSpec.lfn]['dataset']
         else:
             # use panda scope for zipped files
             scope = self.scopeForTmp
             datasetName = 'dummy'
         srcPath = fileSpec.path
         dstPath = mover_utils.construct_file_path(self.srcBasePath, scope,
                                                   fileSpec.lfn)
         # remove any existing file at the destination
         if os.path.exists(dstPath):
             os.remove(dstPath)
         # copy
         tmpLog.debug('copy src={srcPath} dst={dstPath}'.format(
             srcPath=srcPath, dstPath=dstPath))
         dstDir = os.path.dirname(dstPath)
         if not os.path.exists(dstDir):
             os.makedirs(dstDir)
         shutil.copyfile(srcPath, dstPath)
         # collect files
         tmpFile = dict()
         tmpFile['scope'] = scope
         tmpFile['name'] = fileSpec.lfn
         tmpFile['bytes'] = fileSpec.fsize
         if fileSpec.fileType not in files:
             files[fileSpec.fileType] = []
         files[fileSpec.fileType].append(tmpFile)
     # loop over all file types to be registered to rucio
     rucioAPI = RucioClient()
     for fileType, fileList in iteritems(files):
         # set destination RSE
         if fileType in ['es_output', 'zip_output']:
             dstRSE = self.dstRSE_ES
         elif fileType == 'output':
             dstRSE = self.dstRSE_Out
         elif fileType == 'log':
             dstRSE = self.dstRSE_Log
         else:
             errMsg = 'unsupported file type {0}'.format(fileType)
             tmpLog.error(errMsg)
             return (False, errMsg)
         # skip if destination is None
         if dstRSE is None:
             continue
         # make datasets if missing
         if fileType not in transferDatasets:
             try:
                 tmpScope = self.scopeForTmp
                 tmpDS = 'panda.harvester_stage_out.{0}'.format(
                     str(uuid.uuid4()))
                 rucioAPI.add_dataset(tmpScope,
                                      tmpDS,
                                      meta={'hidden': True},
                                      lifetime=7 * 24 * 60 * 60,
                                      files=fileList,
                                      rse=self.srcRSE)
                 transferDatasets[fileType] = tmpDS
                 # add rule
                 tmpDID = dict()
                 tmpDID['scope'] = tmpScope
                 tmpDID['name'] = tmpDS
                 tmpRet = rucioAPI.add_replication_rule(
                     [tmpDID], 1, dstRSE, lifetime=7 * 24 * 60 * 60)
                 tmpTransferIDs = tmpRet[0]
                 transferIDs[fileType] = tmpTransferIDs
                 tmpLog.debug('register dataset {0} with rule {1}'.format(
                     tmpDS, str(tmpTransferIDs)))
             except Exception:
                 errMsg = core_utils.dump_error_message(tmpLog)
                 return (False, errMsg)
         else:
             # add files to existing dataset
             try:
                 tmpScope = self.scopeForTmp
                 tmpDS = transferDatasets[fileType]
                 rucioAPI.add_files_to_dataset(tmpScope, tmpDS, fileList,
                                               self.srcRSE)
                 tmpLog.debug('added files to {0}'.format(tmpDS))
             except Exception:
                 errMsg = core_utils.dump_error_message(tmpLog)
                 return (False, errMsg)
     # set transfer datasets and rules
     for fileSpec in jobspec.outFiles:
         # skip zipped files
         if fileSpec.zipFileID is not None:
             continue
         # skip already done
         if fileSpec.status in ['finished', 'failed']:
             continue
         # skip if already processed
         if 'transferDataset' in fileSpec.fileAttributes:
             continue
         # no destination
         if fileSpec.fileType not in transferDatasets:
             fileSpec.status = 'finished'
             continue
         # set dataset
         fileSpec.fileAttributes['transferDataset'] = transferDatasets[fileSpec.fileType]
         # set rule
         fileSpec.fileAttributes['transferID'] = transferIDs[fileSpec.fileType]
         # force update
         fileSpec.force_update('fileAttributes')
     # return
     tmpLog.debug('done')
     return (True, '')
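The transferID stored on each FileSpec above is a Rucio replication-rule ID, so the matching status checker presumably polls the rule state. A minimal sketch, assuming the standard Rucio client API:

from rucio.client import Client as RucioClient

def rule_is_done(transfer_id):
    # a rule reaches state 'OK' once all replicas exist at the destination RSE
    rule = RucioClient().get_replication_rule(transfer_id)
    return rule['state'] == 'OK'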
Example #9
    def trigger_stage_out(self, jobspec):
        """Trigger the stage-out procedure for the job.
        Output files are available through jobspec.get_outfile_specs(skip_done=False) which gives
        a list of FileSpecs not yet done.
        FileSpec.attemptNr shows how many times transfer was tried for the file so far.

        :param jobspec: job specifications
        :type jobspec: JobSpec
        :return: A tuple of return code (True: success, False: fatal failure, None: temporary failure)
                 and error dialog
        :rtype: (bool, string)
        """

        # let gc clean up memory
        gc.collect()

        # make logger
        tmpLog = self.make_logger(_logger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_stage_out')
        tmpLog.debug('start')
        # get the environment
        harvester_env = os.environ.copy()
        #tmpLog.debug('Harvester environment : {}'.format(harvester_env))

        xrdcpOutput = None
        allfiles_transfered = True
        overall_errMsg = ""
        fileAttrs = jobspec.get_output_file_attributes()
        # loop over all output files
        for fileSpec in jobspec.get_output_file_specs(skip_done=True):
            # fileSpec.objstoreID = 123
            # fileSpec.fileAttributes['guid']
            # construct source and destination paths
            dstPath = mover_utils.construct_file_path(
                self.dstBasePath, fileAttrs[fileSpec.lfn]['scope'],
                fileSpec.lfn)
            # local path
            localPath = mover_utils.construct_file_path(
                self.localBasePath, fileAttrs[fileSpec.lfn]['scope'],
                fileSpec.lfn)
            tmpLog.debug('fileSpec.path - {0} fileSpec.lfn = {1}'.format(
                fileSpec.path, fileSpec.lfn))
            localPath = fileSpec.path
            if self.checkLocalPath:
                # check if the file already exists
                if os.path.exists(localPath):
                    # calculate checksum
                    checksum = core_utils.calc_adler32(localPath)
                    checksum = 'ad:{0}'.format(checksum)
                    if checksum == fileAttrs[fileSpec.lfn]['checksum']:
                        continue
            # collect list of output files
            if xrdcpOutput is None:
                xrdcpOutput = [dstPath]
            else:
                if dstPath not in xrdcpOutput:
                    xrdcpOutput.append(dstPath)
            # transfer using xrdcp one file at a time
            tmpLog.debug('execute xrdcp')
            args = ['xrdcp', '--nopbar', '--force']
            args_files = [localPath, dstPath]
            if self.xrdcpOpts is not None:
                args += self.xrdcpOpts.split()
            args += args_files
            fileSpec.attemptNr += 1
            process = stdout = stderr = None
            try:
                xrdcp_cmd = ' '.join(args)
                tmpLog.debug('execute: {0}'.format(xrdcp_cmd))
                process = subprocess.Popen(xrdcp_cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           env=harvester_env,
                                           shell=True)
                try:
                    stdout, stderr = process.communicate(timeout=self.timeout)
                except subprocess.TimeoutExpired:
                    process.kill()
                    stdout, stderr = process.communicate()
                    tmpLog.warning('command timeout')
                return_code = process.returncode
                if stdout is not None:
                    if not isinstance(stdout, str):
                        stdout = stdout.decode()
                    stdout = stdout.replace('\n', ' ')
                if stderr is not None:
                    if not isinstance(stderr, str):
                        stderr = stderr.decode()
                    stderr = stderr.replace('\n', ' ')
                tmpLog.debug("stdout: %s" % stdout)
                tmpLog.debug("stderr: %s" % stderr)
            except Exception:
                core_utils.dump_error_message(tmpLog)
                return_code = 1
            if return_code == 0:
                fileSpec.status = 'finished'
            else:
                overall_errMsg += "file {0} did not transfer, error code {1} ".format(
                    localPath, return_code)
                allfiles_transfered = False
                errMsg = 'failed with {0}'.format(return_code)
                tmpLog.error(errMsg)
                # check attemptNr
                if fileSpec.attemptNr >= self.maxAttempts:
                    tmpLog.error(
                        'reached maxattempts: {0}, marked it as failed'.format(
                            self.maxAttempts))
                    fileSpec.status = 'failed'

            # force update
            fileSpec.force_update('status')
            tmpLog.debug('file: {0} status: {1}'.format(
                fileSpec.lfn, fileSpec.status))
            del process, stdout, stderr

        # end loop over output files

        # nothing to transfer
        if xrdcpOutput is None:
            tmpLog.debug('done with no transfers')
            return True, ''
        # check if all files were transferred
        tmpLog.debug('done')
        if allfiles_transfered:
            return True, ''
        else:
            return None, overall_errMsg
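The Popen / communicate / TimeoutExpired / kill sequence recurs in several examples here; it could be consolidated into one helper. A minimal sketch:

import subprocess

def run_command(args, timeout, env=None):
    # returns (return_code, stdout, stderr) with newlines flattened,
    # killing the child if it exceeds the timeout
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
    try:
        stdout, stderr = p.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        p.kill()
        stdout, stderr = p.communicate()

    def flatten(s):
        if s is None:
            return s
        if not isinstance(s, str):
            s = s.decode()
        return s.replace('\n', ' ')

    return p.returncode, flatten(stdout), flatten(stderr)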
 def make_local_access_path(self, scope, lfn):
     return mover_utils.construct_file_path(self.localBasePath, scope, lfn)
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('start')

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(
                self.basePath, inFile['scope'], inLFN)
            tmpLog.debug('To check file: %s' % inFile)
            if os.path.exists(inFile['path']):
                # checksum = core_utils.calc_adler32(inFile['path'])
                # checksum = 'ad:%s' % checksum
                # tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                # if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                #     tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                #     continue

                # lazy but unsafe check to be faster...
                file_size = os.stat(inFile['path']).st_size
                tmpLog.debug('file size for file %s is %s' %
                             (inFile['path'], file_size))
            if 'fsize' in inFile and inFile['fsize'] and inFile['fsize'] == file_size:
                    tmpLog.debug('File %s already exists at %s' %
                                 (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # create the destination directory if it does not exist
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            file_data = {
                'scope': inFile['scope'],
                'dataset': inFile.get('dataset'),
                'lfn': inLFN,
                'ddmendpoint': inFile.get('endpoint'),
                'guid': inFile.get('guid'),
                'workdir': dstpath,
            }
            pilotfilespec = PilotFileSpec(type='input', **file_data)
            files.append(pilotfilespec)
        # tmpLog.debug('files[] {0}'.format(files))
        tmpLog.debug('path set')

        allChecked = True
        ErrMsg = 'These files failed to download : '
        if files:
            threads = []
            n_files_per_thread = (len(files) + self.n_threads -
                                  1) // self.n_threads
            tmpLog.debug('num files per thread: %s' % n_files_per_thread)
            for i in range(0, len(files), n_files_per_thread):
                sub_files = files[i:i + n_files_per_thread]
                thread = threading.Thread(target=self.stage_in,
                                          kwargs={
                                              'tmpLog': tmpLog,
                                              'jobspec': jobspec,
                                              'files': sub_files,
                                          })
                threads.append(thread)
            for t in threads:
                t.start()
            while len(threads) > 0:
                time.sleep(1)
                threads = [t for t in threads if t and t.is_alive()]

            tmpLog.info('Checking all files: {0}'.format(files))
            for file in files:
                if file.status_code != 0:
                    allChecked = False
                    ErrMsg = ErrMsg + (" %s " % file.lfn)
            for inLFN, inFile in iteritems(inFiles):
                if not os.path.isfile(inFile['path']):
                    allChecked = False
                    ErrMsg = ErrMsg + (" %s " % inLFN)
        # return
        tmpLog.debug('stop')
        if allChecked:
            tmpLog.info('Looks like all files are successfully downloaded.')
            return True, ''
        else:
            # keep retrying
            return None, ErrMsg
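The sleep-and-poll loop above only waits for the stage-in threads to finish, so it can be replaced by a plain join:

for t in threads:
    t.join()  # blocks until this thread has finished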
Example #12
 def trigger_preparation(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')
     # loop over all inputs
     inFileInfo = jobspec.get_input_file_attributes()
     gucInput = None
     for tmpFileSpec in jobspec.inFiles:
         # construct source and destination paths
         srcPath = mover_utils.construct_file_path(
             self.srcBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         dstPath = mover_utils.construct_file_path(
             self.dstBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         # local access path
         accPath = mover_utils.construct_file_path(
             self.localBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         if self.checkLocalPath:
             # check if the file already exists
             if os.path.exists(accPath):
                 # calculate checksum
                 checksum = core_utils.calc_adler32(accPath)
                 checksum = 'ad:{0}'.format(checksum)
                 if checksum == inFileInfo[tmpFileSpec.lfn]['checksum']:
                     continue
             # make directories if needed
             if not os.path.isdir(os.path.dirname(accPath)):
                 os.makedirs(os.path.dirname(accPath))
         # make input for globus-url-copy
         if gucInput is None:
             gucInput = tempfile.NamedTemporaryFile(mode='w',
                                                    delete=False,
                                                    suffix='_guc_in.tmp')
         gucInput.write("{0} {1}\n".format(srcPath, dstPath))
         tmpFileSpec.attemptNr += 1
     # nothing to transfer
     if gucInput is None:
         tmpLog.debug('done with no transfers')
         return True, ''
     # transfer
     tmpLog.debug('execute globus-url-copy')
     gucInput.close()
     args = ['globus-url-copy', '-f', gucInput.name, '-cd']
     if self.gulOpts is not None:
         args += self.gulOpts.split()
     try:
         tmpLog.debug('execute: ' + ' '.join(args))
         p = subprocess.Popen(args,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
         try:
             stdout, stderr = p.communicate(timeout=self.timeout)
         except subprocess.TimeoutExpired:
             p.kill()
             stdout, stderr = p.communicate()
             tmpLog.warning('command timeout')
         return_code = p.returncode
         if stdout is not None:
             if not isinstance(stdout, str):
                 stdout = stdout.decode()
             stdout = stdout.replace('\n', ' ')
         if stderr is not None:
             if not isinstance(stderr, str):
                 stderr = stderr.decode()
             stderr = stderr.replace('\n', ' ')
         tmpLog.debug("stdout: %s" % stdout)
         tmpLog.debug("stderr: %s" % stderr)
     except Exception:
         core_utils.dump_error_message(tmpLog)
         return_code = 1
     os.remove(gucInput.name)
     if return_code == 0:
         tmpLog.debug('succeeded')
         return True, ''
     else:
         errMsg = 'failed with {0}'.format(return_code)
         tmpLog.error(errMsg)
         # check attemptNr
         for tmpFileSpec in jobspec.inFiles:
             if tmpFileSpec.attemptNr >= self.maxAttempts:
                 errMsg = 'gave up due to max attempts'
                 tmpLog.error(errMsg)
                 return (False, errMsg)
         return None, errMsg
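The temporary file handed to globus-url-copy -f holds one "source destination" URL pair per line; the endpoints below are hypothetical:

gsiftp://gridftp.example.org/base/scope/ab/cd/file1.root file:///data/base/scope/ab/cd/file1.root
gsiftp://gridftp.example.org/base/scope/ef/01/file2.root file:///data/base/scope/ef/01/file2.root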
Example #13
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')               
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
         return True, ''
     # set the Globus destination endpoint ID and path; eventually these will come from AGIS
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
         # check if the base path exists; if not, create it
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints 
         Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({'scope': inFile['scope'],
                       'name': inLFN,
                       'Globus_dstPath': Globus_dstpath,
                       'Globus_srcPath': Globus_srcpath})
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc:
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst:
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False, errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
             # loop over all input files and add 
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'], myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                 # set transfer ID which are used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                 tmpLog.debug('done')
                 return True, ''
             else:
                 return False, transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except Exception:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         return errStat, errMsg
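The task_id stored via set_groups_to_files is a Globus transfer task ID; the matching status check presumably polls it through the same TransferClient. A minimal sketch, assuming the Globus SDK API:

def transfer_is_done(tc, transfer_id):
    # Globus tasks end in a terminal status such as SUCCEEDED or FAILED
    task = tc.get_task(transfer_id)
    return task['status'] in ('SUCCEEDED', 'FAILED')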
Example #14
 def trigger_preparation(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')
     # get the environment
     harvester_env = os.environ.copy()
     #tmpLog.debug('Harvester environment : {}'.format(harvester_env))
     # loop over all inputs
     inFileInfo = jobspec.get_input_file_attributes()
     xrdcpInput = None
     allfiles_transfered = True
     overall_errMsg = ""
     for tmpFileSpec in jobspec.inFiles:
         # construct source and destination paths
         srcPath = mover_utils.construct_file_path(
             self.srcBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         # local path
         localPath = mover_utils.construct_file_path(
             self.localBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         if self.checkLocalPath:
             # check if the file already exists
             if os.path.exists(localPath):
                 # calculate checksum
                 checksum = core_utils.calc_adler32(localPath)
                 checksum = 'ad:{0}'.format(checksum)
                 if checksum == inFileInfo[tmpFileSpec.lfn]['checksum']:
                     continue
             # make directories if needed
             if not os.path.isdir(os.path.dirname(localPath)):
                 os.makedirs(os.path.dirname(localPath))
                 tmpLog.debug('Make directory - {0}'.format(
                     os.path.dirname(localPath)))
         # collect list of input files
         if xrdcpInput is None:
             xrdcpInput = [srcPath]
         else:
             xrdcpInput.append(srcPath)
         # transfer using xrdcp one file at a time
         tmpLog.debug('execute xrdcp')
         args = ['xrdcp', '--nopbar', '--force']
         args_files = [srcPath, localPath]
         if self.xrdcpOpts is not None:
             args += self.xrdcpOpts.split()
         args += args_files
         tmpFileSpec.attemptNr += 1
         try:
             xrdcp_cmd = ' '.join(args)
             tmpLog.debug('execute: {0}'.format(xrdcp_cmd))
             p = subprocess.Popen(xrdcp_cmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  env=harvester_env,
                                  shell=True)
             try:
                 stdout, stderr = p.communicate(timeout=self.timeout)
             except subprocess.TimeoutExpired:
                 p.kill()
                 stdout, stderr = p.communicate()
                 tmpLog.warning('command timeout')
             return_code = p.returncode
             if stdout is not None:
                 if not isinstance(stdout, str):
                     stdout = stdout.decode()
                 stdout = stdout.replace('\n', ' ')
             if stderr is not None:
                 if not isinstance(stderr, str):
                     stderr = stderr.decode()
                 stderr = stderr.replace('\n', ' ')
             tmpLog.debug("stdout: %s" % stdout)
             tmpLog.debug("stderr: %s" % stderr)
         except Exception:
             core_utils.dump_error_message(tmpLog)
             return_code = 1
         if return_code != 0:
             overall_errMsg += "file {0} did not transfer, error code {1} ".format(
                 localPath, return_code)
             allfiles_transfered = False
             errMsg = 'failed with {0}'.format(return_code)
             tmpLog.error(errMsg)
             # check attemptNr
             if tmpFileSpec.attemptNr >= self.maxAttempts:
                 errMsg = 'gave up due to max attempts'
                 tmpLog.error(errMsg)
                 return (False, errMsg)
     # end loop over input files
     # nothing to transfer
     if xrdcpInput is None:
         tmpLog.debug('done with no transfers')
         return True, ''
     # check if all files were transferred
     if allfiles_transfered:
         return True, ''
     else:
         return None, overall_errMsg
Example #15
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # loop over all files
     files = dict()
     transferIDs = dict()
     transferDatasets = dict()
     fileAttrs = jobspec.get_output_file_attributes()
     for fileSpec in jobspec.outFiles:
         # skip zipped files
         if fileSpec.zipFileID is not None:
             continue
         # skip if already processed
         if 'transferDataset' in fileSpec.fileAttributes:
             if fileSpec.fileType not in transferDatasets:
                 transferDatasets[fileSpec.fileType] = fileSpec.fileAttributes['transferDataset']
             if fileSpec.fileType not in transferIDs:
                 transferIDs[fileSpec.fileType] = fileSpec.fileAttributes['transferID']
             continue
         # set OS ID
         if fileSpec.fileType in ['es_output', 'zip_output']:
             fileSpec.objstoreID = self.objStoreID_ES
         # make path where file is copied for transfer
         if fileSpec.fileType != 'zip_output':
             scope = fileAttrs[fileSpec.lfn]['scope']
             datasetName = fileAttrs[fileSpec.lfn]['dataset']
         else:
             # use panda scope for zipped files
             scope = self.scopeForTmp
             datasetName = 'dummy'
         srcPath = fileSpec.path
         dstPath = mover_utils.construct_file_path(self.srcBasePath, scope, fileSpec.lfn)
         # remove any existing file at the destination
         if os.path.exists(dstPath):
             os.remove(dstPath)
         # copy
         tmpLog.debug('copy src={srcPath} dst={dstPath}'.format(srcPath=srcPath, dstPath=dstPath))
         dstDir = os.path.dirname(dstPath)
         if not os.path.exists(dstDir):
             os.makedirs(dstDir)
         shutil.copyfile(srcPath, dstPath)
         # collect files
         tmpFile = dict()
         tmpFile['scope'] = scope
         tmpFile['name'] = fileSpec.lfn
         tmpFile['bytes'] = fileSpec.fsize
         if fileSpec.fileType not in files:
             files[fileSpec.fileType] = []
         files[fileSpec.fileType].append(tmpFile)
     # loop over all file types to be registered to rucio
     rucioAPI = RucioClient()
     for fileType, fileList in iteritems(files):
         # set destination RSE
         if fileType in ['es_output', 'zip_output']:
             dstRSE = self.dstRSE_ES
         elif fileType == 'output':
             dstRSE = self.dstRSE_Out
         elif fileType == 'log':
             dstRSE = self.dstRSE_Log
         else:
             errMsg = 'unsupported file type {0}'.format(fileType)
             tmpLog.error(errMsg)
             return (False, errMsg)
         # skip if destination is None
         if dstRSE is None:
             continue
         # make datasets if missing
         if fileType not in transferDatasets:
             try:
                 tmpScope = self.scopeForTmp
                 tmpDS = 'panda.harvester_stage_out.{0}'.format(str(uuid.uuid4()))
                 rucioAPI.add_dataset(tmpScope, tmpDS,
                                      meta={'hidden': True},
                                      lifetime=30*24*60*60,
                                      files=fileList,
                                      rse=self.srcRSE
                                      )
                 transferDatasets[fileType] = tmpDS
                 # add rule
                 tmpDID = dict()
                 tmpDID['scope'] = tmpScope
                 tmpDID['name'] = tmpDS
                 tmpRet = rucioAPI.add_replication_rule([tmpDID], 1, dstRSE,
                                                        lifetime=30*24*60*60
                                                        )
                 tmpTransferIDs = tmpRet[0]
                 transferIDs[fileType] = tmpTransferIDs
                 tmpLog.debug('register dataset {0} with rule {1}'.format(tmpDS, str(tmpTransferIDs)))
             except Exception:
                 errMsg = core_utils.dump_error_message(tmpLog)
                 return (False, errMsg)
         else:
             # add files to existing dataset
             try:
                 tmpScope = self.scopeForTmp
                 tmpDS = transferDatasets[fileType]
                 rucioAPI.add_files_to_dataset(tmpScope, tmpDS, fileList, self.srcRSE)
                 tmpLog.debug('added files to {0}'.format(tmpDS))
             except Exception:
                 errMsg = core_utils.dump_error_message(tmpLog)
                 return (False, errMsg)
     # set transfer datasets and rules
     for fileSpec in jobspec.outFiles:
         # skip zipped files
         if fileSpec.zipFileID is not None:
             continue
         # skip already done
         if fileSpec.status in ['finished', 'failed']:
             continue
         # skip if already processed
         if 'transferDataset' in fileSpec.fileAttributes:
             continue
         # no destination
         if fileSpec.fileType not in transferDatasets:
             fileSpec.status = 'finished'
             continue
         # set dataset
         fileSpec.fileAttributes['transferDataset'] = transferDatasets[fileSpec.fileType]
         # set rule
         fileSpec.fileAttributes['transferID'] = transferIDs[fileSpec.fileType]
         # force update
         fileSpec.force_update('fileAttributes')
     # return
     tmpLog.debug('done')
     return (True, '')
Example #16
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # loop over all files
     gucInput = None
     is_multistep = isinstance(self.intermediateBasePaths, list) and len(self.intermediateBasePaths) > 0
     guc_inputs_list = [None] * (len(self.intermediateBasePaths) + 1) if is_multistep else []
     for fileSpec in jobspec.outFiles:
         # skip if already done
         if fileSpec.status in ['finished', 'failed']:
             continue
         # scope
         if fileSpec.fileType in ['es_output', 'zip_output']:
             scope = self.scopeForTmp
         else:
             scope = fileSpec.fileAttributes.get('scope')
             if scope is None:
                 scope = fileSpec.scope
         # construct source and destination paths
         srcPath = re.sub(self.srcOldBasePath, self.srcNewBasePath, fileSpec.path)
         dstPath = mover_utils.construct_file_path(self.dstBasePath, scope, fileSpec.lfn)
         # make tempfiles of paths to transfer
         if is_multistep:
             # multi-step transfer
             for ibp_i in range(len(self.intermediateBasePaths) + 1):
                 base_paths_old = self.intermediateBasePaths[ibp_i - 1] if ibp_i > 0 else ''
                 base_paths_new = self.intermediateBasePaths[ibp_i] if ibp_i < len(self.intermediateBasePaths) else ''
                 src_base = base_paths_old[1] if isinstance(base_paths_old, list) else base_paths_old
                 dst_base = base_paths_new[0] if isinstance(base_paths_new, list) else base_paths_new
                 # construct temporary source and destination paths
                 tmp_src_path = re.sub(self.srcNewBasePath, src_base, srcPath)
                 tmp_dest_path = re.sub(self.srcNewBasePath, dst_base, srcPath)
                 # make input for globus-url-copy
                 if guc_inputs_list[ibp_i] is None:
                     guc_inputs_list[ibp_i] = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='_guc_out_{0}.tmp'.format(ibp_i))
                 guc_input = guc_inputs_list[ibp_i]
                 if ibp_i == 0:
                     guc_input.write("{0} {1}\n".format(srcPath, tmp_dest_path))
                     tmpLog.debug("step {0}: {1} {2}".format(ibp_i + 1, srcPath, tmp_dest_path))
                 elif ibp_i == len(self.intermediateBasePaths):
                     guc_input.write("{0} {1}\n".format(tmp_src_path, dstPath))
                     tmpLog.debug("step {0}: {1} {2}".format(ibp_i + 1, tmp_src_path, dstPath))
                 else:
                     guc_input.write("{0} {1}\n".format(tmp_src_path, tmp_dest_path))
                     tmpLog.debug("step {0}: {1} {2}".format(ibp_i + 1, tmp_src_path, tmp_dest_path))
         else:
             # single-step transfer
             # make input for globus-url-copy
             if gucInput is None:
                 gucInput = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='_guc_out.tmp')
             gucInput.write("{0} {1}\n".format(srcPath, dstPath))
         fileSpec.attemptNr += 1
     # nothing to transfer
     if is_multistep:
         for guc_input in guc_inputs_list:
             if guc_input is None:
                 tmpLog.debug('done with no transfers (multistep)')
                 return True, ''
     else:
         if gucInput is None:
             tmpLog.debug('done with no transfers')
             return True, ''
     # transfer
     if is_multistep:
         for guc_input in guc_inputs_list:
             guc_input.close()
         tmpLog.debug('start multistep transfer')
         guc_input_i = 1
         for guc_input in guc_inputs_list:
             args = ['globus-url-copy', '-f', guc_input.name, '-cd']
             if self.gulOpts is not None:
                 args += self.gulOpts.split()
             try:
                 tmpLog.debug('execute: ' + ' '.join(args))
                 p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                 try:
                     stdout, stderr = p.communicate(timeout=self.timeout)
                 except subprocess.TimeoutExpired:
                     p.kill()
                     stdout, stderr = p.communicate()
                     tmpLog.warning('command timeout')
                 return_code = p.returncode
                 if stdout is not None:
                     if not isinstance(stdout, str):
                         stdout = stdout.decode()
                     stdout = stdout.replace('\n', ' ')
                 if stderr is not None:
                     if not isinstance(stderr, str):
                         stderr = stderr.decode()
                     stderr = stderr.replace('\n', ' ')
                 tmpLog.debug("stdout: %s" % stdout)
                 tmpLog.debug("stderr: %s" % stderr)
             except Exception:
                 core_utils.dump_error_message(tmpLog)
                 return_code = 1
             os.remove(guc_input.name)
             if return_code == 0:
                 tmpLog.debug('step {0} succeeded'.format(guc_input_i))
                 guc_input_i += 1
             else:
                 errMsg = 'step {0} failed with {1}'.format(guc_input_i, return_code)
                 tmpLog.error(errMsg)
                 # check attemptNr
                 for fileSpec in jobspec.outFiles:
                     if fileSpec.attemptNr >= self.maxAttempts:
                         errMsg = 'gave up due to max attempts'
                         tmpLog.error(errMsg)
                         return (False, errMsg)
                 return None, errMsg
         tmpLog.debug('multistep transfer ({0} steps) succeeded'.format(len(guc_inputs_list)))
         return True, ''
     else:
         gucInput.close()
         args = ['globus-url-copy', '-f', gucInput.name, '-cd']
         if self.gulOpts is not None:
             args += self.gulOpts.split()
         try:
             tmpLog.debug('execute: ' + ' '.join(args))
             p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             try:
                 stdout, stderr = p.communicate(timeout=self.timeout)
             except subprocess.TimeoutExpired:
                 p.kill()
                 stdout, stderr = p.communicate()
                 tmpLog.warning('command timeout')
             return_code = p.returncode
             if stdout is not None:
                 if not isinstance(stdout, str):
                     stdout = stdout.decode()
                 stdout = stdout.replace('\n', ' ')
             if stderr is not None:
                 if not isinstance(stderr, str):
                     stderr = stderr.decode()
                 stderr = stderr.replace('\n', ' ')
             tmpLog.debug("stdout: %s" % stdout)
             tmpLog.debug("stderr: %s" % stderr)
         except Exception:
             core_utils.dump_error_message(tmpLog)
             return_code = 1
         os.remove(gucInput.name)
         if return_code == 0:
             tmpLog.debug('succeeded')
             return True, ''
         else:
             errMsg = 'failed with {0}'.format(return_code)
             tmpLog.error(errMsg)
             # check attemptNr
             for fileSpec in jobspec.outFiles:
                 if fileSpec.attemptNr >= self.maxAttempts:
                     errMsg = 'gave up due to max attempts'
                     tmpLog.error(errMsg)
                     return (False, errMsg)
             return None, errMsg
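From the indexing above (base_paths_old[1] as the read side of the previous hop, base_paths_new[0] as the write side of the next), intermediateBasePaths is assumed to hold one entry per intermediate staging area, where a two-element entry pairs the write-side and read-side base paths of the same storage. A hypothetical single-intermediate configuration:

# hypothetical: the worker writes to the buffer via GridFTP,
# the next hop reads it back through a local mount of the same area
intermediateBasePaths = [
    ['gsiftp://dtn.example.org/buffer', '/mnt/buffer'],
]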
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # loop over all files
     gucInput = None
     for fileSpec in jobspec.outFiles:
         # skip if already done
         if fileSpec.status in ['finished', 'failed']:
             continue
         # scope
         if fileSpec.fileType in ['es_output', 'zip_output']:
             scope = self.scopeForTmp
         else:
             scope = fileSpec.fileAttributes.get('scope')
             if scope is None:
                 scope = fileSpec.scope
         # construct source and destination paths
         srcPath = re.sub(self.srcOldBasePath, self.srcNewBasePath, fileSpec.path)
         dstPath = mover_utils.construct_file_path(self.dstBasePath, scope, fileSpec.lfn)
         # make input for globus-url-copy
         if gucInput is None:
             gucInput = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='_guc_out.tmp')
         gucInput.write("{0} {1}\n".format(srcPath, dstPath))
         fileSpec.attemptNr += 1
     # nothing to transfer
     if gucInput is None:
         tmpLog.debug('done with no transfers')
         return True, ''
     # transfer
     gucInput.close()
     args = ['globus-url-copy', '-f', gucInput.name, '-cd']
     if self.gulOpts is not None:
         args += self.gulOpts.split()
     try:
         tmpLog.debug('execute globus-url-copy: ' + ' '.join(args))
         p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = p.communicate()
         return_code = p.returncode
         if stdout is not None:
             if not isinstance(stdout, str):
                 stdout = stdout.decode()
             stdout = stdout.replace('\n', ' ')
         if stderr is not None:
             if not isinstance(stderr, str):
                 stderr = stderr.decode()
             stderr = stderr.replace('\n', ' ')
         tmpLog.debug("stdout: %s" % stdout)
         tmpLog.debug("stderr: %s" % stderr)
     except Exception:
         core_utils.dump_error_message(tmpLog)
         return_code = 1
     os.remove(gucInput.name)
     if return_code == 0:
         tmpLog.debug('succeeded')
         return True, ''
     else:
         errMsg = 'failed with {0}'.format(return_code)
         tmpLog.error(errMsg)
         # check attemptNr
         for fileSpec in jobspec.outFiles:
             if fileSpec.attemptNr >= self.maxAttempts:
                 errMsg = 'gave up due to max attempts'
                 tmpLog.error(errMsg)
                 return (False, errMsg)
         return None, errMsg
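
The same timeout-guarded globus-url-copy call appears in both methods above; a minimal sketch that factors it into a standalone helper (the helper name and defaults are illustrative, not harvester API):

 import subprocess

 # Illustrative helper, not part of harvester: run globus-url-copy on a
 # file of "src dst" pairs (-f), creating destination directories (-cd).
 def run_guc(filelist_path, extra_opts=None, timeout=None):
     args = ['globus-url-copy', '-f', filelist_path, '-cd']
     if extra_opts:
         args += extra_opts.split()
     p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     try:
         stdout, stderr = p.communicate(timeout=timeout)
     except subprocess.TimeoutExpired:
         p.kill()  # kill the copy, then drain whatever output it produced
         stdout, stderr = p.communicate()
     return p.returncode, stdout.decode(), stderr.decode()
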
Example #18
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = core_utils.make_logger(_logger,
                                     'PandaID={0}'.format(jobspec.PandaID),
                                     method_name='trigger_preparation')
     tmpLog.debug('start')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
     # set the Globus source/destination endpoint IDs and paths (these will eventually come from AGIS)
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(
             self.basePath, inFile['scope'], inLFN)
         # check if the base path exists; if not, create it
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints
         Globus_srcpath = mover_utils.construct_file_path(
             self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(
             self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({
             'scope': inFile['scope'],
             'name': inLFN,
             'Globus_dstPath': Globus_dstpath,
             'Globus_srcPath': Globus_srcpath
         })
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc:
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst:
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False, errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
             # loop over all input files and add
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],
                                myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                 # set the transfer ID, which is used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files(
                     {transferID: {
                         'lfns': lfns,
                         'groupStatus': 'active'
                     }})
                 tmpLog.debug('done')
                 return True, ''
             else:
                 return False, transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except Exception:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         return errStat, errMsg
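
The transfer ID recorded via set_groups_to_files is what a later status check would poll. A minimal sketch using the globus_sdk TransferClient (tc is assumed to be an authenticated client, as in the snippet above; the function name is hypothetical):

 # Sketch: poll a Globus transfer task until it reaches a terminal state,
 # mapping it onto the same tri-state convention used elsewhere.
 def poll_transfer(tc, transferID):
     task = tc.get_task(transferID)
     status = task['status']  # e.g. ACTIVE, SUCCEEDED or FAILED
     if status == 'SUCCEEDED':
         return True, ''
     if status == 'FAILED':
         return False, 'transfer task {0} failed'.format(transferID)
     return None, 'transfer task {0} still {1}'.format(transferID, status)
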
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('start')

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
            tmpLog.debug('To check file: %s' % inFile)
            if os.path.exists(inFile['path']):
                checksum = core_utils.calc_adler32(inFile['path'])
                checksum = 'ad:%s' % checksum
                tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                    tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # check if the path exists; if not, create it
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({'scope': inFile['scope'],
                          'name': inLFN,
                          'destination': dstpath})
        tmpLog.debug('files[] {0}'.format(files))

        allChecked = True
        ErrMsg = 'These files failed to download : '
        if files:
            threads = []
            n_files_per_thread = (len(files) + self.n_threads - 1) // self.n_threads  # ceil division
            tmpLog.debug('num files per thread: %s' % n_files_per_thread)
            for i in range(0, len(files), n_files_per_thread):
                sub_files = files[i:i + n_files_per_thread]
                thread = threading.Thread(target=self.stage_in, kwargs={'tmpLog': tmpLog, 'jobspec': jobspec, 'files': sub_files})
                threads.append(thread)
            for t in threads:
                t.start()
            while threads:
                time.sleep(1)
                threads = [t for t in threads if t and t.is_alive()]

            tmpLog.info('Checking all files: {0}'.format(files))
            for file in files:
                if file['errno'] != 0:
                    allChecked = False
                    ErrMsg = ErrMsg + (" %s " % file['name'])
        # return
        tmpLog.debug('stop')
        if allChecked:
            tmpLog.info('Looks like all files are successfully downloaded.')
            return True, ''
        else:
            return False, ErrMsg
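
The manual chunking and thread polling above can also be expressed with concurrent.futures, which joins the workers automatically; a sketch under the same assumptions (stage_in behaves like self.stage_in above and records a per-file 'errno'):

 from concurrent.futures import ThreadPoolExecutor

 # Sketch: the same fan-out with a thread pool instead of manual threads.
 def stage_in_parallel(stage_in, tmpLog, jobspec, files, n_threads):
     chunk = (len(files) + n_threads - 1) // n_threads  # ceil division
     with ThreadPoolExecutor(max_workers=n_threads) as pool:
         for i in range(0, len(files), chunk):
             pool.submit(stage_in, tmpLog=tmpLog, jobspec=jobspec,
                         files=files[i:i + chunk])
     # the with-block returns only after all workers have finished
     return [f['name'] for f in files if f.get('errno', 0) != 0]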