def scan_files_in_dir(dir_name, patterns=None):
    fileList = []
    for root, dirs, filenames in walk(dir_name):
        for filename in filenames:
            # check filename
            if patterns is not None:
                matched = False
                for pattern in patterns:
                    if re.search(pattern, filename) is not None:
                        matched = True
                        break
                if not matched:
                    continue
            # make dict
            tmpFileDict = dict()
            pfn = os.path.join(root, filename)
            lfn = os.path.basename(pfn)
            tmpFileDict['path'] = pfn
            tmpFileDict['fsize'] = os.stat(pfn).st_size
            tmpFileDict['type'] = 'es_output'
            tmpFileDict['guid'] = str(uuid.uuid4())
            tmpFileDict['chksum'] = core_utils.calc_adler32(pfn)
            tmpFileDict['eventRangeID'] = lfn.split('.')[-1]
            tmpFileDict['eventStatus'] = "finished"
            fileList.append(tmpFileDict)
    return fileList
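A minimal usage sketch (hypothetical path and pattern; assumes the snippet's module imports walk from os and has re, os.path, uuid, and the harvester core_utils available):

# Collect event-service outputs whose names carry a trailing
# ".<eventRangeID>" suffix under a worker directory.
patterns = [r'\.pool\.root\.']
for f in scan_files_in_dir('/data/harvester/worker_0001', patterns=patterns):
    print(f['path'], f['fsize'], f['eventRangeID'])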
 def simple_zip_output(self, jobspec, tmp_log):
     tmp_log.debug('start')
     try:
         for fileSpec in jobspec.outFiles:
             if self.zipDir == "${SRCDIR}":
                 # the same directory as src
                 zipDir = os.path.dirname(
                     next(iter(fileSpec.associatedFiles)).path)
             elif self.zipDir == "${WORKDIR}":
                 # work dir
                 workSpec = jobspec.get_workspec_list()[0]
                 zipDir = workSpec.get_access_point()
             else:
                 zipDir = self.zipDir
             zipPath = os.path.join(zipDir, fileSpec.lfn)
             msgStr = 'self.zipDir - {0} zipDir - {1} fileSpec.lfn - {2} zipPath - {3}' \
                 .format(self.zipDir, zipDir, fileSpec.lfn, zipPath)
             tmp_log.debug(msgStr)
             # make zip if doesn't exist
             if not os.path.exists(zipPath):
                 tmpZipPath = zipPath + '.' + str(uuid.uuid4())
                 with tarfile.open(tmpZipPath, "w") as zf:
                     for assFileSpec in fileSpec.associatedFiles:
                         zf.add(assFileSpec.path,
                                os.path.basename(assFileSpec.path))
                 # avoid overwriting
                 lockName = 'zip.lock.{0}'.format(fileSpec.lfn)
                 lockInterval = 60
                 tmpStat = False
                 # get lock
                 for i in range(lockInterval):
                     tmpStat = self.dbInterface.get_object_lock(
                         lockName, lock_interval=lockInterval)
                     if tmpStat:
                         break
                     time.sleep(1)
                 # failed to lock
                 if not tmpStat:
                     msgStr = 'failed to get zip lock for {0}'.format(
                         fileSpec.lfn)
                     tmp_log.error(msgStr)
                     return None, msgStr
                 if not os.path.exists(zipPath):
                     os.rename(tmpZipPath, zipPath)
                 # release lock
                 self.dbInterface.release_object_lock(lockName)
             # set path
             fileSpec.path = zipPath
             # get size
             statInfo = os.stat(zipPath)
             fileSpec.fsize = statInfo.st_size
             fileSpec.chksum = core_utils.calc_adler32(zipPath)
             msgStr = 'fileSpec.path - {0}, fileSpec.fsize - {1}, fileSpec.chksum(adler32) - {2}'\
                 .format(fileSpec.path, fileSpec.fsize, fileSpec.chksum)
             tmp_log.debug(msgStr)
     except Exception:
         errMsg = core_utils.dump_error_message(tmp_log)
         return False, 'failed to zip with {0}'.format(errMsg)
     tmp_log.debug('done')
     return True, ''
Example #4
def scan_files_in_dir(dir_name, patterns=None, zip_patterns=None):
    fileList = []
    for root, dirs, filenames in walk(dir_name):
        for filename in filenames:
            # check if zipped
            is_zipped = False
            if zip_patterns:
                matched = False
                for pattern in zip_patterns:
                    if re.search(pattern, filename) is not None:
                        matched = True
                        break
                if matched:
                    is_zipped = True
            # check filename
            if not is_zipped and patterns:
                matched = False
                for pattern in patterns:
                    if re.search(pattern, filename) is not None:
                        matched = True
                        break
                if not matched:
                    continue
            # make dict
            tmpFileDict = dict()
            pfn = os.path.join(root, filename)
            tmpFileDict['path'] = pfn
            tmpFileDict['fsize'] = os.stat(pfn).st_size
            tmpFileDict['guid'] = str(uuid.uuid4())
            tmpFileDict['chksum'] = core_utils.calc_adler32(pfn)
            tmpFileDict['eventStatus'] = "finished"
            if is_zipped:
                lfns = []
                # extract actual event filenames from zip
                with tarfile.open(pfn) as f:
                    for tar_info in f.getmembers():
                        lfns.append(os.path.basename(tar_info.name))
                tmpFileDict['type'] = 'zip_output'
            else:
                lfns = [os.path.basename(pfn)]
                tmpFileDict['type'] = 'es_output'
            for lfn in lfns:
                tmpDict = copy.copy(tmpFileDict)
                tmpDict['eventRangeID'] = lfn.split('.')[-1]
                fileList.append(tmpDict)
    return fileList
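The eventRangeID is recovered from the trailing dot-suffix of each logical file name; a sketch of the assumed naming convention (the file name is illustrative):

# Assumed convention: "<basename>.<eventRangeID>", so split('.')[-1]
# yields the event range ID.
lfn = 'EVNT.01234._000001.pool.root.4-2-3-5'
assert lfn.split('.')[-1] == '4-2-3-5'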
 def trigger_preparation(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')
     # loop over all inputs
     inFileInfo = jobspec.get_input_file_attributes()
     gucInput = None
     for tmpFileSpec in jobspec.inFiles:
         # construct source and destination paths
         srcPath = mover_utils.construct_file_path(
             self.srcBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         dstPath = mover_utils.construct_file_path(
             self.dstBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         # local access path
         accPath = mover_utils.construct_file_path(
             self.localBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         if self.checkLocalPath:
              # check if it already exists
             if os.path.exists(accPath):
                 # calculate checksum
                 checksum = core_utils.calc_adler32(accPath)
                 checksum = 'ad:{0}'.format(checksum)
                 if checksum == inFileInfo[tmpFileSpec.lfn]['checksum']:
                     continue
             # make directories if needed
             if not os.path.isdir(os.path.dirname(accPath)):
                 os.makedirs(os.path.dirname(accPath))
         # make input for globus-url-copy
         if gucInput is None:
             gucInput = tempfile.NamedTemporaryFile(mode='w',
                                                    delete=False,
                                                    suffix='_guc_in.tmp')
         gucInput.write("{0} {1}\n".format(srcPath, dstPath))
         tmpFileSpec.attemptNr += 1
     # nothing to transfer
     if gucInput is None:
         tmpLog.debug('done with no transfers')
         return True, ''
     # transfer
     tmpLog.debug('execute globus-url-copy')
     gucInput.close()
     args = ['globus-url-copy', '-f', gucInput.name, '-cd']
     if self.gulOpts is not None:
         args += self.gulOpts.split()
     try:
         tmpLog.debug('execute: ' + ' '.join(args))
         p = subprocess.Popen(args,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
         try:
             stdout, stderr = p.communicate(timeout=self.timeout)
         except subprocess.TimeoutExpired:
             p.kill()
             stdout, stderr = p.communicate()
             tmpLog.warning('command timeout')
         return_code = p.returncode
         if stdout is not None:
             if not isinstance(stdout, str):
                 stdout = stdout.decode()
             stdout = stdout.replace('\n', ' ')
         if stderr is not None:
             if not isinstance(stderr, str):
                 stderr = stderr.decode()
             stderr = stderr.replace('\n', ' ')
         tmpLog.debug("stdout: %s" % stdout)
         tmpLog.debug("stderr: %s" % stderr)
     except Exception:
         core_utils.dump_error_message(tmpLog)
         return_code = 1
     os.remove(gucInput.name)
     if return_code == 0:
         tmpLog.debug('succeeded')
         return True, ''
     else:
         errMsg = 'failed with {0}'.format(return_code)
         tmpLog.error(errMsg)
         # check attemptNr
         for tmpFileSpec in jobspec.inFiles:
             if tmpFileSpec.attemptNr >= self.maxAttempts:
                 errMsg = 'gave up due to max attempts'
                 tmpLog.error(errMsg)
                 return (False, errMsg)
         return None, errMsg
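The temporary file passed with -f lists one "source destination" URL pair per line, matching the write() above, while -cd asks globus-url-copy to create missing destination directories. A sketch of what gucInput might contain (hosts and paths are made up):

gsiftp://gridftp.example.org/pnfs/scope/EVNT.01234._000001.pool.root file:///cache/scope/EVNT.01234._000001.pool.root
gsiftp://gridftp.example.org/pnfs/scope/EVNT.01234._000002.pool.root file:///cache/scope/EVNT.01234._000002.pool.root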
 def get_files_to_stage_out(self, workspec):
     # get logger
     tmpLog = core_utils.make_logger(_logger,
                                     'workerID={0}'.format(
                                         workspec.workerID),
                                     method_name='get_files_to_stage_out')
     fileDict = dict()
     # look for the json just under the access point
     for pandaID in workspec.pandaid_list:
         # look for the json just under the access point
         accessPoint = self.get_access_point(workspec, pandaID)
         jsonFilePath = os.path.join(accessPoint, jsonOutputsFileName)
         readJsonPath = jsonFilePath + suffixReadJson
         # first look for json.read which is not yet acknowledged
         tmpLog.debug('looking for output file {0}'.format(readJsonPath))
         if os.path.exists(readJsonPath):
             pass
         else:
             tmpLog.debug(
                 'looking for output file {0}'.format(jsonFilePath))
             if not os.path.exists(jsonFilePath):
                 # not found
                 tmpLog.debug('not found')
                 continue
             try:
                 tmpLog.debug('found')
                 # rename to prevent from being overwritten
                 os.rename(jsonFilePath, readJsonPath)
             except Exception:
                 tmpLog.error('failed to rename json')
                 continue
         # load json
         toSkip = False
         loadDict = None
         try:
             with open(readJsonPath) as jsonFile:
                 loadDict = json.load(jsonFile)
         except Exception:
             tmpLog.error('failed to load json')
             toSkip = True
          # test validity of the data format (i.e. it should be a dictionary)
         if not toSkip:
             if not isinstance(loadDict, dict):
                 tmpLog.error('loaded data is not a dictionary')
                 toSkip = True
         # collect files and events
         nData = 0
         if not toSkip:
             sizeMap = dict()
             chksumMap = dict()
             eventsList = dict()
             for tmpPandaID, tmpEventMapList in iteritems(loadDict):
                  tmpPandaID = int(tmpPandaID)  # json keys arrive as strings
                 # test if tmpEventMapList is a list
                 if not isinstance(tmpEventMapList, list):
                     tmpLog.error('loaded data item is not a list')
                     toSkip = True
                     break
                 for tmpEventInfo in tmpEventMapList:
                     try:
                         nData += 1
                         if 'eventRangeID' in tmpEventInfo:
                             tmpEventRangeID = tmpEventInfo['eventRangeID']
                         else:
                             tmpEventRangeID = None
                         tmpFileDict = dict()
                         pfn = tmpEventInfo['path']
                         lfn = os.path.basename(pfn)
                         tmpFileDict['path'] = pfn
                         if pfn not in sizeMap:
                             if 'fsize' in tmpEventInfo:
                                 sizeMap[pfn] = tmpEventInfo['fsize']
                             else:
                                 sizeMap[pfn] = os.stat(pfn).st_size
                         tmpFileDict['fsize'] = sizeMap[pfn]
                         tmpFileDict['type'] = tmpEventInfo['type']
                         if tmpEventInfo['type'] in ['log', 'output']:
                             # disable zipping
                             tmpFileDict['isZip'] = 0
                         elif tmpEventInfo['type'] == 'zip_output':
                             # already zipped
                             tmpFileDict['isZip'] = 1
                         elif 'isZip' in tmpEventInfo:
                             tmpFileDict['isZip'] = tmpEventInfo['isZip']
                         # guid
                         if 'guid' in tmpEventInfo:
                             tmpFileDict['guid'] = tmpEventInfo['guid']
                         else:
                             tmpFileDict['guid'] = str(uuid.uuid4())
                         # get checksum
                         if pfn not in chksumMap:
                             if 'chksum' in tmpEventInfo:
                                 chksumMap[pfn] = tmpEventInfo['chksum']
                             else:
                                 chksumMap[pfn] = core_utils.calc_adler32(
                                     pfn)
                         tmpFileDict['chksum'] = chksumMap[pfn]
                         if tmpPandaID not in fileDict:
                             fileDict[tmpPandaID] = dict()
                         if lfn not in fileDict[tmpPandaID]:
                             fileDict[tmpPandaID][lfn] = []
                         fileDict[tmpPandaID][lfn].append(tmpFileDict)
                         # skip if unrelated to events
                         if tmpFileDict['type'] not in [
                                 'es_output', 'zip_output'
                         ]:
                             continue
                         tmpFileDict['eventRangeID'] = tmpEventRangeID
                         if tmpPandaID not in eventsList:
                             eventsList[tmpPandaID] = list()
                          eventsList[tmpPandaID].append({
                              'eventRangeID': tmpEventRangeID,
                              'eventStatus': tmpEventInfo['eventStatus']
                          })
                     except Exception:
                         core_utils.dump_error_message(tmpLog)
             # dump events
             if not toSkip:
                 if len(eventsList) > 0:
                     curName = os.path.join(accessPoint,
                                            jsonEventsUpdateFileName)
                     newName = curName + '.new'
                      with open(newName, 'w') as f:
                          json.dump(eventsList, f)
                      os.rename(newName, curName)
         # remove empty file
         if toSkip or nData == 0:
             try:
                 os.remove(readJsonPath)
             except Exception:
                 pass
         tmpLog.debug('got {0} files for PandaID={1}'.format(
             nData, pandaID))
     return fileDict
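The consumed json maps each PandaID (as a string key) to a list of per-file event records; a hedged sketch of the layout this parser expects, with illustrative values ('path' and 'type' are always read, 'eventStatus' is read for es_output/zip_output records, and 'fsize', 'chksum', 'guid', 'eventRangeID', 'isZip' fall back to defaults when absent):

{
    "1234567890": [
        {"path": "/work/worker_0001/EVNT.01234.pool.root.4-2-3-5",
         "type": "es_output",
         "eventRangeID": "4-2-3-5",
         "eventStatus": "finished"}
    ]
}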
Example #7
 def make_one_zip(self, arg_dict):
     self.zip_tmp_log.debug('start')
     try:
         zipPath = arg_dict['zipPath']
         lfn = os.path.basename(zipPath)
         self.zip_tmp_log.debug(
             '{0} start zipPath={1} with {2} files'.format(
                 lfn, zipPath, len(arg_dict['associatedFiles'])))
         # make zip if doesn't exist
         if not os.path.exists(zipPath):
             # tmp file names
             tmpZipPath = zipPath + '.' + str(uuid.uuid4())
             tmpZipPathIn = tmpZipPath + '.in'
             with open(tmpZipPathIn, "w") as f:
                 for associatedFile in arg_dict['associatedFiles']:
                     f.write("{0}\n".format(associatedFile))
             # make command
             com = 'tar -c -f {0} -T {1} '.format(tmpZipPath, tmpZipPathIn)
             com += "--transform 's/.*\///' "
             # execute
             p = subprocess.Popen(com,
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
             stdOut, stdErr = p.communicate()
             retCode = p.returncode
             if retCode != 0:
                 msgStr = 'failed to make zip for {0} with {1}:{2}'.format(
                     lfn, stdOut, stdErr)
                 self.zip_tmp_log.error(msgStr)
                 return None, msgStr, {}
             # avoid overwriting
             lockName = 'zip.lock.{0}'.format(lfn)
             lockInterval = 60
             tmpStat = False
             # get lock
             for i in range(lockInterval):
                 tmpStat = self.dbInterface.get_object_lock(
                     lockName, lock_interval=lockInterval)
                 if tmpStat:
                     break
                 time.sleep(1)
             # failed to lock
             if not tmpStat:
                 msgStr = 'failed to lock for {0}'.format(lfn)
                 self.zip_tmp_log.error(msgStr)
                  return None, msgStr, {}
             if not os.path.exists(zipPath):
                 os.rename(tmpZipPath, zipPath)
             # release lock
             self.dbInterface.release_object_lock(lockName)
         # make return
         fileInfo = dict()
         fileInfo['path'] = zipPath
         # get size
         statInfo = os.stat(zipPath)
         fileInfo['fsize'] = statInfo.st_size
         fileInfo['chksum'] = core_utils.calc_adler32(zipPath)
     except Exception:
         errMsg = core_utils.dump_error_message(self.zip_tmp_log)
          return False, 'failed to zip with {0}'.format(errMsg), {}
     self.zip_tmp_log.debug('{0} done'.format(lfn))
     return True, '', fileInfo
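make_one_zip expects an arg_dict carrying the target tarball path and the files to bundle, and returns (True, '', fileInfo) on success or a None/False status with a message on failure. A minimal call sketch (paths are hypothetical; dbInterface and zip_tmp_log come from the plugin instance):

tmpStat, errMsg, fileInfo = self.make_one_zip({
    'zipPath': '/cache/panda/zip/EVNT.01234.zip',
    'associatedFiles': ['/work/worker_0001/evt.1',
                        '/work/worker_0001/evt.2'],
})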
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('Start. Trigger data transfer for job: {0}'.format(
            jobspec.PandaID))

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        tmpLog.info("Prepare files to download (construct path and verify existing files)")
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(
                self.basePath, inFile['scope'], inLFN)
            # check if the file exists; skip already downloaded files
            if os.path.exists(inFile['path']):
                checksum = core_utils.calc_adler32(inFile['path'])
                checksum = 'ad:%s' % checksum
                #tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                if 'checksum' in inFile and inFile['checksum'] and inFile[
                        'checksum'] == checksum:
                    #tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # check if path exists if not create it.
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({
                'scope': inFile['scope'],
                'name': inLFN,
                'destination': dstpath
            })
        tmpLog.info('Number of files to download: {0} for job: {1}'.format(
            len(files), jobspec.PandaID))
        #tmpLog.debug('files {0}'.format(files))
        tmpLog.info('Setup of Pilot2 API client')
        data_client = data.StageInClient(site=jobspec.computingSite)
        allChecked = True
        ErrMsg = 'These files failed to download : '
        if len(files) > 0:
            tmpLog.info("Going to transfer {0} files with one call to Pilot2 Data API".format(len(files)))
            result = None
            try:
                result = data_client.transfer(files)
            except Exception as e:
                tmpLog.error("Pilot2 Data API raised an error: {0}".format(e))
            tmpLog.debug(
                'data_client.transfer(files) result:\n{0}'.format(result))
            tmpLog.info("Transfer call to Pilot2 Data API completed")
            # loop over each file check result all must be true for entire result to be true
            if result:
                for answer in result:
                    if answer['errno'] != 0:
                        allChecked = False
                        ErrMsg = ErrMsg + (" %s " % answer['name'])
            else:
                tmpLog.info(
                    'Looks like all files in place. Number of files: {0}'.
                    format(len(files)))
        # return
        tmpLog.debug(
            'Finished data transfer with {0} files for job {1}'.format(
                len(files), jobspec.PandaID))
        if allChecked:
            return True, ''
        else:
            return False, ErrMsg
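The files list built above holds {'scope', 'name', 'destination'} dicts, and the loop reads each transfer() result record through its 'name' and 'errno' fields; a sketch of that check in isolation (the exact Pilot2 result payload is an assumption):

result = [{'name': 'EVNT.01234._000001.pool.root', 'errno': 0},
          {'name': 'EVNT.01234._000002.pool.root', 'errno': 2}]
failed = [r['name'] for r in result if r['errno'] != 0]
assert failed == ['EVNT.01234._000002.pool.root']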
Example #9
    def trigger_preparation(self, jobspec):
        # make logger
        tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_preparation')
        tmpLog.debug('start')

        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # get input files
        files = []
        inFiles = jobspec.get_input_file_attributes(skip_ready=True)
        # set path to each file
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
            tmpLog.debug('To check file: %s' % inFile)
            if os.path.exists(inFile['path']):
                checksum = core_utils.calc_adler32(inFile['path'])
                checksum = 'ad:%s' % checksum
                tmpLog.debug('checksum for file %s is %s' % (inFile['path'], checksum))
                if 'checksum' in inFile and inFile['checksum'] and inFile['checksum'] == checksum:
                    tmpLog.debug('File %s already exists at %s' % (inLFN, inFile['path']))
                    continue
            dstpath = os.path.dirname(inFile['path'])
            # check if path exists if not create it.
            if not os.access(dstpath, os.F_OK):
                os.makedirs(dstpath)
            files.append({'scope': inFile['scope'],
                          'name': inLFN,
                          'destination': dstpath})
        tmpLog.debug('files[] {0}'.format(files))

        allChecked = True
        ErrMsg = 'These files failed to download : '
        if files:
            threads = []
            n_files_per_thread = (len(files) + self.n_threads - 1) // self.n_threads
            tmpLog.debug('num files per thread: %s' % n_files_per_thread)
            for i in range(0, len(files), n_files_per_thread):
                sub_files = files[i:i + n_files_per_thread]
                thread = threading.Thread(target=self.stage_in, kwargs={'tmpLog': tmpLog, 'jobspec': jobspec, 'files': sub_files})
                threads.append(thread)
            [t.start() for t in threads]
            while len(threads) > 0:
                time.sleep(1)
                threads = [t for t in threads if t and t.is_alive()]

            tmpLog.info('Checking all files: {0}'.format(files))
            for file in files:
                if file['errno'] != 0:
                    allChecked = False
                    ErrMsg = ErrMsg + (" %s " % file['name'])
        # return
        tmpLog.debug('stop')
        if allChecked:
            tmpLog.info('Looks like all files are successfully downloaded.')
            return True, ''
        else:
            return False, ErrMsg
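The fan-out splits files into at most n_threads chunks via ceiling division (hence the integer // above); a self-contained sketch of the slicing:

files = list(range(10))                                  # stand-ins for file dicts
n_threads = 3
per_thread = (len(files) + n_threads - 1) // n_threads   # ceil(10 / 3) == 4
chunks = [files[i:i + per_thread] for i in range(0, len(files), per_thread)]
assert chunks == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]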
Example #10
    def trigger_stage_out(self, jobspec):
        """Trigger the stage-out procedure for the job.
        Output files are available through jobspec.get_outfile_specs(skip_done=False) which gives
        a list of FileSpecs not yet done.
        FileSpec.attemptNr shows how many times transfer was tried for the file so far.

        :param jobspec: job specifications
        :type jobspec: JobSpec
        :return: A tuple of return code (True: success, False: fatal failure, None: temporary failure)
                 and error dialog
        :rtype: (bool, string)
        """

        # let gc clean up memory
        gc.collect()

        # make logger
        tmpLog = self.make_logger(_logger,
                                  'PandaID={0}'.format(jobspec.PandaID),
                                  method_name='trigger_stage_out')
        tmpLog.debug('start')
        # get the environment
        harvester_env = os.environ.copy()
        #tmpLog.debug('Harvester environment : {}'.format(harvester_env))

        xrdcpOutput = None
        allfiles_transfered = True
        overall_errMsg = ""
        fileAttrs = jobspec.get_output_file_attributes()
        # loop over all output files
        for fileSpec in jobspec.get_output_file_specs(skip_done=True):
            # fileSpec.objstoreID = 123
            # fileSpec.fileAttributes['guid']
            # construct source and destination paths
            dstPath = mover_utils.construct_file_path(
                self.dstBasePath, fileAttrs[fileSpec.lfn]['scope'],
                fileSpec.lfn)
            # local path
            localPath = mover_utils.construct_file_path(
                self.localBasePath, fileAttrs[fileSpec.lfn]['scope'],
                fileSpec.lfn)
            tmpLog.debug('fileSpec.path - {0} fileSpec.lfn = {1}'.format(
                fileSpec.path, fileSpec.lfn))
            localPath = fileSpec.path
            if self.checkLocalPath:
                # check if it already exists
                if os.path.exists(localPath):
                    # calculate checksum
                    checksum = core_utils.calc_adler32(localPath)
                    checksum = 'ad:{0}'.format(checksum)
                    if checksum == fileAttrs[fileSpec.lfn]['checksum']:
                        continue
            # collect list of output files
            if xrdcpOutput is None:
                xrdcpOutput = [dstPath]
            else:
                if dstPath not in xrdcpOutput:
                    xrdcpOutput.append(dstPath)
            # transfer using xrdcp one file at a time
            tmpLog.debug('execute xrdcp')
            args = ['xrdcp', '--nopbar', '--force']
            args_files = [localPath, dstPath]
            if self.xrdcpOpts is not None:
                args += self.xrdcpOpts.split()
            args += args_files
            fileSpec.attemptNr += 1
            process = stdout = stderr = None  # ensure names exist for cleanup if Popen fails
            try:
                xrdcp_cmd = ' '.join(args)
                tmpLog.debug('execute: {0}'.format(xrdcp_cmd))
                process = subprocess.Popen(xrdcp_cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           env=harvester_env,
                                           shell=True)
                try:
                    stdout, stderr = process.communicate(timeout=self.timeout)
                except subprocess.TimeoutExpired:
                    process.kill()
                    stdout, stderr = process.communicate()
                    tmpLog.warning('command timeout')
                return_code = process.returncode
                if stdout is not None:
                    if not isinstance(stdout, str):
                        stdout = stdout.decode()
                    stdout = stdout.replace('\n', ' ')
                if stderr is not None:
                    if not isinstance(stderr, str):
                        stderr = stderr.decode()
                    stderr = stderr.replace('\n', ' ')
                tmpLog.debug("stdout: %s" % stdout)
                tmpLog.debug("stderr: %s" % stderr)
            except Exception:
                core_utils.dump_error_message(tmpLog)
                return_code = 1
            if return_code == 0:
                fileSpec.status = 'finished'
            else:
                overall_errMsg += "file - {0} did not transfer, error code {1} ".format(
                    localPath, return_code)
                allfiles_transfered = False
                errMsg = 'failed with {0}'.format(return_code)
                tmpLog.error(errMsg)
                # check attemptNr
                if fileSpec.attemptNr >= self.maxAttempts:
                    tmpLog.error(
                        'reached maxattempts: {0}, marked it as failed'.format(
                            self.maxAttempts))
                    fileSpec.status = 'failed'

            # force update
            fileSpec.force_update('status')
            tmpLog.debug('file: {0} status: {1}'.format(
                fileSpec.lfn, fileSpec.status))
            del process, stdout, stderr

        # end loop over output files

        # nothing to transfer
        if xrdcpOutput is None:
            tmpLog.debug('done with no transfers')
            return True, ''
        # check if all files were transfered
        tmpLog.debug('done')
        if allfiles_transfered:
            return True, ''
        else:
            return None, overall_errMsg
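Per the docstring's contract, callers separate fatal from retryable outcomes through the first element of the returned tuple; a minimal driver sketch (the stager and jobspec objects are assumed to exist):

tmpStat, errMsg = stager.trigger_stage_out(jobspec)
if tmpStat is True:
    pass      # transfers finished or submitted; proceed to status checks
elif tmpStat is None:
    pass      # temporary failure; keep the job queued and retry later
else:
    pass      # fatal failure; give up on this job's transfers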
 def trigger_preparation(self, jobspec):
     # make logger
     tmpLog = self.make_logger(baseLogger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')
     # get the environment
     harvester_env = os.environ.copy()
     #tmpLog.debug('Harvester environment : {}'.format(harvester_env))
     # loop over all inputs
     inFileInfo = jobspec.get_input_file_attributes()
     xrdcpInput = None
     allfiles_transfered = True
     overall_errMsg = ""
     for tmpFileSpec in jobspec.inFiles:
         # construct source and destination paths
         srcPath = mover_utils.construct_file_path(
             self.srcBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         # local path
         localPath = mover_utils.construct_file_path(
             self.localBasePath, inFileInfo[tmpFileSpec.lfn]['scope'],
             tmpFileSpec.lfn)
         if self.checkLocalPath:
              # check if it already exists
             if os.path.exists(localPath):
                 # calculate checksum
                 checksum = core_utils.calc_adler32(localPath)
                 checksum = 'ad:{0}'.format(checksum)
                 if checksum == inFileInfo[tmpFileSpec.lfn]['checksum']:
                     continue
             # make directories if needed
             if not os.path.isdir(os.path.dirname(localPath)):
                 os.makedirs(os.path.dirname(localPath))
                 tmpLog.debug('Make directory - {0}'.format(
                     os.path.dirname(localPath)))
         # collect list of input files
         if xrdcpInput is None:
             xrdcpInput = [srcPath]
         else:
              xrdcpInput.append(srcPath)
         # transfer using xrdcp one file at a time
         tmpLog.debug('execute xrdcp')
         args = ['xrdcp', '--nopbar', '--force']
         args_files = [srcPath, localPath]
         if self.xrdcpOpts is not None:
             args += self.xrdcpOpts.split()
         args += args_files
         tmpFileSpec.attemptNr += 1
         try:
             xrdcp_cmd = ' '.join(args)
             tmpLog.debug('execute: {0}'.format(xrdcp_cmd))
             p = subprocess.Popen(xrdcp_cmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  env=harvester_env,
                                  shell=True)
             try:
                 stdout, stderr = p.communicate(timeout=self.timeout)
             except subprocess.TimeoutExpired:
                 p.kill()
                 stdout, stderr = p.communicate()
                 tmpLog.warning('command timeout')
             return_code = p.returncode
             if stdout is not None:
                 if not isinstance(stdout, str):
                     stdout = stdout.decode()
                 stdout = stdout.replace('\n', ' ')
             if stderr is not None:
                 if not isinstance(stderr, str):
                     stderr = stderr.decode()
                 stderr = stderr.replace('\n', ' ')
             tmpLog.debug("stdout: %s" % stdout)
             tmpLog.debug("stderr: %s" % stderr)
         except Exception:
             core_utils.dump_error_message(tmpLog)
             return_code = 1
         if return_code != 0:
              overall_errMsg += "file - {0} did not transfer, error code {1} ".format(
                  localPath, return_code)
             allfiles_transfered = False
             errMsg = 'failed with {0}'.format(return_code)
             tmpLog.error(errMsg)
             # check attemptNr
             if tmpFileSpec.attemptNr >= self.maxAttempts:
                 errMsg = 'gave up due to max attempts'
                 tmpLog.error(errMsg)
                 return (False, errMsg)
     # end loop over input files
     # nothing to transfer
     if xrdcpInput is None:
         tmpLog.debug('done with no transfers')
         return True, ''
     # check if all files were transfered
     if allfiles_transfered:
         return True, ''
     else:
         return None, overall_errMsg